// sax.js (43 KB)
  1. ;(function (sax) { // wrapper for non-node envs
  // Public API: a factory that builds a parser, the two constructors, and the
  // stream factory.
  sax.parser = function (strict, opt) {
    var parser = new SAXParser(strict, opt)
    return parser
  }
  sax.SAXParser = SAXParser
  sax.SAXStream = SAXStream
  sax.createStream = createStream
  6. // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
  7. // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
  8. // since that's the earliest that a buffer overrun could occur. This way, checks are
  9. // as rare as required, but as often as necessary to ensure never crossing this bound.
  10. // Furthermore, buffers are only tested at most once per write(), so passing a very
  11. // large string into write() might have undesirable effects, but this is manageable by
  12. // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
  13. // edge case, result in creating at most one complete copy of the string passed in.
  14. // Set to Infinity to have unlimited buffers.
  sax.MAX_BUFFER_LENGTH = 64 * 1024

  // Names of the parser properties that accumulate characters. clearBuffers()
  // resets each of them and checkBufferLength() measures each of them.
  var buffers = [
    'comment',
    'sgmlDecl',
    'textNode',
    'tagName',
    'doctype',
    'procInstName',
    'procInstBody',
    'entity',
    'attribName',
    'attribValue',
    'cdata',
    'script'
  ]

  // Every event name the parser knows about. A handler for event `x` is looked
  // up on the parser as the property `onx`.
  sax.EVENTS = [
    'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment',
    'opentagstart', 'attribute', 'opentag', 'closetag',
    'opencdata', 'cdata', 'closecdata',
    'error', 'end', 'ready',
    'script',
    'opennamespace', 'closenamespace'
  ]
  // Parser constructor; also works when called without `new`.
  //   strict - truthy for strict parsing (also forces `noscript`)
  //   opt    - optional settings object; it is stored on the parser and its
  //            `lowercase` field is normalised from `lowercasetags` in place.
  // Resets every buffer and state field, then emits `onready`. end() re-runs
  // this constructor on an existing parser to reset it.
  function SAXParser (strict, opt) {
    if (!(this instanceof SAXParser)) {
      return new SAXParser(strict, opt)
    }

    var self = this
    var options = opt || {}

    clearBuffers(self)
    self.c = ''
    self.q = ''
    self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

    self.opt = options
    options.lowercase = options.lowercase || options.lowercasetags
    // name of the String method used to normalise names in non-strict mode
    self.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'

    self.tags = []
    self.sawRoot = false
    self.closedRoot = false
    self.closed = false
    self.error = null
    self.tag = null
    self.strict = !!strict
    self.noscript = !!(strict || options.noscript)
    self.state = S.BEGIN
    self.strictEntities = options.strictEntities
    // per-parser entity table that inherits from the shared one
    self.ENTITIES = Object.create(self.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES)
    self.attribList = []

    // namespaces form a prototype chain: each tag's `ns` inherits from its
    // parent's, and the parser's own `ns` sits at the root.
    if (options.xmlns) {
      self.ns = Object.create(rootNS)
    }

    // position tracking is on unless explicitly disabled with position: false
    self.trackPosition = options.position !== false
    if (self.trackPosition) {
      self.column = 0
      self.line = 0
      self.position = 0
    }

    emit(self, 'onready')
  }
  // Minimal fallbacks for engines that lack Object.create / Object.keys.
  // The Object.create fallback only handles the prototype argument.
  if (!Object.create) {
    Object.create = function (proto) {
      var Surrogate = function () {}
      Surrogate.prototype = proto
      return new Surrogate()
    }
  }

  if (!Object.keys) {
    Object.keys = function (obj) {
      var names = []
      for (var name in obj) {
        if (obj.hasOwnProperty(name)) {
          names.push(name)
        }
      }
      return names
    }
  }
  // Measures every parser buffer against the length limit (never below 10).
  // Oversized text, cdata and script buffers are emitted early and emptied;
  // any other oversized buffer is reported through error(). Finally schedules
  // the next check at the earliest position where an overrun could occur.
  function checkBufferLength (parser) {
    var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    var longest = 0

    for (var idx = 0, count = buffers.length; idx < count; idx++) {
      var name = buffers[idx]
      var size = parser[name].length

      if (size > limit) {
        // Text/cdata/script can legitimately grow large because they are
        // buffered, so flush them instead of treating it as an error.
        if (name === 'textNode') {
          closeText(parser)
        } else if (name === 'cdata') {
          emitNode(parser, 'oncdata', parser.cdata)
          parser.cdata = ''
        } else if (name === 'script') {
          emitNode(parser, 'onscript', parser.script)
          parser.script = ''
        } else {
          error(parser, 'Max buffer length exceeded: ' + name)
        }
      }

      // uses the length measured before any flush above
      longest = Math.max(longest, size)
    }

    parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH - longest + parser.position
  }
  // Resets every buffer property listed in `buffers` to the empty string.
  function clearBuffers (parser) {
    buffers.forEach(function (name) {
      parser[name] = ''
    })
  }
  // Emits whatever is currently buffered as text, cdata and script (in that
  // order) and empties those buffers. Empty cdata/script buffers emit nothing.
  function flushBuffers (parser) {
    closeText(parser)

    var pending = [['cdata', 'oncdata'], ['script', 'onscript']]
    for (var idx = 0; idx < pending.length; idx++) {
      var buffer = pending[idx][0]
      if (parser[buffer] !== '') {
        emitNode(parser, pending[idx][1], parser[buffer])
        parser[buffer] = ''
      }
    }
  }
  // Instance methods of the parser.
  SAXParser.prototype = {
    // finish parsing; returns nothing
    end: function () {
      end(this)
    },
    write: write,
    // clear a recorded error so writing can continue; chainable
    resume: function () {
      this.error = null
      return this
    },
    // writing null is the signal for "end of input"
    close: function () {
      return this.write(null)
    },
    // emit any buffered text/cdata/script right away
    flush: function () {
      flushBuffers(this)
    }
  }
  // Base class for SAXStream: the `stream` module's Stream when it can be
  // loaded, otherwise an empty constructor so the rest of the file still loads.
  var Stream
  try {
    var streamModule = require('stream')
    Stream = streamModule.Stream
  } catch (loadError) {
    Stream = function () {}
  }
  // Events SAXStream forwards generically: everything except 'error' and
  // 'end', which the SAXStream constructor wires up itself.
  var streamWraps = sax.EVENTS.filter(function (name) {
    return !(name === 'error' || name === 'end')
  })
  // Factory form of `new SAXStream(strict, opt)`.
  function createStream (strict, opt) {
    var stream = new SAXStream(strict, opt)
    return stream
  }
  // Stream wrapper around a SAXParser; also works when called without `new`.
  // Parser 'end' and 'error' are re-emitted as stream events. For every name
  // in streamWraps an `on<name>` accessor is defined on the stream: reading it
  // returns the parser's handler, assigning a function registers it through
  // on(), and assigning a falsy value removes all listeners for that event.
  function SAXStream (strict, opt) {
    if (!(this instanceof SAXStream)) {
      return new SAXStream(strict, opt)
    }

    Stream.apply(this)

    var stream = this
    var parser = new SAXParser(strict, opt)
    stream._parser = parser
    stream.writable = true
    stream.readable = true

    parser.onend = function () {
      stream.emit('end')
    }

    parser.onerror = function (er) {
      stream.emit('error', er)
      // If emit did not throw, the error was handled: clear it so the parser
      // accepts further writes.
      stream._parser.error = null
    }

    // set on the first Buffer write (see SAXStream.prototype.write)
    stream._decoder = null

    streamWraps.forEach(function (ev) {
      var hook = 'on' + ev
      Object.defineProperty(stream, hook, {
        enumerable: true,
        configurable: false,
        get: function () {
          return stream._parser[hook]
        },
        set: function (h) {
          if (h) {
            stream.on(ev, h)
            return
          }
          stream.removeAllListeners(ev)
          stream._parser[hook] = h
          return h
        }
      })
    })
  }
  // SAXStream inherits from Stream; `constructor` is restored as a
  // non-enumerable, non-writable property pointing back at SAXStream.
  var saxStreamProto = Object.create(Stream.prototype)
  Object.defineProperty(saxStreamProto, 'constructor', { value: SAXStream })
  SAXStream.prototype = saxStreamProto
  // Feeds one chunk to the parser and then emits it as a 'data' event.
  // Buffer chunks are decoded as UTF-8 through a StringDecoder that is created
  // on first use and kept on the stream. Always returns true.
  SAXStream.prototype.write = function (data) {
    var canDetectBuffers = typeof Buffer === 'function' &&
      typeof Buffer.isBuffer === 'function'

    if (canDetectBuffers && Buffer.isBuffer(data)) {
      if (!this._decoder) {
        var StringDecoder = require('string_decoder').StringDecoder
        this._decoder = new StringDecoder('utf8')
      }
      data = this._decoder.write(data)
    }

    var text = data.toString()
    this._parser.write(text)
    this.emit('data', data)
    return true
  }
  // Ends the stream. An optional final, non-empty chunk is written first.
  // If Buffer input has been decoded, the decoder is then flushed so bytes it
  // is still holding (an incomplete trailing multi-byte sequence) reach the
  // parser and 'data' listeners instead of being dropped. Always returns true.
  SAXStream.prototype.end = function (chunk) {
    if (chunk && chunk.length) {
      this.write(chunk)
    }
    if (this._decoder) {
      // StringDecoder#end() returns whatever input is still buffered
      var remaining = this._decoder.end()
      if (remaining) {
        this._parser.write(remaining)
        this.emit('data', remaining)
      }
    }
    this._parser.end()
    return true
  }
  // Registers a listener. The first time a wrapped event (one listed in
  // streamWraps) is subscribed to and the parser has no handler for it yet, a
  // forwarding handler is installed on the parser that re-emits the event,
  // with all its arguments, on the stream.
  SAXStream.prototype.on = function (ev, handler) {
    var stream = this
    var hook = 'on' + ev

    if (!stream._parser[hook] && streamWraps.indexOf(ev) !== -1) {
      stream._parser[hook] = function () {
        var payload = Array.prototype.slice.call(arguments)
        payload.unshift(ev)
        stream.emit.apply(stream, payload)
      }
    }

    return Stream.prototype.on.call(stream, ev, handler)
  }
  // this really needs to be replaced with character classes.
  // XML allows all manner of ridiculous numbers and digits.

  // Keyword text for CDATA sections and doctype declarations.
  // NOTE(review): presumably compared against the characters following `<!`
  // by write(); confirm against the state machine.
  var CDATA = '[CDATA['
  var DOCTYPE = 'DOCTYPE'
  // The two namespace URIs that attrib() insists the `xml` and `xmlns`
  // prefixes are bound to.
  var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
  var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
  // Root of the namespace prototype chain: SAXParser creates `parser.ns` from
  // this object when the `xmlns` option is set.
  var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
  // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
  // This implementation works on strings, a single character at a time
  // as such, it cannot ever support astral-plane characters (10000-EFFFF)
  // without a significant breaking change to either this parser, or the
  // JavaScript language. Implementation of an emoji-capable xml parser
  // is left as an exercise for the reader.
  // nameStart: single characters allowed at the start of a name.
  var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
  // nameBody: the nameStart set plus U+00B7, U+0300-U+036F, U+203F-U+2040,
  // '.', digits and '-'.
  var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
  // entityStart / entityBody: the same two sets with '#' added, so numeric
  // references such as `#65` or `#x41` are accepted as entity names.
  var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
  var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
  // True for exactly the four characters space, newline, carriage return, tab.
  function isWhitespace (c) {
    switch (c) {
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        return true
      default:
        return false
    }
  }
  // True for a double or a single quote character.
  function isQuote (c) {
    if (c === '"') {
      return true
    }
    return c === '\''
  }
  // True when the character is '>' or whitespace.
  function isAttribEnd (c) {
    if (c === '>') {
      return true
    }
    return isWhitespace(c)
  }
  // Result of testing the character against the given regular expression.
  function isMatch (regex, c) {
    var matched = regex.test(c)
    return matched
  }
  // Logical negation of isMatch().
  function notMatch (regex, c) {
    if (isMatch(regex, c)) {
      return false
    }
    return true
  }
  // Parser states, numbered consecutively from 0 in the order listed.
  // `S` is the running counter here; further down the file it is re-pointed
  // at sax.STATE as a shorthand.
  var S = 0
  var stateNames = [
    'BEGIN', // leading byte order mark or whitespace
    'BEGIN_WHITESPACE', // leading whitespace
    'TEXT', // general stuff
    'TEXT_ENTITY', // &amp and such.
    'OPEN_WAKA', // <
    'SGML_DECL', // <!BLARG
    'SGML_DECL_QUOTED', // <!BLARG foo "bar
    'DOCTYPE', // <!DOCTYPE
    'DOCTYPE_QUOTED', // <!DOCTYPE "//blah
    'DOCTYPE_DTD', // <!DOCTYPE "//blah" [ ...
    'DOCTYPE_DTD_QUOTED', // <!DOCTYPE "//blah" [ "foo
    'COMMENT_STARTING', // <!-
    'COMMENT', // <!--
    'COMMENT_ENDING', // <!-- blah -
    'COMMENT_ENDED', // <!-- blah --
    'CDATA', // <![CDATA[ something
    'CDATA_ENDING', // ]
    'CDATA_ENDING_2', // ]]
    'PROC_INST', // <?hi
    'PROC_INST_BODY', // <?hi there
    'PROC_INST_ENDING', // <?hi "there" ?
    'OPEN_TAG', // <strong
    'OPEN_TAG_SLASH', // <strong /
    'ATTRIB', // <a
    'ATTRIB_NAME', // <a foo
    'ATTRIB_NAME_SAW_WHITE', // <a foo _
    'ATTRIB_VALUE', // <a foo=
    'ATTRIB_VALUE_QUOTED', // <a foo="bar
    'ATTRIB_VALUE_CLOSED', // <a foo="bar"
    'ATTRIB_VALUE_UNQUOTED', // <a foo=bar
    'ATTRIB_VALUE_ENTITY_Q', // <foo bar="&quot;"
    'ATTRIB_VALUE_ENTITY_U', // <foo bar=&quot
    'CLOSE_TAG', // </a
    'CLOSE_TAG_SAW_WHITE', // </a >
    'SCRIPT', // <script> ...
    'SCRIPT_ENDING' // <script> ... <
  ]
  sax.STATE = {}
  stateNames.forEach(function (name) {
    sax.STATE[name] = S++
  })
  // The five predefined XML entities; the entity table used when the
  // `strictEntities` option is set.
  sax.XML_ENTITIES = {
    amp: '&',
    gt: '>',
    lt: '<',
    quot: '"',
    apos: "'"
  }
  308. sax.ENTITIES = {
  309. 'amp': '&',
  310. 'gt': '>',
  311. 'lt': '<',
  312. 'quot': '"',
  313. 'apos': "'",
  314. 'AElig': 198,
  315. 'Aacute': 193,
  316. 'Acirc': 194,
  317. 'Agrave': 192,
  318. 'Aring': 197,
  319. 'Atilde': 195,
  320. 'Auml': 196,
  321. 'Ccedil': 199,
  322. 'ETH': 208,
  323. 'Eacute': 201,
  324. 'Ecirc': 202,
  325. 'Egrave': 200,
  326. 'Euml': 203,
  327. 'Iacute': 205,
  328. 'Icirc': 206,
  329. 'Igrave': 204,
  330. 'Iuml': 207,
  331. 'Ntilde': 209,
  332. 'Oacute': 211,
  333. 'Ocirc': 212,
  334. 'Ograve': 210,
  335. 'Oslash': 216,
  336. 'Otilde': 213,
  337. 'Ouml': 214,
  338. 'THORN': 222,
  339. 'Uacute': 218,
  340. 'Ucirc': 219,
  341. 'Ugrave': 217,
  342. 'Uuml': 220,
  343. 'Yacute': 221,
  344. 'aacute': 225,
  345. 'acirc': 226,
  346. 'aelig': 230,
  347. 'agrave': 224,
  348. 'aring': 229,
  349. 'atilde': 227,
  350. 'auml': 228,
  351. 'ccedil': 231,
  352. 'eacute': 233,
  353. 'ecirc': 234,
  354. 'egrave': 232,
  355. 'eth': 240,
  356. 'euml': 235,
  357. 'iacute': 237,
  358. 'icirc': 238,
  359. 'igrave': 236,
  360. 'iuml': 239,
  361. 'ntilde': 241,
  362. 'oacute': 243,
  363. 'ocirc': 244,
  364. 'ograve': 242,
  365. 'oslash': 248,
  366. 'otilde': 245,
  367. 'ouml': 246,
  368. 'szlig': 223,
  369. 'thorn': 254,
  370. 'uacute': 250,
  371. 'ucirc': 251,
  372. 'ugrave': 249,
  373. 'uuml': 252,
  374. 'yacute': 253,
  375. 'yuml': 255,
  376. 'copy': 169,
  377. 'reg': 174,
  378. 'nbsp': 160,
  379. 'iexcl': 161,
  380. 'cent': 162,
  381. 'pound': 163,
  382. 'curren': 164,
  383. 'yen': 165,
  384. 'brvbar': 166,
  385. 'sect': 167,
  386. 'uml': 168,
  387. 'ordf': 170,
  388. 'laquo': 171,
  389. 'not': 172,
  390. 'shy': 173,
  391. 'macr': 175,
  392. 'deg': 176,
  393. 'plusmn': 177,
  394. 'sup1': 185,
  395. 'sup2': 178,
  396. 'sup3': 179,
  397. 'acute': 180,
  398. 'micro': 181,
  399. 'para': 182,
  400. 'middot': 183,
  401. 'cedil': 184,
  402. 'ordm': 186,
  403. 'raquo': 187,
  404. 'frac14': 188,
  405. 'frac12': 189,
  406. 'frac34': 190,
  407. 'iquest': 191,
  408. 'times': 215,
  409. 'divide': 247,
  410. 'OElig': 338,
  411. 'oelig': 339,
  412. 'Scaron': 352,
  413. 'scaron': 353,
  414. 'Yuml': 376,
  415. 'fnof': 402,
  416. 'circ': 710,
  417. 'tilde': 732,
  418. 'Alpha': 913,
  419. 'Beta': 914,
  420. 'Gamma': 915,
  421. 'Delta': 916,
  422. 'Epsilon': 917,
  423. 'Zeta': 918,
  424. 'Eta': 919,
  425. 'Theta': 920,
  426. 'Iota': 921,
  427. 'Kappa': 922,
  428. 'Lambda': 923,
  429. 'Mu': 924,
  430. 'Nu': 925,
  431. 'Xi': 926,
  432. 'Omicron': 927,
  433. 'Pi': 928,
  434. 'Rho': 929,
  435. 'Sigma': 931,
  436. 'Tau': 932,
  437. 'Upsilon': 933,
  438. 'Phi': 934,
  439. 'Chi': 935,
  440. 'Psi': 936,
  441. 'Omega': 937,
  442. 'alpha': 945,
  443. 'beta': 946,
  444. 'gamma': 947,
  445. 'delta': 948,
  446. 'epsilon': 949,
  447. 'zeta': 950,
  448. 'eta': 951,
  449. 'theta': 952,
  450. 'iota': 953,
  451. 'kappa': 954,
  452. 'lambda': 955,
  453. 'mu': 956,
  454. 'nu': 957,
  455. 'xi': 958,
  456. 'omicron': 959,
  457. 'pi': 960,
  458. 'rho': 961,
  459. 'sigmaf': 962,
  460. 'sigma': 963,
  461. 'tau': 964,
  462. 'upsilon': 965,
  463. 'phi': 966,
  464. 'chi': 967,
  465. 'psi': 968,
  466. 'omega': 969,
  467. 'thetasym': 977,
  468. 'upsih': 978,
  469. 'piv': 982,
  470. 'ensp': 8194,
  471. 'emsp': 8195,
  472. 'thinsp': 8201,
  473. 'zwnj': 8204,
  474. 'zwj': 8205,
  475. 'lrm': 8206,
  476. 'rlm': 8207,
  477. 'ndash': 8211,
  478. 'mdash': 8212,
  479. 'lsquo': 8216,
  480. 'rsquo': 8217,
  481. 'sbquo': 8218,
  482. 'ldquo': 8220,
  483. 'rdquo': 8221,
  484. 'bdquo': 8222,
  485. 'dagger': 8224,
  486. 'Dagger': 8225,
  487. 'bull': 8226,
  488. 'hellip': 8230,
  489. 'permil': 8240,
  490. 'prime': 8242,
  491. 'Prime': 8243,
  492. 'lsaquo': 8249,
  493. 'rsaquo': 8250,
  494. 'oline': 8254,
  495. 'frasl': 8260,
  496. 'euro': 8364,
  497. 'image': 8465,
  498. 'weierp': 8472,
  499. 'real': 8476,
  500. 'trade': 8482,
  501. 'alefsym': 8501,
  502. 'larr': 8592,
  503. 'uarr': 8593,
  504. 'rarr': 8594,
  505. 'darr': 8595,
  506. 'harr': 8596,
  507. 'crarr': 8629,
  508. 'lArr': 8656,
  509. 'uArr': 8657,
  510. 'rArr': 8658,
  511. 'dArr': 8659,
  512. 'hArr': 8660,
  513. 'forall': 8704,
  514. 'part': 8706,
  515. 'exist': 8707,
  516. 'empty': 8709,
  517. 'nabla': 8711,
  518. 'isin': 8712,
  519. 'notin': 8713,
  520. 'ni': 8715,
  521. 'prod': 8719,
  522. 'sum': 8721,
  523. 'minus': 8722,
  524. 'lowast': 8727,
  525. 'radic': 8730,
  526. 'prop': 8733,
  527. 'infin': 8734,
  528. 'ang': 8736,
  529. 'and': 8743,
  530. 'or': 8744,
  531. 'cap': 8745,
  532. 'cup': 8746,
  533. 'int': 8747,
  534. 'there4': 8756,
  535. 'sim': 8764,
  536. 'cong': 8773,
  537. 'asymp': 8776,
  538. 'ne': 8800,
  539. 'equiv': 8801,
  540. 'le': 8804,
  541. 'ge': 8805,
  542. 'sub': 8834,
  543. 'sup': 8835,
  544. 'nsub': 8836,
  545. 'sube': 8838,
  546. 'supe': 8839,
  547. 'oplus': 8853,
  548. 'otimes': 8855,
  549. 'perp': 8869,
  550. 'sdot': 8901,
  551. 'lceil': 8968,
  552. 'rceil': 8969,
  553. 'lfloor': 8970,
  554. 'rfloor': 8971,
  555. 'lang': 9001,
  556. 'rang': 9002,
  557. 'loz': 9674,
  558. 'spades': 9824,
  559. 'clubs': 9827,
  560. 'hearts': 9829,
  561. 'diams': 9830
  562. }
  // Entries of sax.ENTITIES given as numeric character codes are replaced by
  // the corresponding one-character string; string entries are kept as-is.
  Object.keys(sax.ENTITIES).forEach(function (name) {
    var value = sax.ENTITIES[name]
    sax.ENTITIES[name] = typeof value === 'number'
      ? String.fromCharCode(value)
      : value
  })

  // Add the reverse mapping (state number -> state name) to sax.STATE.
  for (var s in sax.STATE) {
    var stateCode = sax.STATE[s]
    sax.STATE[stateCode] = s
  }

  // shorthand
  S = sax.STATE
  // Calls the parser's handler for `event` (a property such as 'ontext') with
  // `data`, when such a handler is set.
  function emit (parser, event, data) {
    if (parser[event]) {
      parser[event](data)
    }
  }
  // Emits a node event, first flushing any buffered text so that the text
  // event is delivered before the node event.
  function emitNode (parser, nodeType, data) {
    if (parser.textNode) {
      closeText(parser)
    }
    emit(parser, nodeType, data)
  }
  // Applies the trim/normalize options to the buffered text, emits it as
  // 'ontext' when anything is left, and empties the text buffer.
  function closeText (parser) {
    var text = textopts(parser.opt, parser.textNode)
    // the processed text is stored before emitting
    parser.textNode = text
    if (text) {
      emit(parser, 'ontext', parser.textNode)
    }
    parser.textNode = ''
  }
  // Returns `text` after applying the text options: `trim` strips surrounding
  // whitespace, `normalize` collapses each whitespace run to a single space.
  function textopts (opt, text) {
    var result = text
    if (opt.trim) {
      result = result.trim()
    }
    if (opt.normalize) {
      result = result.replace(/\s+/g, ' ')
    }
    return result
  }
  // Flushes pending text, wraps the message in an Error (with line, column
  // and current character appended when position tracking is on), records it
  // on the parser and emits 'onerror'. Returns the parser.
  function error (parser, er) {
    closeText(parser)

    var message = er
    if (parser.trackPosition) {
      message += '\nLine: ' + parser.line
      message += '\nColumn: ' + parser.column
      message += '\nChar: ' + parser.c
    }

    var failure = new Error(message)
    parser.error = failure
    emit(parser, 'onerror', failure)
    return parser
  }
  // Finishes a parse: reports an unclosed root (strict mode) and any end of
  // input outside the BEGIN / BEGIN_WHITESPACE / TEXT states, flushes text,
  // marks the parser closed, emits 'onend', and then re-initialises the
  // parser by re-running the constructor on it. Returns the parser.
  function end (parser) {
    if (parser.sawRoot && !parser.closedRoot) {
      strictFail(parser, 'Unclosed root tag')
    }

    var state = parser.state
    var endedAtRest = state === S.BEGIN ||
      state === S.BEGIN_WHITESPACE ||
      state === S.TEXT
    if (!endedAtRest) {
      error(parser, 'Unexpected end')
    }

    closeText(parser)
    parser.c = ''
    parser.closed = true
    emit(parser, 'onend')
    SAXParser.call(parser, parser.strict, parser.opt)
    return parser
  }
  // Reports `message` through error(), but only when the parser is in strict
  // mode. Throws if the first argument is not a SAXParser instance.
  function strictFail (parser, message) {
    var isParser = typeof parser === 'object' && parser instanceof SAXParser
    if (!isParser) {
      throw new Error('bad call to strictFail')
    }
    if (!parser.strict) {
      return
    }
    error(parser, message)
  }
  // Starts a new tag from the buffered tag name: normalises the name's case
  // in non-strict mode, creates `parser.tag`, clears the deferred attribute
  // list and emits 'onopentagstart'.
  function newTag (parser) {
    if (!parser.strict) {
      parser.tagName = parser.tagName[parser.looseCase]()
    }

    var enclosing = parser.tags[parser.tags.length - 1] || parser
    var tag = { name: parser.tagName, attributes: {} }
    parser.tag = tag

    // Start with the enclosing scope's namespaces; attrib() replaces this if
    // the tag contains an xmlns="foo" or xmlns:foo="bar" attribute.
    if (parser.opt.xmlns) {
      tag.ns = enclosing.ns
    }

    parser.attribList.length = 0
    emitNode(parser, 'onopentagstart', tag)
  }
  // Splits a qualified name into { prefix, local }.
  // A name without ':' has an empty prefix. With `attribute` set, the bare
  // name 'xmlns' is reported as prefix 'xmlns' with an empty local part.
  // Only the first two ':'-separated pieces of the name are used.
  function qname (name, attribute) {
    // <x "xmlns"="http://foo">
    if (attribute && name === 'xmlns') {
      return { prefix: 'xmlns', local: '' }
    }
    if (name.indexOf(':') < 0) {
      return { prefix: '', local: name }
    }
    var pieces = name.split(':')
    return { prefix: pieces[0], local: pieces[1] }
  }
  // Finishes the attribute currently buffered in attribName / attribValue.
  //
  // A repeated attribute name on the same tag is dropped; the first
  // occurrence wins. In xmlns mode, namespace declarations are bound on the
  // tag immediately and the attribute is queued on `attribList` so openTag()
  // can emit it once all bindings are known. Otherwise the attribute is
  // stored on the tag and 'onattribute' is emitted straight away.
  // Both attribute buffers are cleared on return.
  function attrib (parser) {
    if (!parser.strict) {
      parser.attribName = parser.attribName[parser.looseCase]()
    }

    var name = parser.attribName

    // attribList holds [name, value] pairs, so compare against the name slot.
    var alreadyQueued = parser.attribList.some(function (pair) {
      return pair[0] === name
    })
    // Call hasOwnProperty through Object.prototype: a document attribute named
    // "hasOwnProperty" would otherwise shadow the method on `attributes`.
    var alreadyStored = Object.prototype.hasOwnProperty.call(parser.tag.attributes, name)

    if (alreadyQueued || alreadyStored) {
      parser.attribName = parser.attribValue = ''
      return
    }

    if (parser.opt.xmlns) {
      var qn = qname(parser.attribName, true)
      var prefix = qn.prefix
      var local = qn.local

      if (prefix === 'xmlns') {
        // namespace binding attribute. push the binding into scope
        if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
          strictFail(parser,
            'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
          strictFail(parser,
            'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else {
          var tag = parser.tag
          var parent = parser.tags[parser.tags.length - 1] || parser
          // give the tag its own namespace scope on first binding
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns)
          }
          tag.ns[local] = parser.attribValue
        }
      }

      // defer onattribute events until all attributes have been seen
      // so any new bindings can take effect. preserve attribute order
      // so deferred events can be emitted in document order
      parser.attribList.push([parser.attribName, parser.attribValue])
    } else {
      // in non-xmlns mode, we can emit the event right away
      parser.tag.attributes[parser.attribName] = parser.attribValue
      emitNode(parser, 'onattribute', {
        name: parser.attribName,
        value: parser.attribValue
      })
    }

    parser.attribName = parser.attribValue = ''
  }
  // Completes the current open tag.
  // In xmlns mode: resolves the tag's prefix/local/uri, emits
  // 'onopennamespace' for each binding the tag declared itself, then resolves
  // and emits the attributes attrib() queued, in document order.
  // In every mode: records whether the tag is self-closing, pushes it on the
  // tag stack and emits 'onopentag'. For a non-self-closing tag the parser
  // then moves to SCRIPT (a <script> tag while `noscript` is off) or TEXT.
  function openTag (parser, selfClosing) {
    if (parser.opt.xmlns) {
      var tag = parser.tag

      // add namespace info to tag
      var tagQName = qname(parser.tagName)
      tag.prefix = tagQName.prefix
      tag.local = tagQName.local
      tag.uri = tag.ns[tagQName.prefix] || ''

      if (tag.prefix && !tag.uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(parser.tagName))
        tag.uri = tagQName.prefix
      }

      // emit namespace binding events for bindings introduced by this tag
      var enclosing = parser.tags[parser.tags.length - 1] || parser
      if (tag.ns && enclosing.ns !== tag.ns) {
        var declared = Object.keys(tag.ns)
        for (var k = 0; k < declared.length; k++) {
          emitNode(parser, 'onopennamespace', {
            prefix: declared[k],
            uri: tag.ns[declared[k]]
          })
        }
      }

      // handle deferred onattribute events
      // Note: do not apply default ns to attributes:
      // http://www.w3.org/TR/REC-xml-names/#defaulting
      for (var idx = 0, count = parser.attribList.length; idx < count; idx++) {
        var pair = parser.attribList[idx]
        var attrName = pair[0]
        var attrQName = qname(attrName, true)
        var attrPrefix = attrQName.prefix

        var attrUri = ''
        if (attrPrefix !== '') {
          attrUri = tag.ns[attrPrefix] || ''
        }

        var attribute = {
          name: attrName,
          value: pair[1],
          prefix: attrPrefix,
          local: attrQName.local,
          uri: attrUri
        }

        // an attribute whose prefix has no binding fails here
        if (attrPrefix && attrPrefix !== 'xmlns' && !attrUri) {
          strictFail(parser, 'Unbound namespace prefix: ' +
            JSON.stringify(attrPrefix))
          attribute.uri = attrPrefix
        }

        parser.tag.attributes[attrName] = attribute
        emitNode(parser, 'onattribute', attribute)
      }
      parser.attribList.length = 0
    }

    parser.tag.isSelfClosing = Boolean(selfClosing)

    // process the tag
    parser.sawRoot = true
    parser.tags.push(parser.tag)
    emitNode(parser, 'onopentag', parser.tag)

    if (!selfClosing) {
      // special case for <script> in non-strict mode.
      var startsScript = !parser.noscript &&
        parser.tagName.toLowerCase() === 'script'
      parser.state = startsScript ? S.SCRIPT : S.TEXT
      parser.tag = null
      parser.tagName = ''
    }

    parser.attribName = parser.attribValue = ''
    parser.attribList.length = 0
  }
  // Handles a completed closing tag whose name is buffered in parser.tagName.
  //
  // - An empty name is kept as the literal text '</>'.
  // - While script content is buffered, any closing tag other than 'script'
  //   is appended to the script text; a 'script' close emits the script.
  // - Otherwise the tag stack is searched from the top for a matching name.
  //   Every tag above the match is closed too ('onclosetag' for each, plus
  //   'onclosenamespace' for bindings that tag introduced in xmlns mode).
  //   With no match, the closing tag is kept as literal text.
  // The parser is left in TEXT state (or SCRIPT when still inside a script).
  function closeTag (parser) {
    if (!parser.tagName) {
      strictFail(parser, 'Weird empty close tag.')
      parser.textNode += '</>'
      parser.state = S.TEXT
      return
    }

    if (parser.script) {
      // NOTE(review): this comparison is case-sensitive while openTag()
      // detects <script> case-insensitively; confirm that is intended.
      if (parser.tagName !== 'script') {
        parser.script += '</' + parser.tagName + '>'
        parser.tagName = ''
        parser.state = S.SCRIPT
        return
      }
      emitNode(parser, 'onscript', parser.script)
      parser.script = ''
    }

    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length
    var tagName = parser.tagName
    if (!parser.strict) {
      tagName = tagName[parser.looseCase]()
    }
    var closeTo = tagName
    while (t--) {
      var close = parser.tags[t]
      if (close.name !== closeTo) {
        // reported for each open tag above the match (strict mode only)
        strictFail(parser, 'Unexpected close tag')
      } else {
        break
      }
    }

    // didn't find it. we already failed for strict, so just abort.
    if (t < 0) {
      strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
      parser.textNode += '</' + parser.tagName + '>'
      parser.state = S.TEXT
      return
    }
    parser.tagName = tagName

    // pop and close every tag from the top of the stack down to the match
    var s = parser.tags.length
    while (s-- > t) {
      var tag = parser.tag = parser.tags.pop()
      parser.tagName = parser.tag.name
      emitNode(parser, 'onclosetag', parser.tagName)

      var parent = parser.tags[parser.tags.length - 1] || parser
      if (parser.opt.xmlns && tag.ns !== parent.ns) {
        // remove namespace bindings introduced by tag
        Object.keys(tag.ns).forEach(function (p) {
          var n = tag.ns[p]
          emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
        })
      }
    }

    if (t === 0) parser.closedRoot = true
    parser.tagName = parser.attribValue = parser.attribName = ''
    parser.attribList.length = 0
    parser.state = S.TEXT
  }
  // Looks `name` up in an entity table and returns its replacement, or
  // undefined when there is none. Functions and objects are rejected: the
  // table's prototype chain ends at Object.prototype, so names such as
  // "constructor", "toString" or "__proto__" would otherwise resolve to
  // built-ins instead of entity text.
  function lookupNamedEntity (table, name) {
    var value = table[name]
    var kind = typeof value
    if (!value || kind === 'function' || kind === 'object') {
      return undefined
    }
    return value
  }

  // Resolves the entity buffered in parser.entity to its replacement text.
  // Named entities are looked up as written and then lower-cased. Numeric
  // references (#NNN decimal, #xHHH hex) are converted to the matching
  // character. Anything else, including numeric references outside
  // 0..0x10FFFF, is reported via strictFail and returned verbatim as
  // '&name;'.
  function parseEntity (parser) {
    var entity = parser.entity
    var entityLC = entity.toLowerCase()
    var num
    var numStr = ''

    var named = lookupNamedEntity(parser.ENTITIES, entity)
    if (named === undefined) {
      named = lookupNamedEntity(parser.ENTITIES, entityLC)
    }
    if (named !== undefined) {
      return named
    }

    entity = entityLC
    if (entity.charAt(0) === '#') {
      if (entity.charAt(1) === 'x') {
        entity = entity.slice(2)
        num = parseInt(entity, 16)
        numStr = num.toString(16)
      } else {
        entity = entity.slice(1)
        num = parseInt(entity, 10)
        numStr = num.toString(10)
      }
    }

    // Round-trip check: the digits, minus leading zeros, must be exactly what
    // parseInt consumed, which rejects trailing junk such as "#12abc".
    entity = entity.replace(/^0+/, '')
    var malformed = isNaN(num) || numStr.toLowerCase() !== entity
    // String.fromCodePoint throws a RangeError outside this range
    var outOfRange = num < 0 || num > 0x10FFFF
    if (malformed || outOfRange) {
      strictFail(parser, 'Invalid character entity')
      return '&' + parser.entity + ';'
    }

    return String.fromCodePoint(num)
  }
  /**
   * Handle one character seen before the first tag of the document.
   *
   * A `<` moves the parser to OPEN_WAKA and records where the tag starts.
   * Whitespace is ignored. Any other character is reported through
   * `strictFail` and becomes the start of a text node.
   *
   * @param {Object} parser - parser whose state is updated in place
   * @param {string} c - the character being processed
   */
  function beginWhiteSpace (parser, c) {
    if (c === '<') {
      parser.state = S.OPEN_WAKA
      parser.startTagPosition = parser.position
      return
    }

    if (isWhitespace(c)) {
      return
    }

    // have to process this as a text node.
    // weird, but happens.
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
  }
  /**
   * Bounds-guarded character lookup.
   *
   * @param {string} chunk - text being scanned
   * @param {number} i - index of the wanted character
   * @returns {string} the character at `i`, or '' when `i` is not below
   *   `chunk.length`
   */
  function charAt (chunk, i) {
    return i < chunk.length ? chunk.charAt(i) : ''
  }
  /**
   * Feed a chunk of text into the parser and advance its state machine one
   * character at a time. Called with the parser as `this`.
   *
   * - If `this.error` is set, it is thrown.
   * - If the parser is already closed, the result of `error(...)` is returned.
   * - `null` ends the document and returns the result of `end(parser)`.
   * - Objects are converted with `toString()` before being scanned.
   *
   * @param {string|Object|null} chunk - text to parse, or `null` to finish
   * @returns {Object} the parser, unless one of the early returns above applies
   * @throws the stored `this.error`, or an Error when `parser.state` is not a
   *   state handled by the switch below
   */
  function write (chunk) {
    var parser = this
    if (this.error) {
      throw this.error
    }
    if (parser.closed) {
      return error(parser,
        'Cannot write after close. Assign an onready handler.')
    }
    if (chunk === null) {
      return end(parser)
    }
    if (typeof chunk === 'object') {
      chunk = chunk.toString()
    }
    var i = 0
    var c = ''
    while (true) {
      c = charAt(chunk, i++)
      parser.c = c

      // charAt yields '' once i runs past the end of the chunk
      if (!c) {
        break
      }

      if (parser.trackPosition) {
        parser.position++
        if (c === '\n') {
          parser.line++
          parser.column = 0
        } else {
          parser.column++
        }
      }

      switch (parser.state) {
        case S.BEGIN:
          parser.state = S.BEGIN_WHITESPACE
          // a leading byte-order mark is skipped
          if (c === '\uFEFF') {
            continue
          }
          beginWhiteSpace(parser, c)
          continue

        case S.BEGIN_WHITESPACE:
          beginWhiteSpace(parser, c)
          continue

        case S.TEXT:
          if (parser.sawRoot && !parser.closedRoot) {
            // Inside the root element: scan ahead to the next '<' or '&'
            // (or the end of the chunk) and append the whole run at once.
            var starti = i - 1
            while (c && c !== '<' && c !== '&') {
              c = charAt(chunk, i++)
              if (c && parser.trackPosition) {
                parser.position++
                if (c === '\n') {
                  parser.line++
                  parser.column = 0
                } else {
                  parser.column++
                }
              }
            }
            parser.textNode += chunk.substring(starti, i - 1)
          }
          if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else {
            if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
              strictFail(parser, 'Text data outside of root node.')
            }
            if (c === '&') {
              parser.state = S.TEXT_ENTITY
            } else {
              parser.textNode += c
            }
          }
          continue

        case S.SCRIPT:
          // only non-strict
          if (c === '<') {
            parser.state = S.SCRIPT_ENDING
          } else {
            parser.script += c
          }
          continue

        case S.SCRIPT_ENDING:
          if (c === '/') {
            parser.state = S.CLOSE_TAG
          } else {
            parser.script += '<' + c
            parser.state = S.SCRIPT
          }
          continue

        case S.OPEN_WAKA:
          // either a /, ?, !, or text is coming next.
          if (c === '!') {
            parser.state = S.SGML_DECL
            parser.sgmlDecl = ''
          } else if (isWhitespace(c)) {
            // wait for it...
          } else if (isMatch(nameStart, c)) {
            parser.state = S.OPEN_TAG
            parser.tagName = c
          } else if (c === '/') {
            parser.state = S.CLOSE_TAG
            parser.tagName = ''
          } else if (c === '?') {
            parser.state = S.PROC_INST
            parser.procInstName = parser.procInstBody = ''
          } else {
            strictFail(parser, 'Unencoded <')
            // if there was some whitespace, then add that in.
            if (parser.startTagPosition + 1 < parser.position) {
              var pad = parser.position - parser.startTagPosition
              c = new Array(pad).join(' ') + c
            }
            parser.textNode += '<' + c
            parser.state = S.TEXT
          }
          continue

        case S.SGML_DECL:
          if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
            emitNode(parser, 'onopencdata')
            parser.state = S.CDATA
            parser.sgmlDecl = ''
            parser.cdata = ''
          } else if (parser.sgmlDecl + c === '--') {
            parser.state = S.COMMENT
            parser.comment = ''
            parser.sgmlDecl = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
            parser.state = S.DOCTYPE
            if (parser.doctype || parser.sawRoot) {
              strictFail(parser,
                'Inappropriately located doctype declaration')
            }
            parser.doctype = ''
            parser.sgmlDecl = ''
          } else if (c === '>') {
            emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
            parser.sgmlDecl = ''
            parser.state = S.TEXT
          } else if (isQuote(c)) {
            parser.state = S.SGML_DECL_QUOTED
            // Remember which quote opened the section; SGML_DECL_QUOTED
            // compares against parser.q to find the matching close quote.
            parser.q = c
            parser.sgmlDecl += c
          } else {
            parser.sgmlDecl += c
          }
          continue

        case S.SGML_DECL_QUOTED:
          if (c === parser.q) {
            parser.state = S.SGML_DECL
            parser.q = ''
          }
          parser.sgmlDecl += c
          continue

        case S.DOCTYPE:
          if (c === '>') {
            parser.state = S.TEXT
            emitNode(parser, 'ondoctype', parser.doctype)
            parser.doctype = true // just remember that we saw it.
          } else {
            parser.doctype += c
            if (c === '[') {
              parser.state = S.DOCTYPE_DTD
            } else if (isQuote(c)) {
              parser.state = S.DOCTYPE_QUOTED
              parser.q = c
            }
          }
          continue

        case S.DOCTYPE_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.q = ''
            parser.state = S.DOCTYPE
          }
          continue

        case S.DOCTYPE_DTD:
          parser.doctype += c
          if (c === ']') {
            parser.state = S.DOCTYPE
          } else if (isQuote(c)) {
            parser.state = S.DOCTYPE_DTD_QUOTED
            parser.q = c
          }
          continue

        case S.DOCTYPE_DTD_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.state = S.DOCTYPE_DTD
            parser.q = ''
          }
          continue

        case S.COMMENT:
          if (c === '-') {
            parser.state = S.COMMENT_ENDING
          } else {
            parser.comment += c
          }
          continue

        case S.COMMENT_ENDING:
          if (c === '-') {
            parser.state = S.COMMENT_ENDED
            parser.comment = textopts(parser.opt, parser.comment)
            if (parser.comment) {
              emitNode(parser, 'oncomment', parser.comment)
            }
            parser.comment = ''
          } else {
            parser.comment += '-' + c
            parser.state = S.COMMENT
          }
          continue

        case S.COMMENT_ENDED:
          if (c !== '>') {
            strictFail(parser, 'Malformed comment')
            // allow <!-- blah -- bloo --> in non-strict mode,
            // which is a comment of " blah -- bloo "
            parser.comment += '--' + c
            parser.state = S.COMMENT
          } else {
            parser.state = S.TEXT
          }
          continue

        case S.CDATA:
          if (c === ']') {
            parser.state = S.CDATA_ENDING
          } else {
            parser.cdata += c
          }
          continue

        case S.CDATA_ENDING:
          if (c === ']') {
            parser.state = S.CDATA_ENDING_2
          } else {
            parser.cdata += ']' + c
            parser.state = S.CDATA
          }
          continue

        case S.CDATA_ENDING_2:
          if (c === '>') {
            if (parser.cdata) {
              emitNode(parser, 'oncdata', parser.cdata)
            }
            emitNode(parser, 'onclosecdata')
            parser.cdata = ''
            parser.state = S.TEXT
          } else if (c === ']') {
            // a run of more than two ']': keep one as data, stay in this state
            parser.cdata += ']'
          } else {
            parser.cdata += ']]' + c
            parser.state = S.CDATA
          }
          continue

        case S.PROC_INST:
          if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else if (isWhitespace(c)) {
            parser.state = S.PROC_INST_BODY
          } else {
            parser.procInstName += c
          }
          continue

        case S.PROC_INST_BODY:
          // whitespace before the body has started is dropped
          if (!parser.procInstBody && isWhitespace(c)) {
            continue
          } else if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else {
            parser.procInstBody += c
          }
          continue

        case S.PROC_INST_ENDING:
          if (c === '>') {
            emitNode(parser, 'onprocessinginstruction', {
              name: parser.procInstName,
              body: parser.procInstBody
            })
            parser.procInstName = parser.procInstBody = ''
            parser.state = S.TEXT
          } else {
            parser.procInstBody += '?' + c
            parser.state = S.PROC_INST_BODY
          }
          continue

        case S.OPEN_TAG:
          if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else {
            newTag(parser)
            if (c === '>') {
              openTag(parser)
            } else if (c === '/') {
              parser.state = S.OPEN_TAG_SLASH
            } else {
              if (!isWhitespace(c)) {
                strictFail(parser, 'Invalid character in tag name')
              }
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.OPEN_TAG_SLASH:
          if (c === '>') {
            openTag(parser, true)
            closeTag(parser)
          } else {
            strictFail(parser, 'Forward-slash in opening tag not followed by >')
            parser.state = S.ATTRIB
          }
          continue

        case S.ATTRIB:
          // haven't read the attribute name yet.
          if (isWhitespace(c)) {
            continue
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (c === '>') {
            strictFail(parser, 'Attribute without value')
            // a valueless attribute takes its own name as its value
            parser.attribValue = parser.attribName
            attrib(parser)
            openTag(parser)
          } else if (isWhitespace(c)) {
            parser.state = S.ATTRIB_NAME_SAW_WHITE
          } else if (isMatch(nameBody, c)) {
            parser.attribName += c
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME_SAW_WHITE:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (isWhitespace(c)) {
            continue
          } else {
            strictFail(parser, 'Attribute without value')
            parser.tag.attributes[parser.attribName] = ''
            parser.attribValue = ''
            emitNode(parser, 'onattribute', {
              name: parser.attribName,
              value: ''
            })
            parser.attribName = ''
            if (c === '>') {
              openTag(parser)
            } else if (isMatch(nameStart, c)) {
              parser.attribName = c
              parser.state = S.ATTRIB_NAME
            } else {
              strictFail(parser, 'Invalid attribute name')
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.ATTRIB_VALUE:
          if (isWhitespace(c)) {
            continue
          } else if (isQuote(c)) {
            parser.q = c
            parser.state = S.ATTRIB_VALUE_QUOTED
          } else {
            strictFail(parser, 'Unquoted attribute value')
            parser.state = S.ATTRIB_VALUE_UNQUOTED
            parser.attribValue = c
          }
          continue

        case S.ATTRIB_VALUE_QUOTED:
          if (c !== parser.q) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_Q
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          parser.q = ''
          parser.state = S.ATTRIB_VALUE_CLOSED
          continue

        case S.ATTRIB_VALUE_CLOSED:
          if (isWhitespace(c)) {
            parser.state = S.ATTRIB
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            strictFail(parser, 'No whitespace between attributes')
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_VALUE_UNQUOTED:
          if (!isAttribEnd(c)) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_U
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          if (c === '>') {
            openTag(parser)
          } else {
            parser.state = S.ATTRIB
          }
          continue

        case S.CLOSE_TAG:
          if (!parser.tagName) {
            if (isWhitespace(c)) {
              continue
            } else if (notMatch(nameStart, c)) {
              if (parser.script) {
                // not a real closing tag: hand the text back to the script
                parser.script += '</' + c
                parser.state = S.SCRIPT
              } else {
                strictFail(parser, 'Invalid tagname in closing tag.')
              }
            } else {
              parser.tagName = c
            }
          } else if (c === '>') {
            closeTag(parser)
          } else if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else if (parser.script) {
            parser.script += '</' + parser.tagName
            parser.tagName = ''
            parser.state = S.SCRIPT
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid tagname in closing tag')
            }
            parser.state = S.CLOSE_TAG_SAW_WHITE
          }
          continue

        case S.CLOSE_TAG_SAW_WHITE:
          if (isWhitespace(c)) {
            continue
          }
          if (c === '>') {
            closeTag(parser)
          } else {
            strictFail(parser, 'Invalid characters in closing tag')
          }
          continue

        case S.TEXT_ENTITY:
        case S.ATTRIB_VALUE_ENTITY_Q:
        case S.ATTRIB_VALUE_ENTITY_U:
          // The three entity states share one handler; they differ only in
          // the state to resume and the parser field that receives the text.
          var returnState
          var buffer
          switch (parser.state) {
            case S.TEXT_ENTITY:
              returnState = S.TEXT
              buffer = 'textNode'
              break

            case S.ATTRIB_VALUE_ENTITY_Q:
              returnState = S.ATTRIB_VALUE_QUOTED
              buffer = 'attribValue'
              break

            case S.ATTRIB_VALUE_ENTITY_U:
              returnState = S.ATTRIB_VALUE_UNQUOTED
              buffer = 'attribValue'
              break
          }

          if (c === ';') {
            parser[buffer] += parseEntity(parser)
            parser.entity = ''
            parser.state = returnState
          } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
            parser.entity += c
          } else {
            strictFail(parser, 'Invalid character in entity name')
            // not an entity after all: emit what was collected verbatim
            parser[buffer] += '&' + parser.entity + c
            parser.entity = ''
            parser.state = returnState
          }

          continue

        default:
          // Error takes the message as its first argument; passing the parser
          // there produced the useless message "[object Object]".
          throw new Error('Unknown state: ' + parser.state)
      }
    } // while

    if (parser.position >= parser.bufferCheckPosition) {
      checkBufferLength(parser)
    }
    return parser
  }
  /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
  /* istanbul ignore next */
  if (!String.fromCodePoint) {
    (function () {
      // Captured once so later changes to the globals do not affect the polyfill.
      var toChars = String.fromCharCode
      var roundDown = Math.floor

      // Builds a string from the code points passed as arguments. Code units
      // are flushed to the output whenever more than 0x4000 have accumulated,
      // and after the last argument.
      var fromCodePoint = function () {
        var FLUSH_LIMIT = 0x4000
        var pending = []
        var count = arguments.length
        if (!count) {
          return ''
        }
        var output = ''
        for (var pos = 0; pos < count; pos++) {
          var cp = Number(arguments[pos])
          var isInvalid =
            !isFinite(cp) || // `NaN`, `+Infinity`, or `-Infinity`
            cp < 0 || // not a valid Unicode code point
            cp > 0x10FFFF || // not a valid Unicode code point
            roundDown(cp) !== cp // not an integer
          if (isInvalid) {
            throw RangeError('Invalid code point: ' + cp)
          }
          if (cp <= 0xFFFF) {
            // BMP code point: a single code unit
            pending.push(cp)
          } else {
            // Astral code point; split in surrogate halves
            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            cp -= 0x10000
            pending.push((cp >> 10) + 0xD800, (cp % 0x400) + 0xDC00)
          }
          if (pos + 1 === count || pending.length > FLUSH_LIMIT) {
            output += toChars.apply(null, pending)
            pending.length = 0
          }
        }
        return output
      }

      /* istanbul ignore next */
      if (Object.defineProperty) {
        Object.defineProperty(String, 'fromCodePoint', {
          value: fromCodePoint,
          configurable: true,
          writable: true
        })
      } else {
        String.fromCodePoint = fromCodePoint
      }
    }())
  }
  1436. })(typeof exports === 'undefined' ? this.sax = {} : exports)