// saxes.js (73 KB)
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  // Character tables from the xmlchars package. The module paths select the
  // XML 1.0 (ed5), XML 1.1 (ed2) and XML Namespaces 1.0 (ed3) variants.
  const ed5 = require("xmlchars/xml/1.0/ed5");
  const ed2 = require("xmlchars/xml/1.1/ed2");
  const NSed3 = require("xmlchars/xmlns/1.0/ed3");
  // Local aliases. ``isChar`` exists in both XML versions, so each alias is
  // suffixed with the version it belongs to.
  const {
    isS,
    isChar: isChar10,
    isNameStartChar,
    isNameChar,
    S_LIST,
    NAME_RE,
  } = ed5;
  const { isChar: isChar11 } = ed2;
  const { isNCNameStartChar, isNCNameChar, NC_NAME_RE } = NSed3;
  /** The namespace URI the ``xml`` prefix must be bound to. */
  const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
  /** The namespace URI the ``xmlns`` prefix must be bound to. */
  const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
  /**
   * Prefix-to-URI bindings every namespace-aware parser starts with. The
   * object has no prototype, so only the keys listed here can be looked up.
   */
  const rootNS = {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    __proto__: null,
    xml: XML_NAMESPACE,
    xmlns: XMLNS_NAMESPACE,
  };
  /**
   * The predefined XML entities, keyed by entity name (without ``&`` and
   * ``;``). Prototype-less for the same reason as ``rootNS``.
   */
  const XML_ENTITIES = {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    __proto__: null,
    amp: "&",
    gt: ">",
    lt: "<",
    quot: "\"",
    apos: "'",
  };
  // Sentinel values returned by the character readers. They are negative so
  // they can never collide with a real code point.
  // EOC: end-of-chunk
  const EOC = -1;
  // NL_LIKE: a character (or sequence) that is normalized to a newline.
  const NL_LIKE = -2;

  // Parser states. Each value is used as an index into the parser's state
  // table, so the numbering must stay dense and in this order.
  const S_BEGIN = 0; // Initial state.
  const S_BEGIN_WHITESPACE = 1; // leading whitespace
  const S_DOCTYPE = 2; // <!DOCTYPE
  const S_DOCTYPE_QUOTE = 3; // <!DOCTYPE "//blah
  const S_DTD = 4; // <!DOCTYPE "//blah" [ ...
  const S_DTD_QUOTED = 5; // <!DOCTYPE "//blah" [ "foo
  const S_DTD_OPEN_WAKA = 6;
  const S_DTD_OPEN_WAKA_BANG = 7;
  const S_DTD_COMMENT = 8; // <!--
  const S_DTD_COMMENT_ENDING = 9; // <!-- blah -
  const S_DTD_COMMENT_ENDED = 10; // <!-- blah --
  const S_DTD_PI = 11; // <?
  const S_DTD_PI_ENDING = 12; // <?hi "there" ?
  const S_TEXT = 13; // general stuff
  const S_ENTITY = 14; // &amp and such
  const S_OPEN_WAKA = 15; // <
  const S_OPEN_WAKA_BANG = 16; // <!...
  const S_COMMENT = 17; // <!--
  const S_COMMENT_ENDING = 18; // <!-- blah -
  const S_COMMENT_ENDED = 19; // <!-- blah --
  const S_CDATA = 20; // <![CDATA[ something
  const S_CDATA_ENDING = 21; // ]
  const S_CDATA_ENDING_2 = 22; // ]]
  const S_PI_FIRST_CHAR = 23; // <?hi, first char
  const S_PI_REST = 24; // <?hi, rest of the name
  const S_PI_BODY = 25; // <?hi there
  const S_PI_ENDING = 26; // <?hi "there" ?
  const S_XML_DECL_NAME_START = 27; // <?xml
  const S_XML_DECL_NAME = 28; // <?xml foo
  const S_XML_DECL_EQ = 29; // <?xml foo=
  const S_XML_DECL_VALUE_START = 30; // <?xml foo=
  const S_XML_DECL_VALUE = 31; // <?xml foo="bar"
  const S_XML_DECL_SEPARATOR = 32; // <?xml foo="bar"
  const S_XML_DECL_ENDING = 33; // <?xml ... ?
  const S_OPEN_TAG = 34; // <strong
  const S_OPEN_TAG_SLASH = 35; // <strong /
  const S_ATTRIB = 36; // <a
  const S_ATTRIB_NAME = 37; // <a foo
  const S_ATTRIB_NAME_SAW_WHITE = 38; // <a foo _
  const S_ATTRIB_VALUE = 39; // <a foo=
  const S_ATTRIB_VALUE_QUOTED = 40; // <a foo="bar
  const S_ATTRIB_VALUE_CLOSED = 41; // <a foo="bar"
  const S_ATTRIB_VALUE_UNQUOTED = 42; // <a foo=bar
  const S_CLOSE_TAG = 43; // </a
  const S_CLOSE_TAG_SAW_WHITE = 44; // </a >
  // Code points of the characters the parser tests for explicitly.
  const TAB = 9;
  const NL = 0xA;
  const CR = 0xD;
  const SPACE = 0x20;
  const BANG = 0x21;
  const DQUOTE = 0x22;
  const AMP = 0x26;
  const SQUOTE = 0x27;
  const MINUS = 0x2D;
  const FORWARD_SLASH = 0x2F;
  const SEMICOLON = 0x3B;
  const LESS = 0x3C;
  const EQUAL = 0x3D;
  const GREATER = 0x3E;
  const QUESTION = 0x3F;
  const OPEN_BRACKET = 0x5B;
  const CLOSE_BRACKET = 0x5D;
  const NEL = 0x85;
  const LS = 0x2028; // Line Separator
  /**
   * @param c A code point.
   *
   * @returns ``true`` if ``c`` is a double or single quote.
   */
  const isQuote = (c) => c === DQUOTE || c === SQUOTE;
  const QUOTES = [DQUOTE, SQUOTE];
  // Sets of code points, each named after the construct whose capture they
  // terminate.
  const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER];
  const DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET];
  // Terminator sets that include every code point listed in ``S_LIST``.
  const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST];
  const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS];
  /**
   * Check one prefix-to-URI namespace binding against the rules for the
   * reserved ``xml`` and ``xmlns`` names. Every violation is reported through
   * ``parser.fail``; one binding may produce more than one report.
   *
   * @param parser The object whose ``fail`` method receives error messages.
   *
   * @param prefix The prefix being bound. The empty string stands for the
   * default namespace.
   *
   * @param uri The namespace URI the prefix is bound to.
   */
  function nsPairCheck(parser, prefix, uri) {
    // First rule set: a reserved prefix may only be bound to its own URI.
    if (prefix === "xml") {
      if (uri !== XML_NAMESPACE) {
        parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`);
      }
    }
    else if (prefix === "xmlns") {
      if (uri !== XMLNS_NAMESPACE) {
        parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`);
      }
    }
    // Second rule set: a reserved URI may not be bound to arbitrary prefixes.
    if (uri === XMLNS_NAMESPACE) {
      // Nothing may be bound to the xmlns namespace, not even "xmlns" itself.
      parser.fail(prefix === "" ?
        `the default namespace may not be set to ${uri}.` :
        `may not assign a prefix (even "xmlns") to the URI ${XMLNS_NAMESPACE}.`);
    }
    else if (uri === XML_NAMESPACE) {
      // Assigning the XML namespace to "xml" is fine; anything else is not.
      if (prefix === "") {
        parser.fail(`the default namespace may not be set to ${uri}.`);
      }
      else if (prefix !== "xml") {
        parser.fail("may not assign the xml namespace to another prefix.");
      }
    }
  }
  /**
   * Run ``nsPairCheck`` on every own enumerable binding of a namespace
   * mapping.
   *
   * @param parser The object passed through to ``nsPairCheck`` for error
   * reporting.
   *
   * @param mapping An object whose keys are prefixes and whose values are
   * namespace URIs.
   */
  function nsMappingCheck(parser, mapping) {
    for (const [prefix, uri] of Object.entries(mapping)) {
      nsPairCheck(parser, prefix, uri);
    }
  }
  /**
   * @param name The string to test.
   *
   * @returns Whether ``name`` matches ``NC_NAME_RE``.
   */
  const isNCName = (name) => NC_NAME_RE.test(name);
  /**
   * @param name The string to test.
   *
   * @returns Whether ``name`` matches ``NAME_RE``.
   */
  const isName = (name) => NAME_RE.test(name);
  // Values for the parser's ``forbiddenState`` field.
  const FORBIDDEN_START = 0;
  const FORBIDDEN_BRACKET = 1;
  const FORBIDDEN_BRACKET_BRACKET = 2;
  152. /**
  153. * The list of supported events.
  154. */
  155. exports.EVENTS = [
  156. "xmldecl",
  157. "text",
  158. "processinginstruction",
  159. "doctype",
  160. "comment",
  161. "opentagstart",
  162. "attribute",
  163. "opentag",
  164. "closetag",
  165. "cdata",
  166. "error",
  167. "end",
  168. "ready",
  169. ];
  /**
   * Maps each event name to the name of the parser property that stores the
   * handler for that event.
   */
  const EVENT_NAME_TO_HANDLER_NAME = {
    xmldecl: "xmldeclHandler",
    text: "textHandler",
    processinginstruction: "piHandler",
    doctype: "doctypeHandler",
    comment: "commentHandler",
    opentagstart: "openTagStartHandler",
    attribute: "attributeHandler",
    opentag: "openTagHandler",
    closetag: "closeTagHandler",
    cdata: "cdataHandler",
    error: "errorHandler",
    end: "endHandler",
    ready: "readyHandler",
  };
  185. class SaxesParser {
  186. /**
  187. * @param opt The parser options.
  188. */
  189. constructor(opt) {
  190. this.opt = opt !== null && opt !== void 0 ? opt : {};
  191. this.fragmentOpt = !!this.opt.fragment;
  192. const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns;
  193. this.trackPosition = this.opt.position !== false;
  194. this.fileName = this.opt.fileName;
  195. if (xmlnsOpt) {
  196. // This is the function we use to perform name checks on PIs and entities.
  197. // When namespaces are used, colons are not allowed in PI target names or
  198. // entity names. So the check depends on whether namespaces are used. See:
  199. //
  200. // https://www.w3.org/XML/xml-names-19990114-errata.html
  201. // NE08
  202. //
  203. this.nameStartCheck = isNCNameStartChar;
  204. this.nameCheck = isNCNameChar;
  205. this.isName = isNCName;
  206. // eslint-disable-next-line @typescript-eslint/unbound-method
  207. this.processAttribs = this.processAttribsNS;
  208. // eslint-disable-next-line @typescript-eslint/unbound-method
  209. this.pushAttrib = this.pushAttribNS;
  210. // eslint-disable-next-line @typescript-eslint/no-explicit-any
  211. this.ns = Object.assign({ __proto__: null }, rootNS);
  212. const additional = this.opt.additionalNamespaces;
  213. if (additional != null) {
  214. nsMappingCheck(this, additional);
  215. Object.assign(this.ns, additional);
  216. }
  217. }
  218. else {
  219. this.nameStartCheck = isNameStartChar;
  220. this.nameCheck = isNameChar;
  221. this.isName = isName;
  222. // eslint-disable-next-line @typescript-eslint/unbound-method
  223. this.processAttribs = this.processAttribsPlain;
  224. // eslint-disable-next-line @typescript-eslint/unbound-method
  225. this.pushAttrib = this.pushAttribPlain;
  226. }
  227. //
  228. // The order of the members in this table needs to correspond to the state
  229. // numbers given to the states that correspond to the methods being recorded
  230. // here.
  231. //
  232. this.stateTable = [
  233. /* eslint-disable @typescript-eslint/unbound-method */
  234. this.sBegin,
  235. this.sBeginWhitespace,
  236. this.sDoctype,
  237. this.sDoctypeQuote,
  238. this.sDTD,
  239. this.sDTDQuoted,
  240. this.sDTDOpenWaka,
  241. this.sDTDOpenWakaBang,
  242. this.sDTDComment,
  243. this.sDTDCommentEnding,
  244. this.sDTDCommentEnded,
  245. this.sDTDPI,
  246. this.sDTDPIEnding,
  247. this.sText,
  248. this.sEntity,
  249. this.sOpenWaka,
  250. this.sOpenWakaBang,
  251. this.sComment,
  252. this.sCommentEnding,
  253. this.sCommentEnded,
  254. this.sCData,
  255. this.sCDataEnding,
  256. this.sCDataEnding2,
  257. this.sPIFirstChar,
  258. this.sPIRest,
  259. this.sPIBody,
  260. this.sPIEnding,
  261. this.sXMLDeclNameStart,
  262. this.sXMLDeclName,
  263. this.sXMLDeclEq,
  264. this.sXMLDeclValueStart,
  265. this.sXMLDeclValue,
  266. this.sXMLDeclSeparator,
  267. this.sXMLDeclEnding,
  268. this.sOpenTag,
  269. this.sOpenTagSlash,
  270. this.sAttrib,
  271. this.sAttribName,
  272. this.sAttribNameSawWhite,
  273. this.sAttribValue,
  274. this.sAttribValueQuoted,
  275. this.sAttribValueClosed,
  276. this.sAttribValueUnquoted,
  277. this.sCloseTag,
  278. this.sCloseTagSawWhite,
  279. ];
  280. this._init();
  281. }
  282. /**
  283. * Indicates whether or not the parser is closed. If ``true``, wait for
  284. * the ``ready`` event to write again.
  285. */
  286. get closed() {
  287. return this._closed;
  288. }
  289. _init() {
  290. var _a;
  291. this.openWakaBang = "";
  292. this.text = "";
  293. this.name = "";
  294. this.piTarget = "";
  295. this.entity = "";
  296. this.q = null;
  297. this.tags = [];
  298. this.tag = null;
  299. this.topNS = null;
  300. this.chunk = "";
  301. this.chunkPosition = 0;
  302. this.i = 0;
  303. this.prevI = 0;
  304. this.carriedFromPrevious = undefined;
  305. this.forbiddenState = FORBIDDEN_START;
  306. this.attribList = [];
  307. // The logic is organized so as to minimize the need to check
  308. // this.opt.fragment while parsing.
  309. const { fragmentOpt } = this;
  310. this.state = fragmentOpt ? S_TEXT : S_BEGIN;
  311. // We want these to be all true if we are dealing with a fragment.
  312. this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot =
  313. this.sawRoot = fragmentOpt;
  314. // An XML declaration is intially possible only when parsing whole
  315. // documents.
  316. this.xmlDeclPossible = !fragmentOpt;
  317. this.xmlDeclExpects = ["version"];
  318. this.entityReturnState = undefined;
  319. let { defaultXMLVersion } = this.opt;
  320. if (defaultXMLVersion === undefined) {
  321. if (this.opt.forceXMLVersion === true) {
  322. throw new Error("forceXMLVersion set but defaultXMLVersion is not set");
  323. }
  324. defaultXMLVersion = "1.0";
  325. }
  326. this.setXMLVersion(defaultXMLVersion);
  327. this.positionAtNewLine = 0;
  328. this.doctype = false;
  329. this._closed = false;
  330. this.xmlDecl = {
  331. version: undefined,
  332. encoding: undefined,
  333. standalone: undefined,
  334. };
  335. this.line = 1;
  336. this.column = 0;
  337. this.ENTITIES = Object.create(XML_ENTITIES);
  338. // eslint-disable-next-line no-unused-expressions
  339. (_a = this.readyHandler) === null || _a === void 0 ? void 0 : _a.call(this);
  340. }
  341. /**
  342. * The stream position the parser is currently looking at. This field is
  343. * zero-based.
  344. *
  345. * This field is not based on counting Unicode characters but is to be
  346. * interpreted as a plain index into a JavaScript string.
  347. */
  348. get position() {
  349. return this.chunkPosition + this.i;
  350. }
  351. /**
  352. * The column number of the next character to be read by the parser. *
  353. * This field is zero-based. (The first column in a line is 0.)
  354. *
  355. * This field reports the index at which the next character would be in the
  356. * line if the line were represented as a JavaScript string. Note that this
  357. * *can* be different to a count based on the number of *Unicode characters*
  358. * due to how JavaScript handles astral plane characters.
  359. *
  360. * See [[column]] for a number that corresponds to a count of Unicode
  361. * characters.
  362. */
  363. get columnIndex() {
  364. return this.position - this.positionAtNewLine;
  365. }
  366. /**
  367. * Set an event listener on an event. The parser supports one handler per
  368. * event type. If you try to set an event handler over an existing handler,
  369. * the old handler is silently overwritten.
  370. *
  371. * @param name The event to listen to.
  372. *
  373. * @param handler The handler to set.
  374. */
  375. on(name, handler) {
  376. // eslint-disable-next-line @typescript-eslint/no-explicit-any
  377. this[EVENT_NAME_TO_HANDLER_NAME[name]] = handler;
  378. }
  379. /**
  380. * Unset an event handler.
  381. *
  382. * @parma name The event to stop listening to.
  383. */
  384. off(name) {
  385. // eslint-disable-next-line @typescript-eslint/no-explicit-any
  386. this[EVENT_NAME_TO_HANDLER_NAME[name]] = undefined;
  387. }
  388. /**
  389. * Make an error object. The error object will have a message that contains
  390. * the ``fileName`` option passed at the creation of the parser. If position
  391. * tracking was turned on, it will also have line and column number
  392. * information.
  393. *
  394. * @param message The message describing the error to report.
  395. *
  396. * @returns An error object with a properly formatted message.
  397. */
  398. makeError(message) {
  399. var _a;
  400. let msg = (_a = this.fileName) !== null && _a !== void 0 ? _a : "";
  401. if (this.trackPosition) {
  402. if (msg.length > 0) {
  403. msg += ":";
  404. }
  405. msg += `${this.line}:${this.column}`;
  406. }
  407. if (msg.length > 0) {
  408. msg += ": ";
  409. }
  410. return new Error(msg + message);
  411. }
  412. /**
  413. * Report a parsing error. This method is made public so that client code may
  414. * check for issues that are outside the scope of this project and can report
  415. * errors.
  416. *
  417. * @param message The error to report.
  418. *
  419. * @returns this
  420. */
  421. fail(message) {
  422. const err = this.makeError(message);
  423. const handler = this.errorHandler;
  424. if (handler === undefined) {
  425. throw err;
  426. }
  427. else {
  428. handler(err);
  429. }
  430. return this;
  431. }
  432. /**
  433. * Write a XML data to the parser.
  434. *
  435. * @param chunk The XML data to write.
  436. *
  437. * @returns this
  438. */
  439. write(chunk) {
  440. if (this.closed) {
  441. return this.fail("cannot write after close; assign an onready handler.");
  442. }
  443. let end = false;
  444. if (chunk === null) {
  445. // We cannot return immediately because carriedFromPrevious may need
  446. // processing.
  447. end = true;
  448. chunk = "";
  449. }
  450. else if (typeof chunk === "object") {
  451. chunk = chunk.toString();
  452. }
  453. // We checked if performing a pre-decomposition of the string into an array
  454. // of single complete characters (``Array.from(chunk)``) would be faster
  455. // than the current repeated calls to ``charCodeAt``. As of August 2018, it
  456. // isn't. (There may be Node-specific code that would perform faster than
  457. // ``Array.from`` but don't want to be dependent on Node.)
  458. if (this.carriedFromPrevious !== undefined) {
  459. // The previous chunk had char we must carry over.
  460. chunk = `${this.carriedFromPrevious}${chunk}`;
  461. this.carriedFromPrevious = undefined;
  462. }
  463. let limit = chunk.length;
  464. const lastCode = chunk.charCodeAt(limit - 1);
  465. if (!end &&
  466. // A trailing CR or surrogate must be carried over to the next
  467. // chunk.
  468. (lastCode === CR || (lastCode >= 0xD800 && lastCode <= 0xDBFF))) {
  469. // The chunk ends with a character that must be carried over. We cannot
  470. // know how to handle it until we get the next chunk or the end of the
  471. // stream. So save it for later.
  472. this.carriedFromPrevious = chunk[limit - 1];
  473. limit--;
  474. chunk = chunk.slice(0, limit);
  475. }
  476. const { stateTable } = this;
  477. this.chunk = chunk;
  478. this.i = 0;
  479. while (this.i < limit) {
  480. // eslint-disable-next-line @typescript-eslint/no-explicit-any
  481. stateTable[this.state].call(this);
  482. }
  483. this.chunkPosition += limit;
  484. return end ? this.end() : this;
  485. }
  486. /**
  487. * Close the current stream. Perform final well-formedness checks and reset
  488. * the parser tstate.
  489. *
  490. * @returns this
  491. */
  492. close() {
  493. return this.write(null);
  494. }
  495. /**
  496. * Get a single code point out of the current chunk. This updates the current
  497. * position if we do position tracking.
  498. *
  499. * This is the algorithm to use for XML 1.0.
  500. *
  501. * @returns The character read.
  502. */
  503. getCode10() {
  504. const { chunk, i } = this;
  505. this.prevI = i;
  506. // Yes, we do this instead of doing this.i++. Doing it this way, we do not
  507. // read this.i again, which is a bit faster.
  508. this.i = i + 1;
  509. if (i >= chunk.length) {
  510. return EOC;
  511. }
  512. // Using charCodeAt and handling the surrogates ourselves is faster
  513. // than using codePointAt.
  514. const code = chunk.charCodeAt(i);
  515. this.column++;
  516. if (code < 0xD800) {
  517. if (code >= SPACE || code === TAB) {
  518. return code;
  519. }
  520. switch (code) {
  521. case NL:
  522. this.line++;
  523. this.column = 0;
  524. this.positionAtNewLine = this.position;
  525. return NL;
  526. case CR:
  527. // We may get NaN if we read past the end of the chunk, which is fine.
  528. if (chunk.charCodeAt(i + 1) === NL) {
  529. // A \r\n sequence is converted to \n so we have to skip over the
  530. // next character. We already know it has a size of 1 so ++ is fine
  531. // here.
  532. this.i = i + 2;
  533. }
  534. // Otherwise, a \r is just converted to \n, so we don't have to skip
  535. // ahead.
  536. // In either case, \r becomes \n.
  537. this.line++;
  538. this.column = 0;
  539. this.positionAtNewLine = this.position;
  540. return NL_LIKE;
  541. default:
  542. // If we get here, then code < SPACE and it is not NL CR or TAB.
  543. this.fail("disallowed character.");
  544. return code;
  545. }
  546. }
  547. if (code > 0xDBFF) {
  548. // This is a specialized version of isChar10 that takes into account
  549. // that in this context code > 0xDBFF and code <= 0xFFFF. So it does not
  550. // test cases that don't need testing.
  551. if (!(code >= 0xE000 && code <= 0xFFFD)) {
  552. this.fail("disallowed character.");
  553. }
  554. return code;
  555. }
  556. const final = 0x10000 + ((code - 0xD800) * 0x400) +
  557. (chunk.charCodeAt(i + 1) - 0xDC00);
  558. this.i = i + 2;
  559. // This is a specialized version of isChar10 that takes into account that in
  560. // this context necessarily final >= 0x10000.
  561. if (final > 0x10FFFF) {
  562. this.fail("disallowed character.");
  563. }
  564. return final;
  565. }
  566. /**
  567. * Get a single code point out of the current chunk. This updates the current
  568. * position if we do position tracking.
  569. *
  570. * This is the algorithm to use for XML 1.1.
  571. *
  572. * @returns {number} The character read.
  573. */
  574. getCode11() {
  575. const { chunk, i } = this;
  576. this.prevI = i;
  577. // Yes, we do this instead of doing this.i++. Doing it this way, we do not
  578. // read this.i again, which is a bit faster.
  579. this.i = i + 1;
  580. if (i >= chunk.length) {
  581. return EOC;
  582. }
  583. // Using charCodeAt and handling the surrogates ourselves is faster
  584. // than using codePointAt.
  585. const code = chunk.charCodeAt(i);
  586. this.column++;
  587. if (code < 0xD800) {
  588. if ((code > 0x1F && code < 0x7F) || (code > 0x9F && code !== LS) ||
  589. code === TAB) {
  590. return code;
  591. }
  592. switch (code) {
  593. case NL: // 0xA
  594. this.line++;
  595. this.column = 0;
  596. this.positionAtNewLine = this.position;
  597. return NL;
  598. case CR: { // 0xD
  599. // We may get NaN if we read past the end of the chunk, which is
  600. // fine.
  601. const next = chunk.charCodeAt(i + 1);
  602. if (next === NL || next === NEL) {
  603. // A CR NL or CR NEL sequence is converted to NL so we have to skip
  604. // over the next character. We already know it has a size of 1.
  605. this.i = i + 2;
  606. }
  607. // Otherwise, a CR is just converted to NL, no skip.
  608. }
  609. /* yes, fall through */
  610. case NEL: // 0x85
  611. case LS: // Ox2028
  612. this.line++;
  613. this.column = 0;
  614. this.positionAtNewLine = this.position;
  615. return NL_LIKE;
  616. default:
  617. this.fail("disallowed character.");
  618. return code;
  619. }
  620. }
  621. if (code > 0xDBFF) {
  622. // This is a specialized version of isCharAndNotRestricted that takes into
  623. // account that in this context code > 0xDBFF and code <= 0xFFFF. So it
  624. // does not test cases that don't need testing.
  625. if (!(code >= 0xE000 && code <= 0xFFFD)) {
  626. this.fail("disallowed character.");
  627. }
  628. return code;
  629. }
  630. const final = 0x10000 + ((code - 0xD800) * 0x400) +
  631. (chunk.charCodeAt(i + 1) - 0xDC00);
  632. this.i = i + 2;
  633. // This is a specialized version of isCharAndNotRestricted that takes into
  634. // account that in this context necessarily final >= 0x10000.
  635. if (final > 0x10FFFF) {
  636. this.fail("disallowed character.");
  637. }
  638. return final;
  639. }
  640. /**
  641. * Like ``getCode`` but with the return value normalized so that ``NL`` is
  642. * returned for ``NL_LIKE``.
  643. */
  644. getCodeNorm() {
  645. const c = this.getCode();
  646. return c === NL_LIKE ? NL : c;
  647. }
  648. unget() {
  649. this.i = this.prevI;
  650. this.column--;
  651. }
  652. /**
  653. * Capture characters into a buffer until encountering one of a set of
  654. * characters.
  655. *
  656. * @param chars An array of codepoints. Encountering a character in the array
  657. * ends the capture. (``chars`` may safely contain ``NL``.)
  658. *
  659. * @return The character code that made the capture end, or ``EOC`` if we hit
  660. * the end of the chunk. The return value cannot be NL_LIKE: NL is returned
  661. * instead.
  662. */
  663. captureTo(chars) {
  664. let { i: start } = this;
  665. const { chunk } = this;
  666. // eslint-disable-next-line no-constant-condition
  667. while (true) {
  668. const c = this.getCode();
  669. const isNLLike = c === NL_LIKE;
  670. const final = isNLLike ? NL : c;
  671. if (final === EOC || chars.includes(final)) {
  672. this.text += chunk.slice(start, this.prevI);
  673. return final;
  674. }
  675. if (isNLLike) {
  676. this.text += `${chunk.slice(start, this.prevI)}\n`;
  677. start = this.i;
  678. }
  679. }
  680. }
  681. /**
  682. * Capture characters into a buffer until encountering a character.
  683. *
  684. * @param char The codepoint that ends the capture. **NOTE ``char`` MAY NOT
  685. * CONTAIN ``NL``.** Passing ``NL`` will result in buggy behavior.
  686. *
  687. * @return ``true`` if we ran into the character. Otherwise, we ran into the
  688. * end of the current chunk.
  689. */
  690. captureToChar(char) {
  691. let { i: start } = this;
  692. const { chunk } = this;
  693. // eslint-disable-next-line no-constant-condition
  694. while (true) {
  695. let c = this.getCode();
  696. switch (c) {
  697. case NL_LIKE:
  698. this.text += `${chunk.slice(start, this.prevI)}\n`;
  699. start = this.i;
  700. c = NL;
  701. break;
  702. case EOC:
  703. this.text += chunk.slice(start);
  704. return false;
  705. default:
  706. }
  707. if (c === char) {
  708. this.text += chunk.slice(start, this.prevI);
  709. return true;
  710. }
  711. }
  712. }
  713. /**
  714. * Capture characters that satisfy ``isNameChar`` into the ``name`` field of
  715. * this parser.
  716. *
  717. * @return The character code that made the test fail, or ``EOC`` if we hit
  718. * the end of the chunk. The return value cannot be NL_LIKE: NL is returned
  719. * instead.
  720. */
  721. captureNameChars() {
  722. const { chunk, i: start } = this;
  723. // eslint-disable-next-line no-constant-condition
  724. while (true) {
  725. const c = this.getCode();
  726. if (c === EOC) {
  727. this.name += chunk.slice(start);
  728. return EOC;
  729. }
  730. // NL is not a name char so we don't have to test specifically for it.
  731. if (!isNameChar(c)) {
  732. this.name += chunk.slice(start, this.prevI);
  733. return c === NL_LIKE ? NL : c;
  734. }
  735. }
  736. }
  737. /**
  738. * Skip white spaces.
  739. *
  740. * @return The character that ended the skip, or ``EOC`` if we hit
  741. * the end of the chunk. The return value cannot be NL_LIKE: NL is returned
  742. * instead.
  743. */
  744. skipSpaces() {
  745. // eslint-disable-next-line no-constant-condition
  746. while (true) {
  747. const c = this.getCodeNorm();
  748. if (c === EOC || !isS(c)) {
  749. return c;
  750. }
  751. }
  752. }
  753. setXMLVersion(version) {
  754. this.currentXMLVersion = version;
  755. /* eslint-disable @typescript-eslint/unbound-method */
  756. if (version === "1.0") {
  757. this.isChar = isChar10;
  758. this.getCode = this.getCode10;
  759. }
  760. else {
  761. this.isChar = isChar11;
  762. this.getCode = this.getCode11;
  763. }
  764. /* eslint-enable @typescript-eslint/unbound-method */
  765. }
  766. // STATE ENGINE METHODS
  767. // This needs to be a state separate from S_BEGIN_WHITESPACE because we want
  768. // to be sure never to come back to this state later.
  769. sBegin() {
  770. // We are essentially peeking at the first character of the chunk. Since
  771. // S_BEGIN can be in effect only when we start working on the first chunk,
  772. // the index at which we must look is necessarily 0. Note also that the
  773. // following test does not depend on decoding surrogates.
  774. // If the initial character is 0xFEFF, ignore it.
  775. if (this.chunk.charCodeAt(0) === 0xFEFF) {
  776. this.i++;
  777. this.column++;
  778. }
  779. this.state = S_BEGIN_WHITESPACE;
  780. }
  781. sBeginWhitespace() {
  782. // We need to know whether we've encountered spaces or not because as soon
  783. // as we run into a space, an XML declaration is no longer possible. Rather
  784. // than slow down skipSpaces even in places where we don't care whether it
  785. // skipped anything or not, we check whether prevI is equal to the value of
  786. // i from before we skip spaces.
  787. const iBefore = this.i;
  788. const c = this.skipSpaces();
  789. if (this.prevI !== iBefore) {
  790. this.xmlDeclPossible = false;
  791. }
  792. switch (c) {
  793. case LESS:
  794. this.state = S_OPEN_WAKA;
  795. // We could naively call closeText but in this state, it is not normal
  796. // to have text be filled with any data.
  797. if (this.text.length !== 0) {
  798. throw new Error("no-empty text at start");
  799. }
  800. break;
  801. case EOC:
  802. break;
  803. default:
  804. this.unget();
  805. this.state = S_TEXT;
  806. this.xmlDeclPossible = false;
  807. }
  808. }
  809. sDoctype() {
  810. var _a;
  811. const c = this.captureTo(DOCTYPE_TERMINATOR);
  812. switch (c) {
  813. case GREATER: {
  814. // eslint-disable-next-line no-unused-expressions
  815. (_a = this.doctypeHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
  816. this.text = "";
  817. this.state = S_TEXT;
  818. this.doctype = true; // just remember that we saw it.
  819. break;
  820. }
  821. case EOC:
  822. break;
  823. default:
  824. this.text += String.fromCodePoint(c);
  825. if (c === OPEN_BRACKET) {
  826. this.state = S_DTD;
  827. }
  828. else if (isQuote(c)) {
  829. this.state = S_DOCTYPE_QUOTE;
  830. this.q = c;
  831. }
  832. }
  833. }
  834. sDoctypeQuote() {
  835. const q = this.q;
  836. if (this.captureToChar(q)) {
  837. this.text += String.fromCodePoint(q);
  838. this.q = null;
  839. this.state = S_DOCTYPE;
  840. }
  841. }
  842. sDTD() {
  843. const c = this.captureTo(DTD_TERMINATOR);
  844. if (c === EOC) {
  845. return;
  846. }
  847. this.text += String.fromCodePoint(c);
  848. if (c === CLOSE_BRACKET) {
  849. this.state = S_DOCTYPE;
  850. }
  851. else if (c === LESS) {
  852. this.state = S_DTD_OPEN_WAKA;
  853. }
  854. else if (isQuote(c)) {
  855. this.state = S_DTD_QUOTED;
  856. this.q = c;
  857. }
  858. }
  859. sDTDQuoted() {
  860. const q = this.q;
  861. if (this.captureToChar(q)) {
  862. this.text += String.fromCodePoint(q);
  863. this.state = S_DTD;
  864. this.q = null;
  865. }
  866. }
  867. sDTDOpenWaka() {
  868. const c = this.getCodeNorm();
  869. this.text += String.fromCodePoint(c);
  870. switch (c) {
  871. case BANG:
  872. this.state = S_DTD_OPEN_WAKA_BANG;
  873. this.openWakaBang = "";
  874. break;
  875. case QUESTION:
  876. this.state = S_DTD_PI;
  877. break;
  878. default:
  879. this.state = S_DTD;
  880. }
  881. }
  882. sDTDOpenWakaBang() {
  883. const char = String.fromCodePoint(this.getCodeNorm());
  884. const owb = this.openWakaBang += char;
  885. this.text += char;
  886. if (owb !== "-") {
  887. this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
  888. this.openWakaBang = "";
  889. }
  890. }
  891. sDTDComment() {
  892. if (this.captureToChar(MINUS)) {
  893. this.text += "-";
  894. this.state = S_DTD_COMMENT_ENDING;
  895. }
  896. }
  897. sDTDCommentEnding() {
  898. const c = this.getCodeNorm();
  899. this.text += String.fromCodePoint(c);
  900. this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
  901. }
  902. sDTDCommentEnded() {
  903. const c = this.getCodeNorm();
  904. this.text += String.fromCodePoint(c);
  905. if (c === GREATER) {
  906. this.state = S_DTD;
  907. }
  908. else {
  909. this.fail("malformed comment.");
  910. // <!-- blah -- bloo --> will be recorded as
  911. // a comment of " blah -- bloo "
  912. this.state = S_DTD_COMMENT;
  913. }
  914. }
  915. sDTDPI() {
  916. if (this.captureToChar(QUESTION)) {
  917. this.text += "?";
  918. this.state = S_DTD_PI_ENDING;
  919. }
  920. }
  921. sDTDPIEnding() {
  922. const c = this.getCodeNorm();
  923. this.text += String.fromCodePoint(c);
  924. if (c === GREATER) {
  925. this.state = S_DTD;
  926. }
  927. }
  928. sText() {
  929. //
  930. // We did try a version of saxes where the S_TEXT state was split in two
  931. // states: one for text inside the root element, and one for text
  932. // outside. This was avoiding having to test this.tags.length to decide
  933. // what implementation to actually use.
  934. //
  935. // Peformance testing on gigabyte-size files did not show any advantage to
  936. // using the two states solution instead of the current one. Conversely, it
  937. // made the code a bit more complicated elsewhere. For instance, a comment
  938. // can appear before the root element so when a comment ended it was
  939. // necessary to determine whether to return to the S_TEXT state or to the
  940. // new text-outside-root state.
  941. //
  942. if (this.tags.length !== 0) {
  943. this.handleTextInRoot();
  944. }
  945. else {
  946. this.handleTextOutsideRoot();
  947. }
  948. }
  949. sEntity() {
  950. // This is essentially a specialized version of captureToChar(SEMICOLON...)
  951. let { i: start } = this;
  952. const { chunk } = this;
  953. // eslint-disable-next-line no-labels, no-restricted-syntax
  954. loop:
  955. // eslint-disable-next-line no-constant-condition
  956. while (true) {
  957. switch (this.getCode()) {
  958. case NL_LIKE:
  959. this.entity += `${chunk.slice(start, this.prevI)}\n`;
  960. start = this.i;
  961. break;
  962. case SEMICOLON: {
  963. const { entityReturnState } = this;
  964. const entity = this.entity + chunk.slice(start, this.prevI);
  965. this.state = entityReturnState;
  966. let parsed;
  967. if (entity === "") {
  968. this.fail("empty entity name.");
  969. parsed = "&;";
  970. }
  971. else {
  972. parsed = this.parseEntity(entity);
  973. this.entity = "";
  974. }
  975. if (entityReturnState !== S_TEXT || this.textHandler !== undefined) {
  976. this.text += parsed;
  977. }
  978. // eslint-disable-next-line no-labels
  979. break loop;
  980. }
  981. case EOC:
  982. this.entity += chunk.slice(start);
  983. // eslint-disable-next-line no-labels
  984. break loop;
  985. default:
  986. }
  987. }
  988. }
  989. sOpenWaka() {
  990. // Reminder: a state handler is called with at least one character
  991. // available in the current chunk. So the first call to get code inside of
  992. // a state handler cannot return ``EOC``. That's why we don't test
  993. // for it.
  994. const c = this.getCode();
  995. // either a /, ?, !, or text is coming next.
  996. if (isNameStartChar(c)) {
  997. this.state = S_OPEN_TAG;
  998. this.unget();
  999. this.xmlDeclPossible = false;
  1000. }
  1001. else {
  1002. switch (c) {
  1003. case FORWARD_SLASH:
  1004. this.state = S_CLOSE_TAG;
  1005. this.xmlDeclPossible = false;
  1006. break;
  1007. case BANG:
  1008. this.state = S_OPEN_WAKA_BANG;
  1009. this.openWakaBang = "";
  1010. this.xmlDeclPossible = false;
  1011. break;
  1012. case QUESTION:
  1013. this.state = S_PI_FIRST_CHAR;
  1014. break;
  1015. default:
  1016. this.fail("disallowed character in tag name");
  1017. this.state = S_TEXT;
  1018. this.xmlDeclPossible = false;
  1019. }
  1020. }
  1021. }
  1022. sOpenWakaBang() {
  1023. this.openWakaBang += String.fromCodePoint(this.getCodeNorm());
  1024. switch (this.openWakaBang) {
  1025. case "[CDATA[":
  1026. if (!this.sawRoot && !this.reportedTextBeforeRoot) {
  1027. this.fail("text data outside of root node.");
  1028. this.reportedTextBeforeRoot = true;
  1029. }
  1030. if (this.closedRoot && !this.reportedTextAfterRoot) {
  1031. this.fail("text data outside of root node.");
  1032. this.reportedTextAfterRoot = true;
  1033. }
  1034. this.state = S_CDATA;
  1035. this.openWakaBang = "";
  1036. break;
  1037. case "--":
  1038. this.state = S_COMMENT;
  1039. this.openWakaBang = "";
  1040. break;
  1041. case "DOCTYPE":
  1042. this.state = S_DOCTYPE;
  1043. if (this.doctype || this.sawRoot) {
  1044. this.fail("inappropriately located doctype declaration.");
  1045. }
  1046. this.openWakaBang = "";
  1047. break;
  1048. default:
  1049. // 7 happens to be the maximum length of the string that can possibly
  1050. // match one of the cases above.
  1051. if (this.openWakaBang.length >= 7) {
  1052. this.fail("incorrect syntax.");
  1053. }
  1054. }
  1055. }
  1056. sComment() {
  1057. if (this.captureToChar(MINUS)) {
  1058. this.state = S_COMMENT_ENDING;
  1059. }
  1060. }
  1061. sCommentEnding() {
  1062. var _a;
  1063. const c = this.getCodeNorm();
  1064. if (c === MINUS) {
  1065. this.state = S_COMMENT_ENDED;
  1066. // eslint-disable-next-line no-unused-expressions
  1067. (_a = this.commentHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
  1068. this.text = "";
  1069. }
  1070. else {
  1071. this.text += `-${String.fromCodePoint(c)}`;
  1072. this.state = S_COMMENT;
  1073. }
  1074. }
  1075. sCommentEnded() {
  1076. const c = this.getCodeNorm();
  1077. if (c !== GREATER) {
  1078. this.fail("malformed comment.");
  1079. // <!-- blah -- bloo --> will be recorded as
  1080. // a comment of " blah -- bloo "
  1081. this.text += `--${String.fromCodePoint(c)}`;
  1082. this.state = S_COMMENT;
  1083. }
  1084. else {
  1085. this.state = S_TEXT;
  1086. }
  1087. }
  1088. sCData() {
  1089. if (this.captureToChar(CLOSE_BRACKET)) {
  1090. this.state = S_CDATA_ENDING;
  1091. }
  1092. }
  1093. sCDataEnding() {
  1094. const c = this.getCodeNorm();
  1095. if (c === CLOSE_BRACKET) {
  1096. this.state = S_CDATA_ENDING_2;
  1097. }
  1098. else {
  1099. this.text += `]${String.fromCodePoint(c)}`;
  1100. this.state = S_CDATA;
  1101. }
  1102. }
  1103. sCDataEnding2() {
  1104. var _a;
  1105. const c = this.getCodeNorm();
  1106. switch (c) {
  1107. case GREATER: {
  1108. // eslint-disable-next-line no-unused-expressions
  1109. (_a = this.cdataHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
  1110. this.text = "";
  1111. this.state = S_TEXT;
  1112. break;
  1113. }
  1114. case CLOSE_BRACKET:
  1115. this.text += "]";
  1116. break;
  1117. default:
  1118. this.text += `]]${String.fromCodePoint(c)}`;
  1119. this.state = S_CDATA;
  1120. }
  1121. }
  1122. // We need this separate state to check the first character fo the pi target
  1123. // with this.nameStartCheck which allows less characters than this.nameCheck.
  1124. sPIFirstChar() {
  1125. const c = this.getCodeNorm();
  1126. // This is first because in the case where the file is well-formed this is
  1127. // the branch taken. We optimize for well-formedness.
  1128. if (this.nameStartCheck(c)) {
  1129. this.piTarget += String.fromCodePoint(c);
  1130. this.state = S_PI_REST;
  1131. }
  1132. else if (c === QUESTION || isS(c)) {
  1133. this.fail("processing instruction without a target.");
  1134. this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
  1135. }
  1136. else {
  1137. this.fail("disallowed character in processing instruction name.");
  1138. this.piTarget += String.fromCodePoint(c);
  1139. this.state = S_PI_REST;
  1140. }
  1141. }
  1142. sPIRest() {
  1143. // Capture characters into a piTarget while ``this.nameCheck`` run on the
  1144. // character read returns true.
  1145. const { chunk, i: start } = this;
  1146. // eslint-disable-next-line no-constant-condition
  1147. while (true) {
  1148. const c = this.getCodeNorm();
  1149. if (c === EOC) {
  1150. this.piTarget += chunk.slice(start);
  1151. return;
  1152. }
  1153. // NL cannot satisfy this.nameCheck so we don't have to test specifically
  1154. // for it.
  1155. if (!this.nameCheck(c)) {
  1156. this.piTarget += chunk.slice(start, this.prevI);
  1157. const isQuestion = c === QUESTION;
  1158. if (isQuestion || isS(c)) {
  1159. if (this.piTarget === "xml") {
  1160. if (!this.xmlDeclPossible) {
  1161. this.fail("an XML declaration must be at the start of the document.");
  1162. }
  1163. this.state = isQuestion ? S_XML_DECL_ENDING : S_XML_DECL_NAME_START;
  1164. }
  1165. else {
  1166. this.state = isQuestion ? S_PI_ENDING : S_PI_BODY;
  1167. }
  1168. }
  1169. else {
  1170. this.fail("disallowed character in processing instruction name.");
  1171. this.piTarget += String.fromCodePoint(c);
  1172. }
  1173. break;
  1174. }
  1175. }
  1176. }
  1177. sPIBody() {
  1178. if (this.text.length === 0) {
  1179. const c = this.getCodeNorm();
  1180. if (c === QUESTION) {
  1181. this.state = S_PI_ENDING;
  1182. }
  1183. else if (!isS(c)) {
  1184. this.text = String.fromCodePoint(c);
  1185. }
  1186. }
  1187. // The question mark character is not valid inside any of the XML
  1188. // declaration name/value pairs.
  1189. else if (this.captureToChar(QUESTION)) {
  1190. this.state = S_PI_ENDING;
  1191. }
  1192. }
  1193. sPIEnding() {
  1194. var _a;
  1195. const c = this.getCodeNorm();
  1196. if (c === GREATER) {
  1197. const { piTarget } = this;
  1198. if (piTarget.toLowerCase() === "xml") {
  1199. this.fail("the XML declaration must appear at the start of the document.");
  1200. }
  1201. // eslint-disable-next-line no-unused-expressions
  1202. (_a = this.piHandler) === null || _a === void 0 ? void 0 : _a.call(this, {
  1203. target: piTarget,
  1204. body: this.text,
  1205. });
  1206. this.piTarget = this.text = "";
  1207. this.state = S_TEXT;
  1208. }
  1209. else if (c === QUESTION) {
  1210. // We ran into ?? as part of a processing instruction. We initially took
  1211. // the first ? as a sign that the PI was ending, but it is not. So we have
  1212. // to add it to the body but we take the new ? as a sign that the PI is
  1213. // ending.
  1214. this.text += "?";
  1215. }
  1216. else {
  1217. this.text += `?${String.fromCodePoint(c)}`;
  1218. this.state = S_PI_BODY;
  1219. }
  1220. this.xmlDeclPossible = false;
  1221. }
  1222. sXMLDeclNameStart() {
  1223. const c = this.skipSpaces();
  1224. // The question mark character is not valid inside any of the XML
  1225. // declaration name/value pairs.
  1226. if (c === QUESTION) {
  1227. // It is valid to go to S_XML_DECL_ENDING from this state.
  1228. this.state = S_XML_DECL_ENDING;
  1229. return;
  1230. }
  1231. if (c !== EOC) {
  1232. this.state = S_XML_DECL_NAME;
  1233. this.name = String.fromCodePoint(c);
  1234. }
  1235. }
  1236. sXMLDeclName() {
  1237. const c = this.captureTo(XML_DECL_NAME_TERMINATOR);
  1238. // The question mark character is not valid inside any of the XML
  1239. // declaration name/value pairs.
  1240. if (c === QUESTION) {
  1241. this.state = S_XML_DECL_ENDING;
  1242. this.name += this.text;
  1243. this.text = "";
  1244. this.fail("XML declaration is incomplete.");
  1245. return;
  1246. }
  1247. if (!(isS(c) || c === EQUAL)) {
  1248. return;
  1249. }
  1250. this.name += this.text;
  1251. this.text = "";
  1252. if (!this.xmlDeclExpects.includes(this.name)) {
  1253. switch (this.name.length) {
  1254. case 0:
  1255. this.fail("did not expect any more name/value pairs.");
  1256. break;
  1257. case 1:
  1258. this.fail(`expected the name ${this.xmlDeclExpects[0]}.`);
  1259. break;
  1260. default:
  1261. this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`);
  1262. }
  1263. }
  1264. this.state = c === EQUAL ? S_XML_DECL_VALUE_START : S_XML_DECL_EQ;
  1265. }
  1266. sXMLDeclEq() {
  1267. const c = this.getCodeNorm();
  1268. // The question mark character is not valid inside any of the XML
  1269. // declaration name/value pairs.
  1270. if (c === QUESTION) {
  1271. this.state = S_XML_DECL_ENDING;
  1272. this.fail("XML declaration is incomplete.");
  1273. return;
  1274. }
  1275. if (isS(c)) {
  1276. return;
  1277. }
  1278. if (c !== EQUAL) {
  1279. this.fail("value required.");
  1280. }
  1281. this.state = S_XML_DECL_VALUE_START;
  1282. }
  1283. sXMLDeclValueStart() {
  1284. const c = this.getCodeNorm();
  1285. // The question mark character is not valid inside any of the XML
  1286. // declaration name/value pairs.
  1287. if (c === QUESTION) {
  1288. this.state = S_XML_DECL_ENDING;
  1289. this.fail("XML declaration is incomplete.");
  1290. return;
  1291. }
  1292. if (isS(c)) {
  1293. return;
  1294. }
  1295. if (!isQuote(c)) {
  1296. this.fail("value must be quoted.");
  1297. this.q = SPACE;
  1298. }
  1299. else {
  1300. this.q = c;
  1301. }
  1302. this.state = S_XML_DECL_VALUE;
  1303. }
  1304. sXMLDeclValue() {
  1305. const c = this.captureTo([this.q, QUESTION]);
  1306. // The question mark character is not valid inside any of the XML
  1307. // declaration name/value pairs.
  1308. if (c === QUESTION) {
  1309. this.state = S_XML_DECL_ENDING;
  1310. this.text = "";
  1311. this.fail("XML declaration is incomplete.");
  1312. return;
  1313. }
  1314. if (c === EOC) {
  1315. return;
  1316. }
  1317. const value = this.text;
  1318. this.text = "";
  1319. switch (this.name) {
  1320. case "version": {
  1321. this.xmlDeclExpects = ["encoding", "standalone"];
  1322. const version = value;
  1323. this.xmlDecl.version = version;
  1324. // This is the test specified by XML 1.0 but it is fine for XML 1.1.
  1325. if (!/^1\.[0-9]+$/.test(version)) {
  1326. this.fail("version number must match /^1\\.[0-9]+$/.");
  1327. }
  1328. // When forceXMLVersion is set, the XML declaration is ignored.
  1329. else if (!this.opt.forceXMLVersion) {
  1330. this.setXMLVersion(version);
  1331. }
  1332. break;
  1333. }
  1334. case "encoding":
  1335. if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(value)) {
  1336. this.fail("encoding value must match \
  1337. /^[A-Za-z0-9][A-Za-z0-9._-]*$/.");
  1338. }
  1339. this.xmlDeclExpects = ["standalone"];
  1340. this.xmlDecl.encoding = value;
  1341. break;
  1342. case "standalone":
  1343. if (value !== "yes" && value !== "no") {
  1344. this.fail("standalone value must match \"yes\" or \"no\".");
  1345. }
  1346. this.xmlDeclExpects = [];
  1347. this.xmlDecl.standalone = value;
  1348. break;
  1349. default:
  1350. // We don't need to raise an error here since we've already raised one
  1351. // when checking what name was expected.
  1352. }
  1353. this.name = "";
  1354. this.state = S_XML_DECL_SEPARATOR;
  1355. }
  1356. sXMLDeclSeparator() {
  1357. const c = this.getCodeNorm();
  1358. // The question mark character is not valid inside any of the XML
  1359. // declaration name/value pairs.
  1360. if (c === QUESTION) {
  1361. // It is valid to go to S_XML_DECL_ENDING from this state.
  1362. this.state = S_XML_DECL_ENDING;
  1363. return;
  1364. }
  1365. if (!isS(c)) {
  1366. this.fail("whitespace required.");
  1367. this.unget();
  1368. }
  1369. this.state = S_XML_DECL_NAME_START;
  1370. }
  1371. sXMLDeclEnding() {
  1372. var _a;
  1373. const c = this.getCodeNorm();
  1374. if (c === GREATER) {
  1375. if (this.piTarget !== "xml") {
  1376. this.fail("processing instructions are not allowed before root.");
  1377. }
  1378. else if (this.name !== "version" &&
  1379. this.xmlDeclExpects.includes("version")) {
  1380. this.fail("XML declaration must contain a version.");
  1381. }
  1382. // eslint-disable-next-line no-unused-expressions
  1383. (_a = this.xmldeclHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.xmlDecl);
  1384. this.name = "";
  1385. this.piTarget = this.text = "";
  1386. this.state = S_TEXT;
  1387. }
  1388. else {
  1389. // We got here because the previous character was a ?, but the question
  1390. // mark character is not valid inside any of the XML declaration
  1391. // name/value pairs.
  1392. this.fail("The character ? is disallowed anywhere in XML declarations.");
  1393. }
  1394. this.xmlDeclPossible = false;
  1395. }
  1396. sOpenTag() {
  1397. var _a;
  1398. const c = this.captureNameChars();
  1399. if (c === EOC) {
  1400. return;
  1401. }
  1402. const tag = this.tag = {
  1403. name: this.name,
  1404. attributes: Object.create(null),
  1405. };
  1406. this.name = "";
  1407. if (this.xmlnsOpt) {
  1408. this.topNS = tag.ns = Object.create(null);
  1409. }
  1410. // eslint-disable-next-line no-unused-expressions
  1411. (_a = this.openTagStartHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag);
  1412. this.sawRoot = true;
  1413. if (!this.fragmentOpt && this.closedRoot) {
  1414. this.fail("documents may contain only one root.");
  1415. }
  1416. switch (c) {
  1417. case GREATER:
  1418. this.openTag();
  1419. break;
  1420. case FORWARD_SLASH:
  1421. this.state = S_OPEN_TAG_SLASH;
  1422. break;
  1423. default:
  1424. if (!isS(c)) {
  1425. this.fail("disallowed character in tag name.");
  1426. }
  1427. this.state = S_ATTRIB;
  1428. }
  1429. }
  1430. sOpenTagSlash() {
  1431. if (this.getCode() === GREATER) {
  1432. this.openSelfClosingTag();
  1433. }
  1434. else {
  1435. this.fail("forward-slash in opening tag not followed by >.");
  1436. this.state = S_ATTRIB;
  1437. }
  1438. }
  1439. sAttrib() {
  1440. const c = this.skipSpaces();
  1441. if (c === EOC) {
  1442. return;
  1443. }
  1444. if (isNameStartChar(c)) {
  1445. this.unget();
  1446. this.state = S_ATTRIB_NAME;
  1447. }
  1448. else if (c === GREATER) {
  1449. this.openTag();
  1450. }
  1451. else if (c === FORWARD_SLASH) {
  1452. this.state = S_OPEN_TAG_SLASH;
  1453. }
  1454. else {
  1455. this.fail("disallowed character in attribute name.");
  1456. }
  1457. }
  1458. sAttribName() {
  1459. const c = this.captureNameChars();
  1460. if (c === EQUAL) {
  1461. this.state = S_ATTRIB_VALUE;
  1462. }
  1463. else if (isS(c)) {
  1464. this.state = S_ATTRIB_NAME_SAW_WHITE;
  1465. }
  1466. else if (c === GREATER) {
  1467. this.fail("attribute without value.");
  1468. this.pushAttrib(this.name, this.name);
  1469. this.name = this.text = "";
  1470. this.openTag();
  1471. }
  1472. else if (c !== EOC) {
  1473. this.fail("disallowed character in attribute name.");
  1474. }
  1475. }
  1476. sAttribNameSawWhite() {
  1477. const c = this.skipSpaces();
  1478. switch (c) {
  1479. case EOC:
  1480. return;
  1481. case EQUAL:
  1482. this.state = S_ATTRIB_VALUE;
  1483. break;
  1484. default:
  1485. this.fail("attribute without value.");
  1486. // Should we do this???
  1487. // this.tag.attributes[this.name] = "";
  1488. this.text = "";
  1489. this.name = "";
  1490. if (c === GREATER) {
  1491. this.openTag();
  1492. }
  1493. else if (isNameStartChar(c)) {
  1494. this.unget();
  1495. this.state = S_ATTRIB_NAME;
  1496. }
  1497. else {
  1498. this.fail("disallowed character in attribute name.");
  1499. this.state = S_ATTRIB;
  1500. }
  1501. }
  1502. }
  1503. sAttribValue() {
  1504. const c = this.getCodeNorm();
  1505. if (isQuote(c)) {
  1506. this.q = c;
  1507. this.state = S_ATTRIB_VALUE_QUOTED;
  1508. }
  1509. else if (!isS(c)) {
  1510. this.fail("unquoted attribute value.");
  1511. this.state = S_ATTRIB_VALUE_UNQUOTED;
  1512. this.unget();
  1513. }
  1514. }
  1515. sAttribValueQuoted() {
  1516. // We deliberately do not use captureTo here. The specialized code we use
  1517. // here is faster than using captureTo.
  1518. const { q, chunk } = this;
  1519. let { i: start } = this;
  1520. // eslint-disable-next-line no-constant-condition
  1521. while (true) {
  1522. switch (this.getCode()) {
  1523. case q:
  1524. this.pushAttrib(this.name, this.text + chunk.slice(start, this.prevI));
  1525. this.name = this.text = "";
  1526. this.q = null;
  1527. this.state = S_ATTRIB_VALUE_CLOSED;
  1528. return;
  1529. case AMP:
  1530. this.text += chunk.slice(start, this.prevI);
  1531. this.state = S_ENTITY;
  1532. this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
  1533. return;
  1534. case NL:
  1535. case NL_LIKE:
  1536. case TAB:
  1537. this.text += `${chunk.slice(start, this.prevI)} `;
  1538. start = this.i;
  1539. break;
  1540. case LESS:
  1541. this.text += chunk.slice(start, this.prevI);
  1542. this.fail("disallowed character.");
  1543. return;
  1544. case EOC:
  1545. this.text += chunk.slice(start);
  1546. return;
  1547. default:
  1548. }
  1549. }
  1550. }
  1551. sAttribValueClosed() {
  1552. const c = this.getCodeNorm();
  1553. if (isS(c)) {
  1554. this.state = S_ATTRIB;
  1555. }
  1556. else if (c === GREATER) {
  1557. this.openTag();
  1558. }
  1559. else if (c === FORWARD_SLASH) {
  1560. this.state = S_OPEN_TAG_SLASH;
  1561. }
  1562. else if (isNameStartChar(c)) {
  1563. this.fail("no whitespace between attributes.");
  1564. this.unget();
  1565. this.state = S_ATTRIB_NAME;
  1566. }
  1567. else {
  1568. this.fail("disallowed character in attribute name.");
  1569. }
  1570. }
  1571. sAttribValueUnquoted() {
  1572. // We don't do anything regarding EOL or space handling for unquoted
  1573. // attributes. We already have failed by the time we get here, and the
  1574. // contract that saxes upholds states that upon failure, it is not safe to
  1575. // rely on the data passed to event handlers (other than
  1576. // ``onerror``). Passing "bad" data is not a problem.
  1577. const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR);
  1578. switch (c) {
  1579. case AMP:
  1580. this.state = S_ENTITY;
  1581. this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED;
  1582. break;
  1583. case LESS:
  1584. this.fail("disallowed character.");
  1585. break;
  1586. case EOC:
  1587. break;
  1588. default:
  1589. if (this.text.includes("]]>")) {
  1590. this.fail("the string \"]]>\" is disallowed in char data.");
  1591. }
  1592. this.pushAttrib(this.name, this.text);
  1593. this.name = this.text = "";
  1594. if (c === GREATER) {
  1595. this.openTag();
  1596. }
  1597. else {
  1598. this.state = S_ATTRIB;
  1599. }
  1600. }
  1601. }
  1602. sCloseTag() {
  1603. const c = this.captureNameChars();
  1604. if (c === GREATER) {
  1605. this.closeTag();
  1606. }
  1607. else if (isS(c)) {
  1608. this.state = S_CLOSE_TAG_SAW_WHITE;
  1609. }
  1610. else if (c !== EOC) {
  1611. this.fail("disallowed character in closing tag.");
  1612. }
  1613. }
  1614. sCloseTagSawWhite() {
  1615. switch (this.skipSpaces()) {
  1616. case GREATER:
  1617. this.closeTag();
  1618. break;
  1619. case EOC:
  1620. break;
  1621. default:
  1622. this.fail("disallowed character in closing tag.");
  1623. }
  1624. }
  1625. // END OF STATE ENGINE METHODS
  /**
   * Scan character data located inside the root element, until ``<``, ``&``
   * or the end of the chunk is reached. Text is accumulated into
   * ``this.text`` only when a text handler is set. The forbidden sequence
   * ``]]>`` is detected along the way and reported through ``this.fail``.
   */
  handleTextInRoot() {
    // This is essentially a specialized version of captureTo which is optimized
    // for performing the ]]> check. A previous version of this code, checked
    // ``this.text`` for the presence of ]]>. It simplified the code but was
    // very costly when character data contained a lot of entities to be parsed.
    //
    // Since we are using a specialized loop, we also keep track of the presence
    // of ]]> in text data. The sequence ]]> is forbidden to appear as-is.
    //
    let { i: start, forbiddenState } = this;
    const { chunk, textHandler: handler } = this;
    // eslint-disable-next-line no-labels, no-restricted-syntax
    scanLoop:
    // eslint-disable-next-line no-constant-condition
    while (true) {
      switch (this.getCode()) {
        case LESS: {
          // Start of markup: flush the pending text to the handler, if any.
          this.state = S_OPEN_WAKA;
          if (handler !== undefined) {
            const { text } = this;
            const slice = chunk.slice(start, this.prevI);
            if (text.length !== 0) {
              handler(text + slice);
              this.text = "";
            }
            else if (slice.length !== 0) {
              // Nothing was accumulated earlier: pass the slice directly.
              handler(slice);
            }
          }
          forbiddenState = FORBIDDEN_START;
          // eslint-disable-next-line no-labels
          break scanLoop;
        }
        case AMP:
          // Start of an entity: S_ENTITY will come back to S_TEXT when done.
          this.state = S_ENTITY;
          this.entityReturnState = S_TEXT;
          if (handler !== undefined) {
            this.text += chunk.slice(start, this.prevI);
          }
          forbiddenState = FORBIDDEN_START;
          // eslint-disable-next-line no-labels
          break scanLoop;
        case CLOSE_BRACKET:
          // Advance the ]]> detection: no bracket -> one -> two or more.
          switch (forbiddenState) {
            case FORBIDDEN_START:
              forbiddenState = FORBIDDEN_BRACKET;
              break;
            case FORBIDDEN_BRACKET:
              forbiddenState = FORBIDDEN_BRACKET_BRACKET;
              break;
            case FORBIDDEN_BRACKET_BRACKET:
              // A further "]" still leaves "]]" immediately before us.
              break;
            default:
              throw new Error("impossible state");
          }
          break;
        case GREATER:
          // A ">" immediately after "]]" completes the forbidden sequence.
          if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) {
            this.fail("the string \"]]>\" is disallowed in char data.");
          }
          forbiddenState = FORBIDDEN_START;
          break;
        case NL_LIKE:
          // Record the newline-like sequence as "\n" and restart the slice
          // after it.
          if (handler !== undefined) {
            this.text += `${chunk.slice(start, this.prevI)}\n`;
          }
          start = this.i;
          forbiddenState = FORBIDDEN_START;
          break;
        case EOC:
          // End of chunk: keep what was scanned so far as pending text.
          if (handler !== undefined) {
            this.text += chunk.slice(start);
          }
          // eslint-disable-next-line no-labels
          break scanLoop;
        default:
          // Any other character interrupts a "]" run.
          forbiddenState = FORBIDDEN_START;
      }
    }
    // Store the detection state back on the parser so that the next call
    // starts from it.
    this.forbiddenState = forbiddenState;
  }
  /**
   * Scan character data located outside the root element, until ``<``, ``&``
   * or the end of the chunk is reached. Text is accumulated into
   * ``this.text`` only when a text handler is set. An error is reported when
   * anything other than white space is found, at most once before the root
   * and once after it.
   */
  handleTextOutsideRoot() {
    // This is essentially a specialized version of captureTo which is optimized
    // for a specialized task. We keep track of the presence of non-space
    // characters in the text since these are errors when appearing outside the
    // document root element.
    let { i: start } = this;
    const { chunk, textHandler: handler } = this;
    let nonSpace = false;
    // eslint-disable-next-line no-labels, no-restricted-syntax
    outRootLoop:
    // eslint-disable-next-line no-constant-condition
    while (true) {
      const code = this.getCode();
      switch (code) {
        case LESS: {
          // Start of markup: flush the pending text to the handler, if any.
          this.state = S_OPEN_WAKA;
          if (handler !== undefined) {
            const { text } = this;
            const slice = chunk.slice(start, this.prevI);
            if (text.length !== 0) {
              handler(text + slice);
              this.text = "";
            }
            else if (slice.length !== 0) {
              // Nothing was accumulated earlier: pass the slice directly.
              handler(slice);
            }
          }
          // eslint-disable-next-line no-labels
          break outRootLoop;
        }
        case AMP:
          // Start of an entity: S_ENTITY will come back to S_TEXT when done.
          this.state = S_ENTITY;
          this.entityReturnState = S_TEXT;
          if (handler !== undefined) {
            this.text += chunk.slice(start, this.prevI);
          }
          // An entity reference counts as non-space content.
          nonSpace = true;
          // eslint-disable-next-line no-labels
          break outRootLoop;
        case NL_LIKE:
          // Record the newline-like sequence as "\n" and restart the slice
          // after it.
          if (handler !== undefined) {
            this.text += `${chunk.slice(start, this.prevI)}\n`;
          }
          start = this.i;
          break;
        case EOC:
          // End of chunk: keep what was scanned so far as pending text.
          if (handler !== undefined) {
            this.text += chunk.slice(start);
          }
          // eslint-disable-next-line no-labels
          break outRootLoop;
        default:
          if (!isS(code)) {
            nonSpace = true;
          }
      }
    }
    // White space alone is not an error outside the root.
    if (!nonSpace) {
      return;
    }
    // We use the reportedTextBeforeRoot and reportedTextAfterRoot flags
    // to avoid reporting errors for every single character that is out of
    // place.
    if (!this.sawRoot && !this.reportedTextBeforeRoot) {
      this.fail("text data outside of root node.");
      this.reportedTextBeforeRoot = true;
    }
    if (this.closedRoot && !this.reportedTextAfterRoot) {
      this.fail("text data outside of root node.");
      this.reportedTextAfterRoot = true;
    }
  }
  1779. pushAttribNS(name, value) {
  1780. var _a;
  1781. const { prefix, local } = this.qname(name);
  1782. const attr = { name, prefix, local, value };
  1783. this.attribList.push(attr);
  1784. // eslint-disable-next-line no-unused-expressions
  1785. (_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr);
  1786. if (prefix === "xmlns") {
  1787. const trimmed = value.trim();
  1788. if (this.currentXMLVersion === "1.0" && trimmed === "") {
  1789. this.fail("invalid attempt to undefine prefix in XML 1.0");
  1790. }
  1791. this.topNS[local] = trimmed;
  1792. nsPairCheck(this, local, trimmed);
  1793. }
  1794. else if (name === "xmlns") {
  1795. const trimmed = value.trim();
  1796. this.topNS[""] = trimmed;
  1797. nsPairCheck(this, "", trimmed);
  1798. }
  1799. }
  1800. pushAttribPlain(name, value) {
  1801. var _a;
  1802. const attr = { name, value };
  1803. this.attribList.push(attr);
  1804. // eslint-disable-next-line no-unused-expressions
  1805. (_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr);
  1806. }
  1807. /**
  1808. * End parsing. This performs final well-formedness checks and resets the
  1809. * parser to a clean state.
  1810. *
  1811. * @returns this
  1812. */
  1813. end() {
  1814. var _a, _b;
  1815. if (!this.sawRoot) {
  1816. this.fail("document must contain a root element.");
  1817. }
  1818. const { tags } = this;
  1819. while (tags.length > 0) {
  1820. const tag = tags.pop();
  1821. this.fail(`unclosed tag: ${tag.name}`);
  1822. }
  1823. if ((this.state !== S_BEGIN) && (this.state !== S_TEXT)) {
  1824. this.fail("unexpected end.");
  1825. }
  1826. const { text } = this;
  1827. if (text.length !== 0) {
  1828. // eslint-disable-next-line no-unused-expressions
  1829. (_a = this.textHandler) === null || _a === void 0 ? void 0 : _a.call(this, text);
  1830. this.text = "";
  1831. }
  1832. this._closed = true;
  1833. // eslint-disable-next-line no-unused-expressions
  1834. (_b = this.endHandler) === null || _b === void 0 ? void 0 : _b.call(this);
  1835. this._init();
  1836. return this;
  1837. }
  1838. /**
  1839. * Resolve a namespace prefix.
  1840. *
  1841. * @param prefix The prefix to resolve.
  1842. *
  1843. * @returns The namespace URI or ``undefined`` if the prefix is not defined.
  1844. */
  1845. resolve(prefix) {
  1846. var _a, _b;
  1847. let uri = this.topNS[prefix];
  1848. if (uri !== undefined) {
  1849. return uri;
  1850. }
  1851. const { tags } = this;
  1852. for (let index = tags.length - 1; index >= 0; index--) {
  1853. uri = tags[index].ns[prefix];
  1854. if (uri !== undefined) {
  1855. return uri;
  1856. }
  1857. }
  1858. uri = this.ns[prefix];
  1859. if (uri !== undefined) {
  1860. return uri;
  1861. }
  1862. return (_b = (_a = this.opt).resolvePrefix) === null || _b === void 0 ? void 0 : _b.call(_a, prefix);
  1863. }
  1864. /**
  1865. * Parse a qname into its prefix and local name parts.
  1866. *
  1867. * @param name The name to parse
  1868. *
  1869. * @returns
  1870. */
  1871. qname(name) {
  1872. // This is faster than using name.split(":").
  1873. const colon = name.indexOf(":");
  1874. if (colon === -1) {
  1875. return { prefix: "", local: name };
  1876. }
  1877. const local = name.slice(colon + 1);
  1878. const prefix = name.slice(0, colon);
  1879. if (prefix === "" || local === "" || local.includes(":")) {
  1880. this.fail(`malformed name: ${name}.`);
  1881. }
  1882. return { prefix, local };
  1883. }
  1884. processAttribsNS() {
  1885. var _a;
  1886. const { attribList } = this;
  1887. const tag = this.tag;
  1888. {
  1889. // add namespace info to tag
  1890. const { prefix, local } = this.qname(tag.name);
  1891. tag.prefix = prefix;
  1892. tag.local = local;
  1893. const uri = tag.uri = (_a = this.resolve(prefix)) !== null && _a !== void 0 ? _a : "";
  1894. if (prefix !== "") {
  1895. if (prefix === "xmlns") {
  1896. this.fail("tags may not have \"xmlns\" as prefix.");
  1897. }
  1898. if (uri === "") {
  1899. this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
  1900. tag.uri = prefix;
  1901. }
  1902. }
  1903. }
  1904. if (attribList.length === 0) {
  1905. return;
  1906. }
  1907. const { attributes } = tag;
  1908. const seen = new Set();
  1909. // Note: do not apply default ns to attributes:
  1910. // http://www.w3.org/TR/REC-xml-names/#defaulting
  1911. for (const attr of attribList) {
  1912. const { name, prefix, local } = attr;
  1913. let uri;
  1914. let eqname;
  1915. if (prefix === "") {
  1916. uri = name === "xmlns" ? XMLNS_NAMESPACE : "";
  1917. eqname = name;
  1918. }
  1919. else {
  1920. uri = this.resolve(prefix);
  1921. // if there's any attributes with an undefined namespace,
  1922. // then fail on them now.
  1923. if (uri === undefined) {
  1924. this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
  1925. uri = prefix;
  1926. }
  1927. eqname = `{${uri}}${local}`;
  1928. }
  1929. if (seen.has(eqname)) {
  1930. this.fail(`duplicate attribute: ${eqname}.`);
  1931. }
  1932. seen.add(eqname);
  1933. attr.uri = uri;
  1934. attributes[name] = attr;
  1935. }
  1936. this.attribList = [];
  1937. }
  1938. processAttribsPlain() {
  1939. const { attribList } = this;
  1940. // eslint-disable-next-line prefer-destructuring
  1941. const attributes = this.tag.attributes;
  1942. for (const { name, value } of attribList) {
  1943. if (attributes[name] !== undefined) {
  1944. this.fail(`duplicate attribute: ${name}.`);
  1945. }
  1946. attributes[name] = value;
  1947. }
  1948. this.attribList = [];
  1949. }
  1950. /**
  1951. * Handle a complete open tag. This parser code calls this once it has seen
  1952. * the whole tag. This method checks for well-formeness and then emits
  1953. * ``onopentag``.
  1954. */
  1955. openTag() {
  1956. var _a;
  1957. this.processAttribs();
  1958. const { tags } = this;
  1959. const tag = this.tag;
  1960. tag.isSelfClosing = false;
  1961. // There cannot be any pending text here due to the onopentagstart that was
  1962. // necessarily emitted before we get here. So we do not check text.
  1963. // eslint-disable-next-line no-unused-expressions
  1964. (_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag);
  1965. tags.push(tag);
  1966. this.state = S_TEXT;
  1967. this.name = "";
  1968. }
  1969. /**
  1970. * Handle a complete self-closing tag. This parser code calls this once it has
  1971. * seen the whole tag. This method checks for well-formeness and then emits
  1972. * ``onopentag`` and ``onclosetag``.
  1973. */
  1974. openSelfClosingTag() {
  1975. var _a, _b, _c;
  1976. this.processAttribs();
  1977. const { tags } = this;
  1978. const tag = this.tag;
  1979. tag.isSelfClosing = true;
  1980. // There cannot be any pending text here due to the onopentagstart that was
  1981. // necessarily emitted before we get here. So we do not check text.
  1982. // eslint-disable-next-line no-unused-expressions
  1983. (_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag);
  1984. // eslint-disable-next-line no-unused-expressions
  1985. (_b = this.closeTagHandler) === null || _b === void 0 ? void 0 : _b.call(this, tag);
  1986. const top = this.tag = (_c = tags[tags.length - 1]) !== null && _c !== void 0 ? _c : null;
  1987. if (top === null) {
  1988. this.closedRoot = true;
  1989. }
  1990. this.state = S_TEXT;
  1991. this.name = "";
  1992. }
  1993. /**
  1994. * Handle a complete close tag. This parser code calls this once it has seen
  1995. * the whole tag. This method checks for well-formeness and then emits
  1996. * ``onclosetag``.
  1997. */
  1998. closeTag() {
  1999. const { tags, name } = this;
  2000. // Our state after this will be S_TEXT, no matter what, and we can clear
  2001. // tagName now.
  2002. this.state = S_TEXT;
  2003. this.name = "";
  2004. if (name === "") {
  2005. this.fail("weird empty close tag.");
  2006. this.text += "</>";
  2007. return;
  2008. }
  2009. const handler = this.closeTagHandler;
  2010. let l = tags.length;
  2011. while (l-- > 0) {
  2012. const tag = this.tag = tags.pop();
  2013. this.topNS = tag.ns;
  2014. // eslint-disable-next-line no-unused-expressions
  2015. handler === null || handler === void 0 ? void 0 : handler(tag);
  2016. if (tag.name === name) {
  2017. break;
  2018. }
  2019. this.fail("unexpected close tag.");
  2020. }
  2021. if (l === 0) {
  2022. this.closedRoot = true;
  2023. }
  2024. else if (l < 0) {
  2025. this.fail(`unmatched closing tag: ${name}.`);
  2026. this.text += `</${name}>`;
  2027. }
  2028. }
  2029. /**
  2030. * Resolves an entity. Makes any necessary well-formedness checks.
  2031. *
  2032. * @param entity The entity to resolve.
  2033. *
  2034. * @returns The parsed entity.
  2035. */
  2036. parseEntity(entity) {
  2037. // startsWith would be significantly slower for this test.
  2038. // eslint-disable-next-line @typescript-eslint/prefer-string-starts-ends-with
  2039. if (entity[0] !== "#") {
  2040. const defined = this.ENTITIES[entity];
  2041. if (defined !== undefined) {
  2042. return defined;
  2043. }
  2044. this.fail(this.isName(entity) ? "undefined entity." :
  2045. "disallowed character in entity name.");
  2046. return `&${entity};`;
  2047. }
  2048. let num = NaN;
  2049. if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) {
  2050. num = parseInt(entity.slice(2), 16);
  2051. }
  2052. else if (/^#[0-9]+$/.test(entity)) {
  2053. num = parseInt(entity.slice(1), 10);
  2054. }
  2055. // The character reference is required to match the CHAR production.
  2056. if (!this.isChar(num)) {
  2057. this.fail("malformed character entity.");
  2058. return `&${entity};`;
  2059. }
  2060. return String.fromCodePoint(num);
  2061. }
  2062. }
  2063. exports.SaxesParser = SaxesParser;
  2064. //# sourceMappingURL=saxes.js.map