url-state-machine.js 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244
  1. "use strict";
  2. const tr46 = require("tr46");
  3. const infra = require("./infra");
  4. const { utf8DecodeWithoutBOM } = require("./encoding");
  5. const { percentDecodeString, utf8PercentEncodeCodePoint, utf8PercentEncodeString, isC0ControlPercentEncode,
  6. isFragmentPercentEncode, isQueryPercentEncode, isSpecialQueryPercentEncode, isPathPercentEncode,
  7. isUserinfoPercentEncode } = require("./percent-encoding");
  /**
   * Shorthand for turning a character literal into its numeric code point.
   *
   * @param {string} char - String whose first code point is wanted.
   * @returns {number|undefined} The code point at index 0 (undefined for "").
   */
  function p(char) {
    const codePoint = char.codePointAt(0);
    return codePoint;
  }
  // Special schemes mapped to their default port (`file` has none, hence null).
  // The map has a null prototype so that scheme names which collide with
  // Object.prototype members (e.g. "constructor") are NOT found by plain
  // `specialSchemes[scheme]` lookups and therefore are not treated as special.
  const specialSchemes = Object.assign(Object.create(null), {
    ftp: 21,
    file: null,
    http: 80,
    https: 443,
    ws: 80,
    wss: 443
  });

  // Unique sentinel returned by the parsers in this file to signal failure.
  const failure = Symbol("failure");
  /**
   * Counts the code points (not UTF-16 code units) in a string.
   *
   * @param {string} str
   * @returns {number}
   */
  function countSymbols(str) {
    const symbols = Array.from(str);
    return symbols.length;
  }
  /**
   * Reads the code point stored at `input[idx]` and returns it as a string.
   *
   * @param {number[]} input - Array of code points.
   * @param {number} idx - Index to read.
   * @returns {string|undefined} The character, or undefined when the slot does
   *   not hold a number (for example when idx is out of range).
   */
  function at(input, idx) {
    const codePoint = input[idx];
    // The coercing global isNaN is intentional: it treats `undefined` as NaN.
    if (isNaN(codePoint)) {
      return undefined;
    }
    return String.fromCodePoint(codePoint);
  }
  /**
   * Tells whether a path segment is "." or its percent-encoded form "%2e"
   * (matched case-insensitively).
   *
   * @param {string} buffer
   * @returns {boolean}
   */
  function isSingleDot(buffer) {
    if (buffer === ".") {
      return true;
    }
    return buffer.toLowerCase() === "%2e";
  }
  /**
   * Tells whether a path segment is ".." in any mix of literal dots and
   * percent-encoded dots ("%2e", matched case-insensitively).
   *
   * @param {string} buffer
   * @returns {boolean}
   */
  function isDoubleDot(buffer) {
    switch (buffer.toLowerCase()) {
      case "..":
      case "%2e.":
      case ".%2e":
      case "%2e%2e":
        return true;
      default:
        return false;
    }
  }
  /**
   * Tells whether two code points form a Windows drive letter: an ASCII
   * letter followed by ":" or "|".
   *
   * @param {number} cp1 - First code point.
   * @param {number} cp2 - Second code point.
   * @returns {boolean}
   */
  function isWindowsDriveLetterCodePoints(cp1, cp2) {
    const startsWithLetter = infra.isASCIIAlpha(cp1);
    const hasDriveSeparator = cp2 === p(":") || cp2 === p("|");
    return startsWithLetter && hasDriveSeparator;
  }
  /**
   * Tells whether a string is exactly a Windows drive letter: one ASCII
   * letter followed by ":" or "|".
   *
   * @param {string} string
   * @returns {boolean}
   */
  function isWindowsDriveLetterString(string) {
    if (string.length !== 2) {
      return false;
    }
    const separator = string[1];
    return infra.isASCIIAlpha(string.codePointAt(0)) && (separator === ":" || separator === "|");
  }
  /**
   * Tells whether a string is a normalized Windows drive letter: one ASCII
   * letter followed by ":" (the "|" form is not accepted here).
   *
   * @param {string} string
   * @returns {boolean}
   */
  function isNormalizedWindowsDriveLetterString(string) {
    if (string.length !== 2) {
      return false;
    }
    return infra.isASCIIAlpha(string.codePointAt(0)) && string[1] === ":";
  }
  /**
   * Tells whether a host string contains any code point this parser rejects
   * in hosts (NUL, tab, LF, CR, space, and # % / : < > ? @ [ \ ] ^ |).
   *
   * @param {string} string
   * @returns {boolean}
   */
  function containsForbiddenHostCodePoint(string) {
    const forbidden = /\u0000|\u0009|\u000A|\u000D|\u0020|#|%|\/|:|<|>|\?|@|\[|\\|\]|\^|\|/u;
    return forbidden.test(string);
  }
  /**
   * Same check as containsForbiddenHostCodePoint, except that "%" is allowed.
   *
   * @param {string} string
   * @returns {boolean}
   */
  function containsForbiddenHostCodePointExcludingPercent(string) {
    const forbidden = /\u0000|\u0009|\u000A|\u000D|\u0020|#|\/|:|<|>|\?|@|\[|\\|\]|\^|\|/u;
    return forbidden.test(string);
  }
  /**
   * Tells whether `scheme` is one of the special schemes listed in
   * `specialSchemes`.
   *
   * Only own keys count: a bare `specialSchemes[scheme] !== undefined` lookup
   * would also match members inherited from Object.prototype, so a scheme
   * named "constructor" would wrongly be reported as special.
   *
   * @param {string} scheme - Lowercased scheme without the trailing ":".
   * @returns {boolean}
   */
  function isSpecialScheme(scheme) {
    return Object.prototype.hasOwnProperty.call(specialSchemes, scheme);
  }
  /**
   * Tells whether a URL record's scheme is a special scheme.
   *
   * @param {{scheme: string}} url
   * @returns {boolean}
   */
  function isSpecial(url) {
    const { scheme } = url;
    return isSpecialScheme(scheme);
  }
  /**
   * Negation of isSpecial: true when the URL record's scheme is not special.
   *
   * @param {{scheme: string}} url
   * @returns {boolean}
   */
  function isNotSpecial(url) {
    const { scheme } = url;
    return !isSpecialScheme(scheme);
  }
  /**
   * Looks up the default port of a scheme.
   *
   * Only own entries of `specialSchemes` are consulted, so a scheme such as
   * "constructor" yields undefined instead of a member inherited from
   * Object.prototype.
   *
   * @param {string} scheme - Lowercased scheme without the trailing ":".
   * @returns {number|null|undefined} The port number, null for "file", or
   *   undefined when the scheme is not special.
   */
  function defaultPort(scheme) {
    if (!Object.prototype.hasOwnProperty.call(specialSchemes, scheme)) {
      return undefined;
    }
    return specialSchemes[scheme];
  }
  /**
   * Parses one dot-separated part of an IPv4 address.
   *
   * A "0x"/"0X" prefix selects hexadecimal, any other leading "0" (on input of
   * length >= 2) selects octal, otherwise the part is decimal.
   *
   * @param {string} input - One part of the address.
   * @returns {number|symbol} The parsed number, or `failure` when the part is
   *   empty or contains a digit that is invalid for its radix.
   */
  function parseIPv4Number(input) {
    if (input === "") {
      return failure;
    }

    let radix = 10;
    let digits = input;
    if (input.length >= 2 && input[0] === "0") {
      if (input[1] === "x" || input[1] === "X") {
        digits = input.substring(2);
        radix = 16;
      } else {
        digits = input.substring(1);
        radix = 8;
      }
    }

    // Only reachable for a bare "0x"/"0X" prefix.
    if (digits === "") {
      return 0;
    }

    let invalidDigit;
    switch (radix) {
      case 16:
        invalidDigit = /[^0-9A-Fa-f]/u;
        break;
      case 10:
        invalidDigit = /[^0-9]/u;
        break;
      default:
        invalidDigit = /[^0-7]/u;
    }
    if (invalidDigit.test(digits)) {
      return failure;
    }

    return parseInt(digits, radix);
  }
  /**
   * Parses a dotted IPv4 address (1 to 4 parts, optional single trailing dot)
   * into one number.
   *
   * Every part except the last must be at most 255; the last part fills the
   * remaining bytes and must be below 256 ** (5 - number of parts).
   *
   * @param {string} input
   * @returns {number|symbol} The address as an integer, or `failure`.
   */
  function parseIPv4(input) {
    const parts = input.split(".");
    // Tolerate one trailing dot, but keep a lone empty part so it fails below.
    if (parts.length > 1 && parts[parts.length - 1] === "") {
      parts.pop();
    }
    if (parts.length > 4) {
      return failure;
    }

    const numbers = [];
    for (const part of parts) {
      const parsed = parseIPv4Number(part);
      if (parsed === failure) {
        return failure;
      }
      numbers.push(parsed);
    }

    const last = numbers[numbers.length - 1];
    const leading = numbers.slice(0, -1);
    if (leading.some(n => n > 255)) {
      return failure;
    }
    if (last >= 256 ** (5 - numbers.length)) {
      return failure;
    }

    // Leading parts occupy the most significant bytes, in order.
    return leading.reduce((ipv4, n, index) => ipv4 + n * 256 ** (3 - index), last);
  }
  /**
   * Serializes a numeric IPv4 address as four dot-separated decimal octets.
   *
   * @param {number} address
   * @returns {string}
   */
  function serializeIPv4(address) {
    const octets = [];
    let remaining = address;
    // Peel off the least significant octet first and prepend it.
    for (let i = 0; i < 4; ++i) {
      octets.unshift(String(remaining % 256));
      remaining = Math.floor(remaining / 256);
    }
    return octets.join(".");
  }
  /**
   * Parses the text of an IPv6 address (without the surrounding brackets)
   * into eight 16-bit pieces.
   *
   * Supports a single "::" compression and a trailing dotted-decimal IPv4
   * part that fills the last two pieces.
   *
   * @param {string} input
   * @returns {number[]|symbol} Array of eight pieces, or `failure`.
   */
  function parseIPv6(input) {
    const COLON = p(":");
    const DOT = p(".");
    const address = [0, 0, 0, 0, 0, 0, 0, 0];
    const codePoints = Array.from(input, ch => ch.codePointAt(0));
    let pieceIndex = 0;
    let compress = null;
    let pointer = 0;

    // A leading ":" is only valid as the start of "::".
    if (codePoints[pointer] === COLON) {
      if (codePoints[pointer + 1] !== COLON) {
        return failure;
      }
      pointer += 2;
      ++pieceIndex;
      compress = pieceIndex;
    }

    while (pointer < codePoints.length) {
      if (pieceIndex === 8) {
        return failure;
      }

      if (codePoints[pointer] === COLON) {
        // A second "::" is not allowed.
        if (compress !== null) {
          return failure;
        }
        ++pointer;
        ++pieceIndex;
        compress = pieceIndex;
        continue;
      }

      // Read up to four hex digits into one piece.
      let value = 0;
      let length = 0;
      while (length < 4 && infra.isASCIIHex(codePoints[pointer])) {
        value = value * 0x10 + parseInt(at(codePoints, pointer), 16);
        ++pointer;
        ++length;
      }

      const next = codePoints[pointer];
      if (next === DOT) {
        // The digits just read belong to an embedded IPv4 address: rewind and
        // re-read them as decimal.
        if (length === 0) {
          return failure;
        }
        pointer -= length;
        if (pieceIndex > 6) {
          return failure;
        }

        let numbersSeen = 0;
        while (codePoints[pointer] !== undefined) {
          let ipv4Piece = null;

          if (numbersSeen > 0) {
            if (codePoints[pointer] !== DOT || numbersSeen >= 4) {
              return failure;
            }
            ++pointer;
          }

          if (!infra.isASCIIDigit(codePoints[pointer])) {
            return failure;
          }

          while (infra.isASCIIDigit(codePoints[pointer])) {
            const digit = parseInt(at(codePoints, pointer), 10);
            if (ipv4Piece === null) {
              ipv4Piece = digit;
            } else if (ipv4Piece === 0) {
              // Leading zeros are rejected.
              return failure;
            } else {
              ipv4Piece = ipv4Piece * 10 + digit;
            }
            if (ipv4Piece > 255) {
              return failure;
            }
            ++pointer;
          }

          address[pieceIndex] = address[pieceIndex] * 0x100 + ipv4Piece;
          ++numbersSeen;
          // Two IPv4 numbers make up one 16-bit piece.
          if (numbersSeen === 2 || numbersSeen === 4) {
            ++pieceIndex;
          }
        }

        if (numbersSeen !== 4) {
          return failure;
        }
        break;
      }

      if (next === COLON) {
        ++pointer;
        // A single trailing ":" is invalid.
        if (codePoints[pointer] === undefined) {
          return failure;
        }
      } else if (next !== undefined) {
        return failure;
      }

      address[pieceIndex] = value;
      ++pieceIndex;
    }

    if (compress !== null) {
      // Move the pieces parsed after "::" to the end of the address.
      let swaps = pieceIndex - compress;
      pieceIndex = 7;
      while (pieceIndex !== 0 && swaps > 0) {
        const source = compress + swaps - 1;
        [address[source], address[pieceIndex]] = [address[pieceIndex], address[source]];
        --pieceIndex;
        --swaps;
      }
    } else if (pieceIndex !== 8) {
      return failure;
    }

    return address;
  }
  /**
   * Serializes eight 16-bit pieces as IPv6 text (without brackets), replacing
   * the run of zero pieces chosen by findLongestZeroSequence with "::".
   *
   * @param {number[]} address - Eight pieces.
   * @returns {string}
   */
  function serializeIPv6(address) {
    const compress = findLongestZeroSequence(address);
    let output = "";
    let skippingZeros = false;

    for (let pieceIndex = 0; pieceIndex <= 7; ++pieceIndex) {
      const piece = address[pieceIndex];

      if (skippingZeros) {
        // Still inside the compressed run.
        if (piece === 0) {
          continue;
        }
        skippingZeros = false;
      }

      if (compress === pieceIndex) {
        // The previous piece already emitted one ":" unless this run starts
        // the address.
        output += pieceIndex === 0 ? "::" : ":";
        skippingZeros = true;
        continue;
      }

      output += piece.toString(16);
      if (pieceIndex !== 7) {
        output += ":";
      }
    }

    return output;
  }
  /**
   * Parses a host string.
   *
   * Bracketed input is parsed as IPv6. Otherwise, non-special URLs get an
   * opaque host, and special URLs are percent-decoded, converted with
   * domainToASCII, checked for forbidden code points and, when the result
   * ends in a number, parsed as IPv4.
   *
   * @param {string} input - Raw host text.
   * @param {boolean} [isNotSpecialArg=false] - True when the URL's scheme is
   *   not special.
   * @returns {string|number|number[]|symbol} Domain/opaque host string, IPv4
   *   number, IPv6 pieces, or `failure`.
   */
  function parseHost(input, isNotSpecialArg = false) {
    if (input[0] === "[") {
      const isClosed = input[input.length - 1] === "]";
      return isClosed ? parseIPv6(input.substring(1, input.length - 1)) : failure;
    }

    if (isNotSpecialArg) {
      return parseOpaqueHost(input);
    }

    const decoded = utf8DecodeWithoutBOM(percentDecodeString(input));
    const asciiDomain = domainToASCII(decoded);
    if (asciiDomain === failure) {
      return failure;
    }
    if (containsForbiddenHostCodePoint(asciiDomain)) {
      return failure;
    }

    return endsInANumber(asciiDomain) ? parseIPv4(asciiDomain) : asciiDomain;
  }
  /**
   * Tells whether the last non-empty dot-separated label of a host looks like
   * a number: either parseIPv4Number accepts it or it is made of ASCII digits
   * only.
   *
   * @param {string} input
   * @returns {boolean}
   */
  function endsInANumber(input) {
    const labels = input.split(".");
    if (labels[labels.length - 1] === "") {
      if (labels.length === 1) {
        return false;
      }
      // Ignore one trailing dot.
      labels.pop();
    }

    const last = labels[labels.length - 1];
    return parseIPv4Number(last) !== failure || /^[0-9]+$/u.test(last);
  }
  /**
   * Parses the host of a non-special URL: rejects forbidden code points
   * ("%" is allowed) and percent-encodes the rest with the C0-control set.
   *
   * @param {string} input
   * @returns {string|symbol} The encoded host, or `failure`.
   */
  function parseOpaqueHost(input) {
    return containsForbiddenHostCodePointExcludingPercent(input) ?
      failure :
      utf8PercentEncodeString(input, isC0ControlPercentEncode);
  }
  /**
   * Finds the start index of the longest run of zeros in `arr`.
   *
   * Only runs of at least two zeros qualify; among equally long runs the
   * first one wins.
   *
   * @param {number[]} arr
   * @returns {number|null} Start index of the run, or null when none qualifies.
   */
  function findLongestZeroSequence(arr) {
    let bestStart = null;
    let bestLength = 1; // a run must be longer than 1 to count
    let runStart = null;
    let runLength = 0;

    // Records the current run as the best one if it is strictly longer.
    const closeRun = () => {
      if (runLength > bestLength) {
        bestStart = runStart;
        bestLength = runLength;
      }
    };

    for (const [index, value] of arr.entries()) {
      if (value === 0) {
        if (runStart === null) {
          runStart = index;
        }
        ++runLength;
      } else {
        closeRun();
        runStart = null;
        runLength = 0;
      }
    }

    // Account for a run that reaches the end of the array.
    closeRun();
    return bestStart;
  }
  /**
   * Serializes a parsed host: numbers as IPv4, arrays as bracketed IPv6, and
   * anything else is returned unchanged.
   *
   * @param {number|number[]|string} host
   * @returns {string}
   */
  function serializeHost(host) {
    const isIPv4 = typeof host === "number";
    if (!isIPv4 && !(host instanceof Array)) {
      return host;
    }
    return isIPv4 ? serializeIPv4(host) : `[${serializeIPv6(host)}]`;
  }
  /**
   * Converts a domain to ASCII via tr46.toASCII.
   *
   * @param {string} domain
   * @param {boolean} [beStrict=false] - Passed as both useSTD3ASCIIRules and
   *   verifyDNSLength.
   * @returns {string|symbol} The ASCII domain, or `failure` when tr46 returns
   *   null or an empty string.
   */
  function domainToASCII(domain, beStrict = false) {
    const options = {
      checkBidi: true,
      checkHyphens: false,
      checkJoiners: true,
      useSTD3ASCIIRules: beStrict,
      verifyDNSLength: beStrict
    };
    const ascii = tr46.toASCII(domain, options);
    return ascii === null || ascii === "" ? failure : ascii;
  }
  /**
   * Strips leading and trailing C0 control characters and spaces
   * (U+0000 to U+0020) from a string.
   *
   * @param {string} url
   * @returns {string}
   */
  function trimControlChars(url) {
    const leadingOrTrailingC0OrSpace = /^[\u0000-\u001F\u0020]+|[\u0000-\u001F\u0020]+$/ug;
    return url.replace(leadingOrTrailingC0OrSpace, "");
  }
  /**
   * Removes every tab, line feed and carriage return from a string, wherever
   * they occur.
   *
   * @param {string} url
   * @returns {string}
   */
  function trimTabAndNewline(url) {
    const tabOrNewline = /\u0009|\u000A|\u000D/ug;
    return url.replace(tabOrNewline, "");
  }
  /**
   * Removes the last segment of `url.path` in place.
   *
   * Nothing happens when the path is empty, or when a "file" URL's path
   * consists of a single normalized Windows drive letter.
   *
   * @param {{scheme: string, path: string[]}} url
   * @returns {void}
   */
  function shortenPath(url) {
    const segments = url.path;
    const isLoneDriveLetter = url.scheme === "file" &&
      segments.length === 1 &&
      isNormalizedWindowsDriveLetter(segments[0]);

    if (segments.length > 0 && !isLoneDriveLetter) {
      segments.pop();
    }
  }
  /**
   * Tells whether a URL record has a non-empty username or password.
   *
   * @param {{username: string, password: string}} url
   * @returns {boolean}
   */
  function includesCredentials(url) {
    const { username, password } = url;
    return !(username === "" && password === "");
  }
  /**
   * Tells whether a URL record cannot carry credentials or a port: its host
   * is null or empty, its path is opaque, or its scheme is "file".
   *
   * @param {{host: *, scheme: string, path: *}} url
   * @returns {boolean}
   */
  function cannotHaveAUsernamePasswordPort(url) {
    const { host, scheme } = url;
    if (host === null || host === "") {
      return true;
    }
    return hasAnOpaquePath(url) || scheme === "file";
  }
  /**
   * Tells whether a URL record has an opaque path, i.e. its path is stored as
   * a string rather than as a list of segments.
   *
   * @param {{path: string|string[]}} url
   * @returns {boolean}
   */
  function hasAnOpaquePath(url) {
    const { path } = url;
    return typeof path === "string";
  }
  /**
   * Tells whether a value is a normalized Windows drive letter: exactly one
   * ASCII letter followed by ":".
   *
   * @param {string} string
   * @returns {boolean}
   */
  function isNormalizedWindowsDriveLetter(string) {
    const driveLetter = /^[A-Za-z]:$/u;
    return driveLetter.test(string);
  }
  /**
   * Runs the URL parsing state machine over `input`.
   *
   * The constructor does all the work: it prepares the input, then feeds one
   * code point at a time (plus a final end-of-input step where `c` is
   * undefined) to the handler named `parse <state>` until a handler returns a
   * falsy value or `failure`. Results are left on the instance: `url`,
   * `failure` and `parseError`.
   *
   * @param {string} input - Text to parse.
   * @param {object} [base] - Base URL record; stored as null when falsy.
   * @param {string} [encodingOverride] - Defaults to "utf-8" when falsy.
   * @param {object} [url] - Existing URL record to modify; a fresh record is
   *   created when falsy.
   * @param {string} [stateOverride] - State to start in instead of
   *   "scheme start".
   */
  function URLStateMachine(input, base, encodingOverride, url, stateOverride) {
    this.pointer = 0;
    this.input = input;
    this.base = base || null;
    this.encodingOverride = encodingOverride || "utf-8";
    this.stateOverride = stateOverride;
    this.url = url;
    this.failure = false;
    this.parseError = false;

    if (!this.url) {
      this.url = {
        scheme: "",
        username: "",
        password: "",
        host: null,
        port: null,
        path: [],
        query: null,
        fragment: null
      };

      // Leading/trailing controls and spaces are only stripped when parsing
      // into a fresh record.
      const trimmed = trimControlChars(this.input);
      if (trimmed !== this.input) {
        this.parseError = true;
      }
      this.input = trimmed;
    }

    const withoutTabsAndNewlines = trimTabAndNewline(this.input);
    if (withoutTabsAndNewlines !== this.input) {
      this.parseError = true;
    }
    this.input = withoutTabsAndNewlines;

    this.state = stateOverride || "scheme start";

    this.buffer = "";
    this.atFlag = false;
    this.arrFlag = false;
    this.passwordTokenSeenFlag = false;

    // From here on the input is an array of code points.
    this.input = Array.from(this.input, ch => ch.codePointAt(0));

    // `<=` on purpose: the extra iteration is the end-of-input step.
    for (; this.pointer <= this.input.length; ++this.pointer) {
      const c = this.input[this.pointer];
      const cStr = isNaN(c) ? undefined : String.fromCodePoint(c);

      const handlerName = `parse ${this.state}`;
      const ret = this[handlerName](c, cStr);
      if (ret === failure) {
        this.failure = true;
        break;
      }
      if (!ret) {
        break; // handler asked to terminate the algorithm
      }
    }
  }
  // "scheme start" state: an ASCII letter begins the scheme buffer; anything
  // else restarts in "no scheme" (or fails when a state override is active).
  URLStateMachine.prototype["parse scheme start"] = function parseSchemeStart(c, cStr) {
    if (infra.isASCIIAlpha(c)) {
      this.buffer += cStr.toLowerCase();
      this.state = "scheme";
      return true;
    }

    if (this.stateOverride) {
      this.parseError = true;
      return failure;
    }

    // Re-process the same code point in the "no scheme" state.
    this.state = "no scheme";
    --this.pointer;
    return true;
  };
  // "scheme" state: accumulates the lowercased scheme until ":" and then picks
  // the follow-up state. Without ":" the whole input is re-read from the start
  // in "no scheme" (or parsing fails under a state override).
  URLStateMachine.prototype["parse scheme"] = function parseScheme(c, cStr) {
    if (infra.isASCIIAlphanumeric(c) || c === p("+") || c === p("-") || c === p(".")) {
      this.buffer += cStr.toLowerCase();
      return true;
    }

    if (c !== p(":")) {
      if (this.stateOverride) {
        this.parseError = true;
        return failure;
      }
      this.buffer = "";
      this.state = "no scheme";
      this.pointer = -1; // the loop increment brings this back to 0
      return true;
    }

    if (this.stateOverride) {
      // A scheme change may not switch between special and non-special.
      if (isSpecial(this.url) !== isSpecialScheme(this.buffer)) {
        return false;
      }
      if ((includesCredentials(this.url) || this.url.port !== null) && this.buffer === "file") {
        return false;
      }
      if (this.url.scheme === "file" && this.url.host === "") {
        return false;
      }
    }

    this.url.scheme = this.buffer;

    if (this.stateOverride) {
      if (this.url.port === defaultPort(this.url.scheme)) {
        this.url.port = null;
      }
      return false;
    }

    this.buffer = "";
    const next = this.input[this.pointer + 1];
    if (this.url.scheme === "file") {
      if (next !== p("/") || this.input[this.pointer + 2] !== p("/")) {
        this.parseError = true;
      }
      this.state = "file";
    } else if (isSpecial(this.url)) {
      const sameSchemeAsBase = this.base !== null && this.base.scheme === this.url.scheme;
      this.state = sameSchemeAsBase ? "special relative or authority" : "special authority slashes";
    } else if (next === p("/")) {
      this.state = "path or authority";
      ++this.pointer;
    } else {
      // Non-special scheme without a leading slash: the path is opaque.
      this.url.path = "";
      this.state = "opaque path";
    }
    return true;
  };
  // "no scheme" state: the input has no scheme of its own, so it can only be
  // resolved against a base URL.
  URLStateMachine.prototype["parse no scheme"] = function parseNoScheme(c) {
    const { base } = this;
    if (base === null) {
      return failure;
    }

    if (hasAnOpaquePath(base)) {
      // Against an opaque-path base only a fragment-only input is allowed.
      if (c !== p("#")) {
        return failure;
      }
      this.url.scheme = base.scheme;
      this.url.path = base.path;
      this.url.query = base.query;
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }

    // Re-process the same code point in the chosen state.
    this.state = base.scheme === "file" ? "file" : "relative";
    --this.pointer;
    return true;
  };
  // "special relative or authority" state: "//" starts an authority, anything
  // else is re-processed as a relative reference (and flagged as a parse error).
  URLStateMachine.prototype["parse special relative or authority"] = function parseSpecialRelativeOrAuthority(c) {
    const slash = p("/");
    const startsAuthority = c === slash && this.input[this.pointer + 1] === slash;

    if (startsAuthority) {
      this.state = "special authority ignore slashes";
      ++this.pointer; // skip the second slash
      return true;
    }

    this.parseError = true;
    this.state = "relative";
    --this.pointer;
    return true;
  };
  // "path or authority" state: a slash here means an authority follows;
  // otherwise the code point is re-processed as part of the path.
  URLStateMachine.prototype["parse path or authority"] = function parsePathOrAuthority(c) {
    if (c !== p("/")) {
      this.state = "path";
      --this.pointer;
      return true;
    }

    this.state = "authority";
    return true;
  };
  // "relative" state: takes the scheme from the base URL and, unless a slash
  // follows, also its credentials, host, port, path and query.
  URLStateMachine.prototype["parse relative"] = function parseRelative(c) {
    const { base, url } = this;
    url.scheme = base.scheme;

    const isSlash = c === p("/");
    if (isSlash || (isSpecial(url) && c === p("\\"))) {
      // A backslash acts like a slash for special URLs but is a parse error.
      if (!isSlash) {
        this.parseError = true;
      }
      this.state = "relative slash";
      return true;
    }

    url.username = base.username;
    url.password = base.password;
    url.host = base.host;
    url.port = base.port;
    url.path = base.path.slice();
    url.query = base.query;

    if (c === p("?")) {
      url.query = "";
      this.state = "query";
    } else if (c === p("#")) {
      url.fragment = "";
      this.state = "fragment";
    } else if (!isNaN(c)) {
      // A relative path replaces the base's last segment and drops its query.
      url.query = null;
      url.path.pop();
      this.state = "path";
      --this.pointer;
    }
    return true;
  };
  // "relative slash" state: a second slash introduces an authority; otherwise
  // the base's credentials, host and port are kept and the path is parsed.
  URLStateMachine.prototype["parse relative slash"] = function parseRelativeSlash(c) {
    const isSlash = c === p("/");
    const isBackslash = c === p("\\");

    if (isSpecial(this.url) && (isSlash || isBackslash)) {
      if (isBackslash) {
        this.parseError = true;
      }
      this.state = "special authority ignore slashes";
    } else if (isSlash) {
      this.state = "authority";
    } else {
      const { base, url } = this;
      url.username = base.username;
      url.password = base.password;
      url.host = base.host;
      url.port = base.port;
      this.state = "path";
      --this.pointer;
    }
    return true;
  };
  // "special authority slashes" state: expects "//". Either way parsing moves
  // on to "special authority ignore slashes"; a missing "//" is a parse error
  // and the current code point is re-processed there.
  URLStateMachine.prototype["parse special authority slashes"] = function parseSpecialAuthoritySlashes(c) {
    const slash = p("/");
    this.state = "special authority ignore slashes";

    if (c === slash && this.input[this.pointer + 1] === slash) {
      ++this.pointer; // skip the second slash
    } else {
      this.parseError = true;
      --this.pointer;
    }
    return true;
  };
  // "special authority ignore slashes" state: swallows any extra slashes or
  // backslashes (each one a parse error); the first other code point is
  // re-processed in the "authority" state.
  URLStateMachine.prototype["parse special authority ignore slashes"] = function parseSpecialAuthorityIgnoreSlashes(c) {
    const isSlashLike = c === p("/") || c === p("\\");
    if (isSlashLike) {
      this.parseError = true;
      return true;
    }

    this.state = "authority";
    --this.pointer;
    return true;
  };
  // "authority" state: buffers code points until "@" (userinfo) or the end of
  // the authority. On "@" the buffer is split at the first ":" into username
  // and password; at the end of the authority the pointer is rewound so the
  // buffered text is re-read as the host.
  //
  // Returns true to continue, or `failure` when an "@" is followed by an
  // empty host.
  URLStateMachine.prototype["parse authority"] = function parseAuthority(c, cStr) {
    if (c === p("@")) {
      this.parseError = true;
      if (this.atFlag) {
        // An earlier "@" was part of the userinfo: keep it, percent-encoded.
        this.buffer = `%40${this.buffer}`;
      }
      this.atFlag = true;

      // Iterate the buffer by code point. Indexing with codePointAt(i) while
      // counting code points mixes UTF-16 indices with code point counts: for
      // characters outside the BMP that yields lone surrogates and drops the
      // tail of the buffer.
      for (const char of this.buffer) {
        const codePoint = char.codePointAt(0);

        if (codePoint === p(":") && !this.passwordTokenSeenFlag) {
          this.passwordTokenSeenFlag = true;
          continue;
        }
        const encodedCodePoints = utf8PercentEncodeCodePoint(codePoint, isUserinfoPercentEncode);
        if (this.passwordTokenSeenFlag) {
          this.url.password += encodedCodePoints;
        } else {
          this.url.username += encodedCodePoints;
        }
      }
      this.buffer = "";
    } else if (isNaN(c) || c === p("/") || c === p("?") || c === p("#") ||
               (isSpecial(this.url) && c === p("\\"))) {
      if (this.atFlag && this.buffer === "") {
        this.parseError = true;
        return failure;
      }
      // this.input holds code points, so rewind by the buffer's code point
      // count (plus one to undo the loop increment).
      this.pointer -= countSymbols(this.buffer) + 1;
      this.buffer = "";
      this.state = "host";
    } else {
      this.buffer += cStr;
    }

    return true;
  };
  // "host" / "hostname" state (both names share one handler): buffers the host
  // text, then parses it when ":" (outside of [brackets]) or the end of the
  // authority is reached.
  URLStateMachine.prototype["parse host"] = function parseHostName(c, cStr) {
    if (this.stateOverride && this.url.scheme === "file") {
      --this.pointer;
      this.state = "file host";
      return true;
    }

    if (c === p(":") && !this.arrFlag) {
      if (this.buffer === "") {
        this.parseError = true;
        return failure;
      }
      if (this.stateOverride === "hostname") {
        return false;
      }

      const hostBeforePort = parseHost(this.buffer, isNotSpecial(this.url));
      if (hostBeforePort === failure) {
        return failure;
      }
      this.url.host = hostBeforePort;
      this.buffer = "";
      this.state = "port";
      return true;
    }

    const endsHost = isNaN(c) || c === p("/") || c === p("?") || c === p("#") ||
                     (isSpecial(this.url) && c === p("\\"));
    if (!endsHost) {
      // Track whether we are inside an IPv6 literal so its colons are not
      // mistaken for the port separator.
      if (c === p("[")) {
        this.arrFlag = true;
      } else if (c === p("]")) {
        this.arrFlag = false;
      }
      this.buffer += cStr;
      return true;
    }

    --this.pointer;
    if (this.buffer === "") {
      if (isSpecial(this.url)) {
        this.parseError = true;
        return failure;
      }
      if (this.stateOverride && (includesCredentials(this.url) || this.url.port !== null)) {
        this.parseError = true;
        return false;
      }
    }

    const host = parseHost(this.buffer, isNotSpecial(this.url));
    if (host === failure) {
      return failure;
    }
    this.url.host = host;
    this.buffer = "";
    this.state = "path start";
    return !this.stateOverride;
  };
  URLStateMachine.prototype["parse hostname"] = URLStateMachine.prototype["parse host"];
  // "port" state: buffers ASCII digits; at the end of the port the number is
  // stored on the URL (null when it equals the scheme's default port). Ports
  // above 65535 and unexpected code points fail.
  URLStateMachine.prototype["parse port"] = function parsePort(c, cStr) {
    if (infra.isASCIIDigit(c)) {
      this.buffer += cStr;
      return true;
    }

    const endsPort = isNaN(c) || c === p("/") || c === p("?") || c === p("#") ||
                     (isSpecial(this.url) && c === p("\\")) ||
                     this.stateOverride;
    if (!endsPort) {
      this.parseError = true;
      return failure;
    }

    if (this.buffer !== "") {
      // The buffer only ever holds ASCII digits, so this is a decimal parse.
      const port = parseInt(this.buffer, 10);
      if (port > 65535) {
        this.parseError = true;
        return failure;
      }
      this.url.port = port === defaultPort(this.url.scheme) ? null : port;
      this.buffer = "";
    }

    if (this.stateOverride) {
      return false;
    }
    this.state = "path start";
    --this.pointer;
    return true;
  };
  // Code points that may directly follow a Windows drive letter.
  const fileOtherwiseCodePoints = new Set(["/", "\\", "?", "#"].map(ch => p(ch)));

  /**
   * Tells whether the code points at `pointer` start with a Windows drive
   * letter that is either the end of the input or followed by one of
   * "/", "\", "?" or "#".
   *
   * @param {number[]} input - Array of code points.
   * @param {number} pointer - Index to start looking at.
   * @returns {boolean}
   */
  function startsWithWindowsDriveLetter(input, pointer) {
    const remaining = input.length - pointer;
    if (remaining < 2) {
      return false;
    }
    const followedByTerminator = remaining === 2 || fileOtherwiseCodePoints.has(input[pointer + 2]);
    return isWindowsDriveLetterCodePoints(input[pointer], input[pointer + 1]) && followedByTerminator;
  }
  // "file" state: forces the "file" scheme with an empty host, then either
  // follows a leading slash, inherits from a "file" base URL, or falls through
  // to plain path parsing.
  URLStateMachine.prototype["parse file"] = function parseFile(c) {
    this.url.scheme = "file";
    this.url.host = "";

    const isBackslash = c === p("\\");
    if (c === p("/") || isBackslash) {
      if (isBackslash) {
        this.parseError = true;
      }
      this.state = "file slash";
      return true;
    }

    const { base } = this;
    if (base === null || base.scheme !== "file") {
      this.state = "path";
      --this.pointer;
      return true;
    }

    this.url.host = base.host;
    this.url.path = base.path.slice();
    this.url.query = base.query;

    if (c === p("?")) {
      this.url.query = "";
      this.state = "query";
    } else if (c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
    } else if (!isNaN(c)) {
      this.url.query = null;
      if (startsWithWindowsDriveLetter(this.input, this.pointer)) {
        // A drive letter replaces the inherited path entirely.
        this.parseError = true;
        this.url.path = [];
      } else {
        shortenPath(this.url);
      }
      this.state = "path";
      --this.pointer;
    }
    return true;
  };
  // "file slash" state: a second slash introduces a file host; otherwise the
  // host (and possibly the drive letter) is inherited from a "file" base URL
  // and the code point is re-processed as path.
  URLStateMachine.prototype["parse file slash"] = function parseFileSlash(c) {
    const isBackslash = c === p("\\");
    if (c === p("/") || isBackslash) {
      if (isBackslash) {
        this.parseError = true;
      }
      this.state = "file host";
      return true;
    }

    const { base } = this;
    if (base !== null && base.scheme === "file") {
      // Keep the base's drive letter unless the input brings its own.
      if (!startsWithWindowsDriveLetter(this.input, this.pointer) &&
          isNormalizedWindowsDriveLetterString(base.path[0])) {
        this.url.path.push(base.path[0]);
      }
      this.url.host = base.host;
    }

    this.state = "path";
    --this.pointer;
    return true;
  };
  // "file host" state: buffers the host of a file URL until a delimiter, then
  // decides whether the buffer was really a drive letter, an empty host, or a
  // host to parse ("localhost" is stored as the empty host).
  URLStateMachine.prototype["parse file host"] = function parseFileHost(c, cStr) {
    const endsHost = isNaN(c) || c === p("/") || c === p("\\") || c === p("?") || c === p("#");
    if (!endsHost) {
      this.buffer += cStr;
      return true;
    }

    --this.pointer;

    if (!this.stateOverride && isWindowsDriveLetterString(this.buffer)) {
      // Not a host after all: the buffer is kept and handled by the path state.
      this.parseError = true;
      this.state = "path";
      return true;
    }

    if (this.buffer === "") {
      this.url.host = "";
      if (this.stateOverride) {
        return false;
      }
      this.state = "path start";
      return true;
    }

    const parsed = parseHost(this.buffer, isNotSpecial(this.url));
    if (parsed === failure) {
      return failure;
    }
    this.url.host = parsed === "localhost" ? "" : parsed;

    if (this.stateOverride) {
      return false;
    }
    this.buffer = "";
    this.state = "path start";
    return true;
  };
  // "path start" state: consumes the slash (or, for special URLs, backslash)
  // that begins the path, or hands over directly to the query/fragment states.
  URLStateMachine.prototype["parse path start"] = function parsePathStart(c) {
    const isSlash = c === p("/");

    if (isSpecial(this.url)) {
      const isBackslash = c === p("\\");
      if (isBackslash) {
        this.parseError = true;
      }
      this.state = "path";
      // Anything but a separator belongs to the first segment: re-process it.
      if (!isSlash && !isBackslash) {
        --this.pointer;
      }
      return true;
    }

    if (!this.stateOverride && c === p("?")) {
      this.url.query = "";
      this.state = "query";
      return true;
    }

    if (!this.stateOverride && c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }

    if (c !== undefined) {
      this.state = "path";
      if (!isSlash) {
        --this.pointer;
      }
      return true;
    }

    if (this.stateOverride && this.url.host === null) {
      this.url.path.push("");
    }
    return true;
  };
  // "path" state: percent-encodes code points into the segment buffer and, at
  // each segment boundary, appends the segment to url.path while resolving "."
  // and ".." segments and normalizing a leading Windows drive letter.
  URLStateMachine.prototype["parse path"] = function parsePath(c) {
    const isSlash = c === p("/");
    const isSpecialBackslash = isSpecial(this.url) && c === p("\\");
    const endsSegment = isNaN(c) || isSlash || isSpecialBackslash ||
                        (!this.stateOverride && (c === p("?") || c === p("#")));

    if (!endsSegment) {
      // TODO: If c is not a URL code point and not "%", parse error.

      const isMalformedEscape = c === p("%") &&
        (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
         !infra.isASCIIHex(this.input[this.pointer + 2]));
      if (isMalformedEscape) {
        this.parseError = true;
      }

      this.buffer += utf8PercentEncodeCodePoint(c, isPathPercentEncode);
      return true;
    }

    if (isSpecialBackslash) {
      this.parseError = true;
    }

    // When no separator follows, "." and ".." leave a trailing empty segment.
    const separatorFollows = isSlash || isSpecialBackslash;
    if (isDoubleDot(this.buffer)) {
      shortenPath(this.url);
      if (!separatorFollows) {
        this.url.path.push("");
      }
    } else if (isSingleDot(this.buffer)) {
      if (!separatorFollows) {
        this.url.path.push("");
      }
    } else {
      if (this.url.scheme === "file" && this.url.path.length === 0 && isWindowsDriveLetterString(this.buffer)) {
        // Normalize "C|" to "C:".
        this.buffer = `${this.buffer[0]}:`;
      }
      this.url.path.push(this.buffer);
    }
    this.buffer = "";

    if (c === p("?")) {
      this.url.query = "";
      this.state = "query";
    }
    if (c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
    }
    return true;
  };
  // "opaque path" state: appends percent-encoded code points to the string
  // path until the query or fragment begins.
  URLStateMachine.prototype["parse opaque path"] = function parseOpaquePath(c) {
    if (c === p("?")) {
      this.url.query = "";
      this.state = "query";
      return true;
    }

    if (c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }

    // End of input: nothing left to append.
    if (isNaN(c)) {
      return true;
    }

    // TODO: Add: not a URL code point
    // As written, every code point other than a well-formed "%XX" escape
    // start sets the parse error flag.
    const startsValidEscape = c === p("%") &&
      infra.isASCIIHex(this.input[this.pointer + 1]) &&
      infra.isASCIIHex(this.input[this.pointer + 2]);
    if (!startsValidEscape) {
      this.parseError = true;
    }

    this.url.path += utf8PercentEncodeCodePoint(c, isC0ControlPercentEncode);
    return true;
  };
  900. URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) {
  901. if (!isSpecial(this.url) || this.url.scheme === "ws" || this.url.scheme === "wss") {
  902. this.encodingOverride = "utf-8";
  903. }
  904. if ((!this.stateOverride && c === p("#")) || isNaN(c)) {
  905. const queryPercentEncodePredicate = isSpecial(this.url) ? isSpecialQueryPercentEncode : isQueryPercentEncode;
  906. this.url.query += utf8PercentEncodeString(this.buffer, queryPercentEncodePredicate);
  907. this.buffer = "";
  908. if (c === p("#")) {
  909. this.url.fragment = "";
  910. this.state = "fragment";
  911. }
  912. } else if (!isNaN(c)) {
  913. // TODO: If c is not a URL code point and not "%", parse error.
  914. if (c === p("%") &&
  915. (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
  916. !infra.isASCIIHex(this.input[this.pointer + 2]))) {
  917. this.parseError = true;
  918. }
  919. this.buffer += cStr;
  920. }
  921. return true;
  922. };
  923. URLStateMachine.prototype["parse fragment"] = function parseFragment(c) {
  924. if (!isNaN(c)) {
  925. // TODO: If c is not a URL code point and not "%", parse error.
  926. if (c === p("%") &&
  927. (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
  928. !infra.isASCIIHex(this.input[this.pointer + 2]))) {
  929. this.parseError = true;
  930. }
  931. this.url.fragment += utf8PercentEncodeCodePoint(c, isFragmentPercentEncode);
  932. }
  933. return true;
  934. };
  935. function serializeURL(url, excludeFragment) {
  936. let output = `${url.scheme}:`;
  937. if (url.host !== null) {
  938. output += "//";
  939. if (url.username !== "" || url.password !== "") {
  940. output += url.username;
  941. if (url.password !== "") {
  942. output += `:${url.password}`;
  943. }
  944. output += "@";
  945. }
  946. output += serializeHost(url.host);
  947. if (url.port !== null) {
  948. output += `:${url.port}`;
  949. }
  950. }
  951. if (url.host === null && !hasAnOpaquePath(url) && url.path.length > 1 && url.path[0] === "") {
  952. output += "/.";
  953. }
  954. output += serializePath(url);
  955. if (url.query !== null) {
  956. output += `?${url.query}`;
  957. }
  958. if (!excludeFragment && url.fragment !== null) {
  959. output += `#${url.fragment}`;
  960. }
  961. return output;
  962. }
  963. function serializeOrigin(tuple) {
  964. let result = `${tuple.scheme}://`;
  965. result += serializeHost(tuple.host);
  966. if (tuple.port !== null) {
  967. result += `:${tuple.port}`;
  968. }
  969. return result;
  970. }
  971. function serializePath(url) {
  972. if (hasAnOpaquePath(url)) {
  973. return url.path;
  974. }
  975. let output = "";
  976. for (const segment of url.path) {
  977. output += `/${segment}`;
  978. }
  979. return output;
  980. }
// Expose the URL and path serializers on the module's public interface.
module.exports.serializeURL = serializeURL;
module.exports.serializePath = serializePath;
  983. module.exports.serializeURLOrigin = function (url) {
  984. // https://url.spec.whatwg.org/#concept-url-origin
  985. switch (url.scheme) {
  986. case "blob":
  987. try {
  988. return module.exports.serializeURLOrigin(module.exports.parseURL(serializePath(url)));
  989. } catch (e) {
  990. // serializing an opaque origin returns "null"
  991. return "null";
  992. }
  993. case "ftp":
  994. case "http":
  995. case "https":
  996. case "ws":
  997. case "wss":
  998. return serializeOrigin({
  999. scheme: url.scheme,
  1000. host: url.host,
  1001. port: url.port
  1002. });
  1003. case "file":
  1004. // The spec says:
  1005. // > Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
  1006. // Browsers tested so far:
  1007. // - Chrome says "file://", but treats file: URLs as cross-origin for most (all?) purposes; see e.g.
  1008. // https://bugs.chromium.org/p/chromium/issues/detail?id=37586
  1009. // - Firefox says "null", but treats file: URLs as same-origin sometimes based on directory stuff; see
  1010. // https://developer.mozilla.org/en-US/docs/Archive/Misc_top_level/Same-origin_policy_for_file:_URIs
  1011. return "null";
  1012. default:
  1013. // serializing an opaque origin returns "null"
  1014. return "null";
  1015. }
  1016. };
  1017. module.exports.basicURLParse = function (input, options) {
  1018. if (options === undefined) {
  1019. options = {};
  1020. }
  1021. const usm = new URLStateMachine(input, options.baseURL, options.encodingOverride, options.url, options.stateOverride);
  1022. if (usm.failure) {
  1023. return null;
  1024. }
  1025. return usm.url;
  1026. };
  1027. module.exports.setTheUsername = function (url, username) {
  1028. url.username = utf8PercentEncodeString(username, isUserinfoPercentEncode);
  1029. };
  1030. module.exports.setThePassword = function (url, password) {
  1031. url.password = utf8PercentEncodeString(password, isUserinfoPercentEncode);
  1032. };
// Expose serializeHost, cannotHaveAUsernamePasswordPort and hasAnOpaquePath
// on the module's public interface under their own names.
module.exports.serializeHost = serializeHost;
module.exports.cannotHaveAUsernamePasswordPort = cannotHaveAUsernamePasswordPort;
module.exports.hasAnOpaquePath = hasAnOpaquePath;
  1036. module.exports.serializeInteger = function (integer) {
  1037. return String(integer);
  1038. };
  1039. module.exports.parseURL = function (input, options) {
  1040. if (options === undefined) {
  1041. options = {};
  1042. }
  1043. // We don't handle blobs, so this just delegates:
  1044. return module.exports.basicURLParse(input, { baseURL: options.baseURL, encodingOverride: options.encodingOverride });
  1045. };