// percent-encoding.js
  1. "use strict";
  2. const { isASCIIHex } = require("./infra");
  3. const { utf8Encode } = require("./encoding");
  /**
   * Percent-encodes a single byte value: "%" followed by the value in uppercase
   * hexadecimal, zero-padded on the left to at least two digits.
   *
   * @see https://url.spec.whatwg.org/#percent-encode
   * @param {number} c - The byte value to encode.
   * @returns {string} The percent-encoded form, e.g. "%2F" for 0x2F.
   */
  function percentEncode(c) {
    const digits = c.toString(16).toUpperCase();
    return `%${digits.padStart(2, "0")}`;
  }
  /**
   * Percent-decodes a byte sequence. A "%" byte (0x25) followed by two bytes that
   * isASCIIHex accepts is replaced by the single byte those two hex digits denote;
   * every other byte, including a "%" that is not followed by two hex digits, is
   * copied through unchanged.
   *
   * @see https://url.spec.whatwg.org/#percent-decode
   * @param {Uint8Array} input - Bytes to decode; read through `byteLength` and
   *   numeric indexing (presumably a Uint8Array or Buffer - confirm with callers).
   * @returns {Buffer} A new Buffer holding the decoded bytes.
   */
  function percentDecodeBytes(input) {
    const PERCENT_SIGN = 0x25;
    const total = input.byteLength;
    // Decoding never grows the data, so the input length is a safe upper bound.
    const decoded = new Uint8Array(total);
    let written = 0;
    let pos = 0;

    while (pos < total) {
      const current = input[pos];
      const isEscape =
        current === PERCENT_SIGN && isASCIIHex(input[pos + 1]) && isASCIIHex(input[pos + 2]);

      if (isEscape) {
        const hexDigits = String.fromCodePoint(input[pos + 1], input[pos + 2]);
        decoded[written++] = parseInt(hexDigits, 16);
        // Skip the "%" and both hex digits.
        pos += 3;
      } else {
        decoded[written++] = current;
        pos += 1;
      }
    }

    // TODO: remove the Buffer.from in the next major version; it's only needed for back-compat, and sticking to
    // standard typed arrays is nicer and simpler.
    // See https://github.com/jsdom/data-urls/issues/17 for background.
    return Buffer.from(decoded.slice(0, written));
  }
  /**
   * Percent-decodes a string: the string is passed through utf8Encode and the
   * resulting bytes are handed to percentDecodeBytes.
   *
   * @see https://url.spec.whatwg.org/#string-percent-decode
   * @param {string} input - The string to decode.
   * @returns {Buffer} Whatever percentDecodeBytes returns for the encoded input.
   */
  function percentDecodeString(input) {
    return percentDecodeBytes(utf8Encode(input));
  }
  /**
   * Tests membership in the C0 control percent-encode set: values up to and
   * including 0x1F, and values above 0x7E.
   *
   * @see https://url.spec.whatwg.org/#c0-control-percent-encode-set
   * @param {number} c - The byte or code point value to test.
   * @returns {boolean} True when `c` is in the set.
   */
  function isC0ControlPercentEncode(c) {
    if (c <= 0x1F) {
      return true;
    }
    return c > 0x7E;
  }
  // Values the fragment set adds on top of the C0 control set: space, ", <, > and `.
  const extraFragmentPercentEncodeSet = new Set([0x20, 0x22, 0x3C, 0x3E, 0x60]);

  /**
   * Tests membership in the fragment percent-encode set.
   *
   * @see https://url.spec.whatwg.org/#fragment-percent-encode-set
   * @param {number} c - The byte or code point value to test.
   * @returns {boolean} True when `c` is in the set.
   */
  function isFragmentPercentEncode(c) {
    if (extraFragmentPercentEncodeSet.has(c)) {
      return true;
    }
    return isC0ControlPercentEncode(c);
  }
  // Values the query set adds on top of the C0 control set: space, ", #, < and >.
  const extraQueryPercentEncodeSet = new Set([0x20, 0x22, 0x23, 0x3C, 0x3E]);

  /**
   * Tests membership in the query percent-encode set.
   *
   * @see https://url.spec.whatwg.org/#query-percent-encode-set
   * @param {number} c - The byte or code point value to test.
   * @returns {boolean} True when `c` is in the set.
   */
  function isQueryPercentEncode(c) {
    if (extraQueryPercentEncodeSet.has(c)) {
      return true;
    }
    return isC0ControlPercentEncode(c);
  }
  /**
   * Tests membership in the special-query percent-encode set: the query
   * percent-encode set plus the apostrophe (0x27).
   *
   * @see https://url.spec.whatwg.org/#special-query-percent-encode-set
   * @param {number} c - The byte or code point value to test.
   * @returns {boolean} True when `c` is in the set.
   */
  function isSpecialQueryPercentEncode(c) {
    const APOSTROPHE = 0x27;
    if (c === APOSTROPHE) {
      return true;
    }
    return isQueryPercentEncode(c);
  }
  // Values the path set adds on top of the query set: ?, `, { and }.
  const extraPathPercentEncodeSet = new Set([0x3F, 0x60, 0x7B, 0x7D]);

  /**
   * Tests membership in the path percent-encode set.
   *
   * @see https://url.spec.whatwg.org/#path-percent-encode-set
   * @param {number} c - The byte or code point value to test.
   * @returns {boolean} True when `c` is in the set.
   */
  function isPathPercentEncode(c) {
    if (extraPathPercentEncodeSet.has(c)) {
      return true;
    }
    return isQueryPercentEncode(c);
  }
  // Values the userinfo set adds on top of the path set: / : ; = @ [ \ ] ^ and |.
  const extraUserinfoPercentEncodeSet = new Set([
    0x2F, 0x3A, 0x3B, 0x3D, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x7C
  ]);

  /**
   * Tests membership in the userinfo percent-encode set.
   *
   * @see https://url.spec.whatwg.org/#userinfo-percent-encode-set
   * @param {number} c - The byte or code point value to test.
   * @returns {boolean} True when `c` is in the set.
   */
  function isUserinfoPercentEncode(c) {
    if (extraUserinfoPercentEncodeSet.has(c)) {
      return true;
    }
    return isPathPercentEncode(c);
  }
  // Values the component set adds on top of the userinfo set: $ % & + and ,.
  const extraComponentPercentEncodeSet = new Set([0x24, 0x25, 0x26, 0x2B, 0x2C]);

  /**
   * Tests membership in the component percent-encode set.
   *
   * @see https://url.spec.whatwg.org/#component-percent-encode-set
   * @param {number} c - The byte or code point value to test.
   * @returns {boolean} True when `c` is in the set.
   */
  function isComponentPercentEncode(c) {
    if (extraComponentPercentEncodeSet.has(c)) {
      return true;
    }
    return isUserinfoPercentEncode(c);
  }
  // Values the application/x-www-form-urlencoded set adds on top of the component set: ! ' ( ) and ~.
  const extraURLEncodedPercentEncodeSet = new Set([0x21, 0x27, 0x28, 0x29, 0x7E]);

  /**
   * Tests membership in the application/x-www-form-urlencoded percent-encode set.
   *
   * @see https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
   * @param {number} c - The byte or code point value to test.
   * @returns {boolean} True when `c` is in the set.
   */
  function isURLEncodedPercentEncode(c) {
    if (extraURLEncodedPercentEncodeSet.has(c)) {
      return true;
    }
    return isComponentPercentEncode(c);
  }
  /**
   * UTF-8 percent-encodes one code point that is supplied as a JS string.
   *
   * Assuming the encoding is always UTF-8 allows one of the spec's logic branches
   * to be trimmed. TODO: support encoding.
   *
   * This "-Internal" variant takes the code point as a JS string. The external
   * version used by other files takes code points as JS numbers, like the rest of
   * the codebase.
   *
   * @see https://url.spec.whatwg.org/#code-point-percent-encode-after-encoding
   * @see https://url.spec.whatwg.org/#utf-8-percent-encode
   * @param {string} codePoint - The code point, as a string, to encode.
   * @param {function(number): boolean} percentEncodePredicate - Called with each
   *   encoded byte; a truthy result means the byte is percent-encoded.
   * @returns {string} The encoded text.
   */
  function utf8PercentEncodeCodePointInternal(codePoint, percentEncodePredicate) {
    let encoded = "";
    for (const octet of utf8Encode(codePoint)) {
      // The predicate operates on bytes, not code points, so this differs slightly from the spec.
      encoded += percentEncodePredicate(octet) ? percentEncode(octet) : String.fromCharCode(octet);
    }
    return encoded;
  }
  /**
   * UTF-8 percent-encodes one code point given as a number. The number is turned
   * into a string with String.fromCodePoint and handed to
   * utf8PercentEncodeCodePointInternal.
   *
   * @param {number} codePoint - The code point to encode.
   * @param {function(number): boolean} percentEncodePredicate - Forwarded unchanged
   *   to utf8PercentEncodeCodePointInternal.
   * @returns {string} The encoded text.
   */
  function utf8PercentEncodeCodePoint(codePoint, percentEncodePredicate) {
    const asString = String.fromCodePoint(codePoint);
    return utf8PercentEncodeCodePointInternal(asString, percentEncodePredicate);
  }
  /**
   * UTF-8 percent-encodes a string, one code point at a time.
   *
   * @see https://url.spec.whatwg.org/#string-percent-encode-after-encoding
   * @see https://url.spec.whatwg.org/#string-utf-8-percent-encode
   * @param {string} input - The string to encode; iterated by code point.
   * @param {function(number): boolean} percentEncodePredicate - Forwarded to
   *   utf8PercentEncodeCodePointInternal for every code point that is encoded.
   * @param {boolean} [spaceAsPlus=false] - When truthy, a space is emitted as "+"
   *   and is not passed to the encoder.
   * @returns {string} The encoded string.
   */
  function utf8PercentEncodeString(input, percentEncodePredicate, spaceAsPlus = false) {
    let encoded = "";
    for (const ch of input) {
      const emitPlus = spaceAsPlus && ch === " ";
      encoded += emitPlus ? "+" : utf8PercentEncodeCodePointInternal(ch, percentEncodePredicate);
    }
    return encoded;
  }
  111. module.exports = {
  112. isC0ControlPercentEncode,
  113. isFragmentPercentEncode,
  114. isQueryPercentEncode,
  115. isSpecialQueryPercentEncode,
  116. isPathPercentEncode,
  117. isUserinfoPercentEncode,
  118. isURLEncodedPercentEncode,
  119. percentDecodeString,
  120. percentDecodeBytes,
  121. utf8PercentEncodeString,
  122. utf8PercentEncodeCodePoint
  123. };