index.js 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. 'use strict';
  2. const url = require('url');
  3. const punycode = require('punycode');
  4. const queryString = require('query-string');
  5. const prependHttp = require('prepend-http');
  6. const sortKeys = require('sort-keys');
  /**
   * Default port for each supported protocol, keyed by the protocol string
   * including its trailing colon (the form used for `urlObj.protocol` lookups
   * in this file). A URL whose port equals the default for its protocol has
   * the port removed during normalization.
   *
   * Frozen so the shared lookup table cannot be modified by accident.
   */
  const DEFAULT_PORTS = Object.freeze({
    'http:': 80,
    'https:': 443,
    'ftp:': 21
  });
  /**
   * Protocols that are always followed by a `//` bit.
   *
   * Each protocol is listed both with and without its trailing colon so a
   * lookup succeeds for either spelling. Relative path segments are only
   * resolved for URLs whose protocol appears in this table.
   *
   * Frozen so the shared lookup table cannot be modified by accident.
   */
  const slashedProtocol = Object.freeze({
    http: true,
    https: true,
    ftp: true,
    gopher: true,
    file: true,
    'http:': true,
    'https:': true,
    'ftp:': true,
    'gopher:': true,
    'file:': true
  });
  /**
   * Check whether `name` matches at least one of the given filters.
   *
   * @param {string} name - Value to test.
   * @param {Array<RegExp|string>} filters - RegExp entries are matched with
   *   `test()`; every other entry is compared to `name` with `===`.
   * @returns {boolean} `true` if any filter matches, otherwise `false`.
   */
  function testParameter(name, filters) {
    return filters.some(filter => {
      if (filter instanceof RegExp) {
        // Global/sticky regexes remember `lastIndex` between calls; without a
        // reset the same filter would alternate between matching and not
        // matching identical input.
        filter.lastIndex = 0;
        return filter.test(name);
      }

      return filter === name;
    });
  }
  28. module.exports = (str, opts) => {
  29. opts = Object.assign({
  30. normalizeProtocol: true,
  31. normalizeHttps: false,
  32. stripFragment: true,
  33. stripWWW: true,
  34. removeQueryParameters: [/^utm_\w+/i],
  35. removeTrailingSlash: true,
  36. removeDirectoryIndex: false,
  37. sortQueryParameters: true
  38. }, opts);
  39. if (typeof str !== 'string') {
  40. throw new TypeError('Expected a string');
  41. }
  42. const hasRelativeProtocol = str.startsWith('//');
  43. // Prepend protocol
  44. str = prependHttp(str.trim()).replace(/^\/\//, 'http://');
  45. const urlObj = url.parse(str);
  46. if (opts.normalizeHttps && urlObj.protocol === 'https:') {
  47. urlObj.protocol = 'http:';
  48. }
  49. if (!urlObj.hostname && !urlObj.pathname) {
  50. throw new Error('Invalid URL');
  51. }
  52. // Prevent these from being used by `url.format`
  53. delete urlObj.host;
  54. delete urlObj.query;
  55. // Remove fragment
  56. if (opts.stripFragment) {
  57. delete urlObj.hash;
  58. }
  59. // Remove default port
  60. const port = DEFAULT_PORTS[urlObj.protocol];
  61. if (Number(urlObj.port) === port) {
  62. delete urlObj.port;
  63. }
  64. // Remove duplicate slashes
  65. if (urlObj.pathname) {
  66. urlObj.pathname = urlObj.pathname.replace(/\/{2,}/g, '/');
  67. }
  68. // Decode URI octets
  69. if (urlObj.pathname) {
  70. urlObj.pathname = decodeURI(urlObj.pathname);
  71. }
  72. // Remove directory index
  73. if (opts.removeDirectoryIndex === true) {
  74. opts.removeDirectoryIndex = [/^index\.[a-z]+$/];
  75. }
  76. if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length > 0) {
  77. let pathComponents = urlObj.pathname.split('/');
  78. const lastComponent = pathComponents[pathComponents.length - 1];
  79. if (testParameter(lastComponent, opts.removeDirectoryIndex)) {
  80. pathComponents = pathComponents.slice(0, pathComponents.length - 1);
  81. urlObj.pathname = pathComponents.slice(1).join('/') + '/';
  82. }
  83. }
  84. // Resolve relative paths, but only for slashed protocols
  85. if (slashedProtocol[urlObj.protocol]) {
  86. const domain = urlObj.protocol + '//' + urlObj.hostname;
  87. const relative = url.resolve(domain, urlObj.pathname);
  88. urlObj.pathname = relative.replace(domain, '');
  89. }
  90. if (urlObj.hostname) {
  91. // IDN to Unicode
  92. urlObj.hostname = punycode.toUnicode(urlObj.hostname).toLowerCase();
  93. // Remove trailing dot
  94. urlObj.hostname = urlObj.hostname.replace(/\.$/, '');
  95. // Remove `www.`
  96. if (opts.stripWWW) {
  97. urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
  98. }
  99. }
  100. // Remove URL with empty query string
  101. if (urlObj.search === '?') {
  102. delete urlObj.search;
  103. }
  104. const queryParameters = queryString.parse(urlObj.search);
  105. // Remove query unwanted parameters
  106. if (Array.isArray(opts.removeQueryParameters)) {
  107. for (const key in queryParameters) {
  108. if (testParameter(key, opts.removeQueryParameters)) {
  109. delete queryParameters[key];
  110. }
  111. }
  112. }
  113. // Sort query parameters
  114. if (opts.sortQueryParameters) {
  115. urlObj.search = queryString.stringify(sortKeys(queryParameters));
  116. }
  117. // Decode query parameters
  118. if (urlObj.search !== null) {
  119. urlObj.search = decodeURIComponent(urlObj.search);
  120. }
  121. // Take advantage of many of the Node `url` normalizations
  122. str = url.format(urlObj);
  123. // Remove ending `/`
  124. if (opts.removeTrailingSlash || urlObj.pathname === '/') {
  125. str = str.replace(/\/$/, '');
  126. }
  127. // Restore relative protocol, if applicable
  128. if (hasRelativeProtocol && !opts.normalizeProtocol) {
  129. str = str.replace(/^http:\/\//, '//');
  130. }
  131. return str;
  132. };