index.js 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. 'use strict';
  2. var url = require('url');
  3. var punycode = require('punycode');
  4. var queryString = require('query-string');
  5. var prependHttp = require('prepend-http');
  6. var sortKeys = require('sort-keys');
  7. var objectAssign = require('object-assign');
  // Default port for each supported protocol. Keys keep the trailing `:`
  // because they are looked up with the `protocol` value of a parsed URL.
  // An explicit port equal to the entry for its protocol is redundant.
  var DEFAULT_PORTS = {
    'http:': 80,
    'https:': 443,
    'ftp:': 21
  };
  // Protocols that always contain a `//` bit after the scheme.
  // Each protocol is listed both with and without the trailing `:` so a
  // lookup succeeds for either spelling.
  var slashedProtocol = {
    'http': true,
    'https': true,
    'ftp': true,
    'gopher': true,
    'file': true,
    'http:': true,
    'https:': true,
    'ftp:': true,
    'gopher:': true,
    'file:': true
  };
  /**
   * Check whether `name` matches at least one of the given filters.
   *
   * @param {string} name - Value to test.
   * @param {Array<string|RegExp>} filters - A RegExp filter is tested against
   *   `name`; any other filter value is compared to `name` with `===`.
   * @returns {boolean} `true` when at least one filter matches, otherwise
   *   `false` (including when `filters` is empty).
   */
  function testParameter(name, filters) {
    return filters.some(function (filter) {
      if (filter instanceof RegExp) {
        // Regexes with the `g` or `y` flag remember `lastIndex` between
        // `test` calls; reset it so each name is matched from its start and
        // a reused filter does not alternate between hit and miss.
        filter.lastIndex = 0;
        return filter.test(name);
      }

      return filter === name;
    });
  }
  31. module.exports = function (str, opts) {
  32. opts = objectAssign({
  33. normalizeProtocol: true,
  34. normalizeHttps: false,
  35. stripFragment: true,
  36. stripWWW: true,
  37. removeQueryParameters: [/^utm_\w+/i],
  38. removeTrailingSlash: true,
  39. removeDirectoryIndex: false
  40. }, opts);
  41. if (typeof str !== 'string') {
  42. throw new TypeError('Expected a string');
  43. }
  44. var hasRelativeProtocol = str.indexOf('//') === 0;
  45. // prepend protocol
  46. str = prependHttp(str.trim()).replace(/^\/\//, 'http://');
  47. var urlObj = url.parse(str);
  48. if (opts.normalizeHttps && urlObj.protocol === 'https:') {
  49. urlObj.protocol = 'http:';
  50. }
  51. if (!urlObj.hostname && !urlObj.pathname) {
  52. throw new Error('Invalid URL');
  53. }
  54. // prevent these from being used by `url.format`
  55. delete urlObj.host;
  56. delete urlObj.query;
  57. // remove fragment
  58. if (opts.stripFragment) {
  59. delete urlObj.hash;
  60. }
  61. // remove default port
  62. var port = DEFAULT_PORTS[urlObj.protocol];
  63. if (Number(urlObj.port) === port) {
  64. delete urlObj.port;
  65. }
  66. // remove duplicate slashes
  67. if (urlObj.pathname) {
  68. urlObj.pathname = urlObj.pathname.replace(/\/{2,}/g, '/');
  69. }
  70. // decode URI octets
  71. if (urlObj.pathname) {
  72. urlObj.pathname = decodeURI(urlObj.pathname);
  73. }
  74. // remove directory index
  75. if (opts.removeDirectoryIndex === true) {
  76. opts.removeDirectoryIndex = [/^index\.[a-z]+$/];
  77. }
  78. if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length) {
  79. var pathComponents = urlObj.pathname.split('/');
  80. var lastComponent = pathComponents[pathComponents.length - 1];
  81. if (testParameter(lastComponent, opts.removeDirectoryIndex)) {
  82. pathComponents = pathComponents.slice(0, pathComponents.length - 1);
  83. urlObj.pathname = pathComponents.slice(1).join('/') + '/';
  84. }
  85. }
  86. // resolve relative paths, but only for slashed protocols
  87. if (slashedProtocol[urlObj.protocol]) {
  88. var domain = urlObj.protocol + '//' + urlObj.hostname;
  89. var relative = url.resolve(domain, urlObj.pathname);
  90. urlObj.pathname = relative.replace(domain, '');
  91. }
  92. if (urlObj.hostname) {
  93. // IDN to Unicode
  94. urlObj.hostname = punycode.toUnicode(urlObj.hostname).toLowerCase();
  95. // remove trailing dot
  96. urlObj.hostname = urlObj.hostname.replace(/\.$/, '');
  97. // remove `www.`
  98. if (opts.stripWWW) {
  99. urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
  100. }
  101. }
  102. // remove URL with empty query string
  103. if (urlObj.search === '?') {
  104. delete urlObj.search;
  105. }
  106. var queryParameters = queryString.parse(urlObj.search);
  107. // remove query unwanted parameters
  108. if (Array.isArray(opts.removeQueryParameters)) {
  109. for (var key in queryParameters) {
  110. if (testParameter(key, opts.removeQueryParameters)) {
  111. delete queryParameters[key];
  112. }
  113. }
  114. }
  115. // sort query parameters
  116. urlObj.search = queryString.stringify(sortKeys(queryParameters));
  117. // decode query parameters
  118. urlObj.search = decodeURIComponent(urlObj.search);
  119. // take advantage of many of the Node `url` normalizations
  120. str = url.format(urlObj);
  121. // remove ending `/`
  122. if (opts.removeTrailingSlash || urlObj.pathname === '/') {
  123. str = str.replace(/\/$/, '');
  124. }
  125. // restore relative protocol, if applicable
  126. if (hasRelativeProtocol && !opts.normalizeProtocol) {
  127. str = str.replace(/^http:\/\//, '//');
  128. }
  129. return str;
  130. };