index.js 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  'use strict';

  // Pick the WHATWG `URL` constructor: the global when the runtime exposes one,
  // otherwise the export of the built-in `url` module.
  // TODO: Use the `URL` global when targeting Node.js 10
  const URLParser = typeof URL !== 'undefined' ? URL : require('url').URL;

  // Values that a data URL implies when it does not spell them out.
  // https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
  const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain';
  const DATA_URL_DEFAULT_CHARSET = 'us-ascii';
  /**
   * Check whether `name` is selected by at least one filter.
   *
   * @param {string} name - The value to test (e.g. a query parameter name or a path component).
   * @param {Array<string|RegExp>} filters - Strings are compared with `===`; regular
   *   expressions are applied with `.test()`.
   * @returns {boolean} `true` if any filter matches, `false` otherwise (including for an empty list).
   */
  const testParameter = (name, filters) => {
    return filters.some(filter => {
      if (filter instanceof RegExp) {
        // A regex carrying the `g` or `y` flag remembers `lastIndex` between
        // `.test()` calls, so reusing it across names would start matching
        // mid-string and give alternating results. Always start from 0.
        filter.lastIndex = 0;
        return filter.test(name);
      }

      return filter === name;
    });
  };
  /**
   * Normalize a `data:` URL.
   *
   * The MIME type and the `charset` value are lowercased, a `charset` equal to
   * `DATA_URL_DEFAULT_CHARSET` is dropped, the MIME type is omitted when it is
   * `DATA_URL_DEFAULT_MIME_TYPE` and no other attribute remains, and the body of a
   * base64 payload is trimmed. The scheme is always emitted as lowercase `data:`.
   *
   * @param {string} urlString - The data URL to normalize.
   * @param {{stripHash: boolean}} options - When `stripHash` is truthy the `#fragment` is removed.
   * @returns {string} The normalized data URL.
   * @throws {Error} `Invalid URL: …` when `urlString` does not have the `data:<type>,<body>` shape.
   */
  const normalizeDataURL = (urlString, {stripHash}) => {
    // The media type cannot contain `,` and the body cannot contain `#`, so the
    // groups use disjoint character classes instead of two lazy `.*?` wildcards.
    // That keeps matching linear; the wildcard form backtracked quadratically on
    // non-matching input with many commas. The `i` flag mirrors the
    // case-insensitive `data:` detection done by the caller.
    const parts = /^data:([^,]*?),([^#]*?)(?:#(.*))?$/i.exec(urlString);

    if (!parts) {
      throw new Error(`Invalid URL: ${urlString}`);
    }

    const mediaType = parts[1].split(';');
    const body = parts[2];
    const hash = stripHash ? '' : parts[3];

    // A trailing `base64` token is a flag, not an attribute; pull it off so it
    // can be re-appended last.
    let base64 = false;
    if (mediaType[mediaType.length - 1] === 'base64') {
      mediaType.pop();
      base64 = true;
    }

    // Lowercase MIME type
    const mimeType = (mediaType.shift() || '').toLowerCase();

    const attributes = mediaType
      .map(attribute => {
        let [key, value = ''] = attribute.split('=').map(string => string.trim());

        // Lowercase `charset`, and drop it entirely when it is the default
        if (key === 'charset') {
          value = value.toLowerCase();

          if (value === DATA_URL_DEFAULT_CHARSET) {
            return '';
          }
        }

        return `${key}${value ? `=${value}` : ''}`;
      })
      .filter(Boolean);

    const normalizedMediaType = [
      ...attributes
    ];

    if (base64) {
      normalizedMediaType.push('base64');
    }

    // The MIME type is required as soon as any attribute follows it; otherwise
    // it is only kept when it differs from the implied default.
    if (normalizedMediaType.length !== 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) {
      normalizedMediaType.unshift(mimeType);
    }

    return `data:${normalizedMediaType.join(';')},${base64 ? body.trim() : body}${hash ? `#${hash}` : ''}`;
  };
  /**
   * Normalize a URL string.
   *
   * Data URLs (anything starting with `data:`, case-insensitively) are delegated
   * to `normalizeDataURL`. Everything else is parsed with `URLParser`, adjusted
   * according to `options`, and serialized back to a string.
   *
   * @param {string} urlString - The URL to normalize. Surrounding whitespace is trimmed.
   * @param {object} [options] - Overrides for the defaults below.
   * @param {string} [options.defaultProtocol='http:'] - Protocol prepended when the input has none or is protocol-relative.
   * @param {boolean} [options.normalizeProtocol=true] - When `false`, a protocol-relative input (`//host`) is returned protocol-relative again.
   * @param {boolean} [options.forceHttp=false] - Rewrite `https:` to `http:`.
   * @param {boolean} [options.forceHttps=false] - Rewrite `http:` to `https:`.
   * @param {boolean} [options.stripAuthentication=true] - Remove username and password.
   * @param {boolean} [options.stripHash=false] - Remove the fragment.
   * @param {boolean} [options.stripProtocol] - Remove the leading `http://`, `https://` or `//` from the result.
   * @param {boolean} [options.stripWWW=true] - Remove a leading `www.` from hostnames matching the pattern used below.
   * @param {Array<string|RegExp>} [options.removeQueryParameters=[/^utm_\w+/i]] - Query parameters to delete; ignored if not an array.
   * @param {boolean} [options.removeTrailingSlash=true] - Remove a trailing `/` from the path.
   * @param {boolean|Array<string|RegExp>} [options.removeDirectoryIndex=false] - `true` removes a final `index.<ext>` component; an array supplies custom filters.
   * @param {boolean} [options.sortQueryParameters=true] - Sort query parameters.
   * @returns {string} The normalized URL.
   * @throws {Error} When a renamed legacy option is passed, or when both `forceHttp` and `forceHttps` are set.
   *   Errors raised by `URLParser` for unparsable input, or by `normalizeDataURL`, propagate unchanged.
   */
  const normalizeUrl = (urlString, options) => {
    options = {
      defaultProtocol: 'http:',
      normalizeProtocol: true,
      forceHttp: false,
      forceHttps: false,
      stripAuthentication: true,
      stripHash: false,
      stripWWW: true,
      removeQueryParameters: [/^utm_\w+/i],
      removeTrailingSlash: true,
      removeDirectoryIndex: false,
      sortQueryParameters: true,
      ...options
    };

    // TODO: Remove this at some point in the future
    if (Reflect.has(options, 'normalizeHttps')) {
      throw new Error('options.normalizeHttps is renamed to options.forceHttp');
    }

    if (Reflect.has(options, 'normalizeHttp')) {
      throw new Error('options.normalizeHttp is renamed to options.forceHttps');
    }

    if (Reflect.has(options, 'stripFragment')) {
      throw new Error('options.stripFragment is renamed to options.stripHash');
    }

    urlString = urlString.trim();

    // Data URL
    if (/^data:/i.test(urlString)) {
      return normalizeDataURL(urlString, options);
    }

    const hasRelativeProtocol = urlString.startsWith('//');
    const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);

    // Prepend protocol
    if (!isRelativeUrl) {
      urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol);
    }

    const urlObj = new URLParser(urlString);

    if (options.forceHttp && options.forceHttps) {
      throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
    }

    if (options.forceHttp && urlObj.protocol === 'https:') {
      urlObj.protocol = 'http:';
    }

    if (options.forceHttps && urlObj.protocol === 'http:') {
      urlObj.protocol = 'https:';
    }

    // Remove auth
    if (options.stripAuthentication) {
      urlObj.username = '';
      urlObj.password = '';
    }

    // Remove hash
    if (options.stripHash) {
      urlObj.hash = '';
    }

    // Remove duplicate slashes if not preceded by a protocol
    if (urlObj.pathname) {
      // TODO: Use the following instead when targeting Node.js 10
      // `urlObj.pathname = urlObj.pathname.replace(/(?<!https?:)\/{2,}/g, '/');`
      urlObj.pathname = urlObj.pathname.replace(/((?!:).|^)\/{2,}/g, (_, p1) => {
        // `p1` is the character captured before the slash run (or empty at the
        // start); keep it unless it is itself a slash.
        if (/^(?!\/)/g.test(p1)) {
          return `${p1}/`;
        }

        return '/';
      });
    }

    // Decode URI octets
    if (urlObj.pathname) {
      try {
        urlObj.pathname = decodeURI(urlObj.pathname);
      } catch (error) {
        // `decodeURI` rejects malformed percent-sequences (e.g. a lone `%`)
        // that the URL parser accepted. Such a path simply cannot be decoded,
        // so it is kept as parsed instead of failing the whole normalization.
        if (!(error instanceof URIError)) {
          throw error;
        }
      }
    }

    // Remove directory index
    if (options.removeDirectoryIndex === true) {
      options.removeDirectoryIndex = [/^index\.[a-z]+$/];
    }

    if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
      let pathComponents = urlObj.pathname.split('/');
      const lastComponent = pathComponents[pathComponents.length - 1];

      if (testParameter(lastComponent, options.removeDirectoryIndex)) {
        pathComponents = pathComponents.slice(0, pathComponents.length - 1);
        // The first component is the empty string before the leading `/`.
        urlObj.pathname = pathComponents.slice(1).join('/') + '/';
      }
    }

    if (urlObj.hostname) {
      // Remove trailing dot
      urlObj.hostname = urlObj.hostname.replace(/\.$/, '');

      // Remove `www.`
      if (options.stripWWW && /^www\.([a-z\-\d]{2,63})\.([a-z.]{2,5})$/.test(urlObj.hostname)) {
        // Each label should be max 63 at length (min: 2).
        // The extension should be max 5 at length (min: 2).
        // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
        urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
      }
    }

    // Remove query unwanted parameters
    if (Array.isArray(options.removeQueryParameters)) {
      // Iterate over a snapshot of the keys because entries are deleted in the loop.
      for (const key of [...urlObj.searchParams.keys()]) {
        if (testParameter(key, options.removeQueryParameters)) {
          urlObj.searchParams.delete(key);
        }
      }
    }

    // Sort query parameters
    if (options.sortQueryParameters) {
      urlObj.searchParams.sort();
    }

    if (options.removeTrailingSlash) {
      urlObj.pathname = urlObj.pathname.replace(/\/$/, '');
    }

    // Take advantage of many of the Node `url` normalizations
    urlString = urlObj.toString();

    // Remove ending `/`
    if ((options.removeTrailingSlash || urlObj.pathname === '/') && urlObj.hash === '') {
      urlString = urlString.replace(/\/$/, '');
    }

    // Restore relative protocol, if applicable
    if (hasRelativeProtocol && !options.normalizeProtocol) {
      urlString = urlString.replace(/^http:\/\//, '//');
    }

    // Remove http/https
    if (options.stripProtocol) {
      urlString = urlString.replace(/^(?:https?:)?\/\//, '');
    }

    return urlString;
  };
  // The function is also reachable as its own `default` property.
  // TODO: Remove this for the next major release
  normalizeUrl.default = normalizeUrl;

  // The module's export is the `normalizeUrl` function itself.
  module.exports = normalizeUrl;