123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- 'use strict';
- const url = require('url');
- const punycode = require('punycode');
- const queryString = require('query-string');
- const prependHttp = require('prepend-http');
- const sortKeys = require('sort-keys');
// Default port for each supported protocol, keyed by the protocol string
// including its trailing colon. A URL whose explicit port equals the default
// for its protocol has that port removed during normalization.
const DEFAULT_PORTS = Object.freeze({
  'http:': 80,
  'https:': 443,
  'ftp:': 21
});
// Protocols that always contain a `//` bit.
// Each protocol is listed both with and without its trailing colon so a lookup
// succeeds whichever form the caller has.
const slashedProtocol = Object.freeze({
  http: true,
  https: true,
  ftp: true,
  gopher: true,
  file: true,
  'http:': true,
  'https:': true,
  'ftp:': true,
  'gopher:': true,
  'file:': true
});
/**
 * Tells whether `name` is matched by at least one entry of `filters`.
 *
 * @param {string} name - The value to check (a query key or a path component).
 * @param {Array<string|RegExp>} filters - Strings are compared with `===`;
 *   regular expressions are applied with `.test()`.
 * @returns {boolean} `true` if any filter matches, otherwise `false`.
 */
function testParameter(name, filters) {
  return filters.some(filter => {
    if (!(filter instanceof RegExp)) {
      return filter === name;
    }

    // A regex with the `g` or `y` flag remembers `lastIndex` between calls, so
    // reusing the same filter for several names would alternately match and
    // miss. Always start the search from the beginning of `name`.
    if (filter.global || filter.sticky) {
      filter.lastIndex = 0;
    }

    return filter.test(name);
  });
}
- module.exports = (str, opts) => {
- opts = Object.assign({
- normalizeProtocol: true,
- normalizeHttps: false,
- stripFragment: true,
- stripWWW: true,
- removeQueryParameters: [/^utm_\w+/i],
- removeTrailingSlash: true,
- removeDirectoryIndex: false,
- sortQueryParameters: true
- }, opts);
- if (typeof str !== 'string') {
- throw new TypeError('Expected a string');
- }
- const hasRelativeProtocol = str.startsWith('//');
- // Prepend protocol
- str = prependHttp(str.trim()).replace(/^\/\//, 'http://');
- const urlObj = url.parse(str);
- if (opts.normalizeHttps && urlObj.protocol === 'https:') {
- urlObj.protocol = 'http:';
- }
- if (!urlObj.hostname && !urlObj.pathname) {
- throw new Error('Invalid URL');
- }
- // Prevent these from being used by `url.format`
- delete urlObj.host;
- delete urlObj.query;
- // Remove fragment
- if (opts.stripFragment) {
- delete urlObj.hash;
- }
- // Remove default port
- const port = DEFAULT_PORTS[urlObj.protocol];
- if (Number(urlObj.port) === port) {
- delete urlObj.port;
- }
- // Remove duplicate slashes
- if (urlObj.pathname) {
- urlObj.pathname = urlObj.pathname.replace(/\/{2,}/g, '/');
- }
- // Decode URI octets
- if (urlObj.pathname) {
- urlObj.pathname = decodeURI(urlObj.pathname);
- }
- // Remove directory index
- if (opts.removeDirectoryIndex === true) {
- opts.removeDirectoryIndex = [/^index\.[a-z]+$/];
- }
- if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length > 0) {
- let pathComponents = urlObj.pathname.split('/');
- const lastComponent = pathComponents[pathComponents.length - 1];
- if (testParameter(lastComponent, opts.removeDirectoryIndex)) {
- pathComponents = pathComponents.slice(0, pathComponents.length - 1);
- urlObj.pathname = pathComponents.slice(1).join('/') + '/';
- }
- }
- // Resolve relative paths, but only for slashed protocols
- if (slashedProtocol[urlObj.protocol]) {
- const domain = urlObj.protocol + '//' + urlObj.hostname;
- const relative = url.resolve(domain, urlObj.pathname);
- urlObj.pathname = relative.replace(domain, '');
- }
- if (urlObj.hostname) {
- // IDN to Unicode
- urlObj.hostname = punycode.toUnicode(urlObj.hostname).toLowerCase();
- // Remove trailing dot
- urlObj.hostname = urlObj.hostname.replace(/\.$/, '');
- // Remove `www.`
- if (opts.stripWWW) {
- urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
- }
- }
- // Remove URL with empty query string
- if (urlObj.search === '?') {
- delete urlObj.search;
- }
- const queryParameters = queryString.parse(urlObj.search);
- // Remove query unwanted parameters
- if (Array.isArray(opts.removeQueryParameters)) {
- for (const key in queryParameters) {
- if (testParameter(key, opts.removeQueryParameters)) {
- delete queryParameters[key];
- }
- }
- }
- // Sort query parameters
- if (opts.sortQueryParameters) {
- urlObj.search = queryString.stringify(sortKeys(queryParameters));
- }
- // Decode query parameters
- if (urlObj.search !== null) {
- urlObj.search = decodeURIComponent(urlObj.search);
- }
- // Take advantage of many of the Node `url` normalizations
- str = url.format(urlObj);
- // Remove ending `/`
- if (opts.removeTrailingSlash || urlObj.pathname === '/') {
- str = str.replace(/\/$/, '');
- }
- // Restore relative protocol, if applicable
- if (hasRelativeProtocol && !opts.normalizeProtocol) {
- str = str.replace(/^http:\/\//, '//');
- }
- return str;
- };
|