// punycode.js (14 KB)
  1. /*! https://mths.be/punycode v1.4.1 by @mathias */
  2. ;(function(root) {
  /**
   * Detect free variables. `exports`, `module` and `global` are each kept only
   * when they are truthy objects; otherwise the variable holds a falsy value.
   * `exports` and `module` are additionally rejected when they carry a truthy
   * `nodeType` property (presumably to skip DOM elements exposed as globals).
   */
  var freeExports = typeof exports == 'object' && exports &&
    !exports.nodeType && exports;
  var freeModule = typeof module == 'object' && module &&
    !module.nodeType && module;
  var freeGlobal = typeof global == 'object' && global;
  // Use the detected global object as `root` only when it references itself
  // through one of the usual self-referencing properties.
  if (
    freeGlobal.global === freeGlobal ||
    freeGlobal.window === freeGlobal ||
    freeGlobal.self === freeGlobal
  ) {
    root = freeGlobal;
  }
  /**
   * The `punycode` object.
   * @name punycode
   * @type Object
   */
  var punycode,

  /** Highest positive signed 32-bit integer value */
  maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

  /** Bootstring parameters */
  base = 36,
  tMin = 1,
  tMax = 26,
  skew = 38,
  damp = 700,
  initialBias = 72,
  initialN = 128, // 0x80
  delimiter = '-', // '\x2D'

  /** Regular expressions */
  regexPunycode = /^xn--/,
  regexNonASCII = /[^\x20-\x7E]/, // unprintable ASCII chars + non-ASCII chars
  regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g, // RFC 3490 separators

  /** Error messages, keyed by the error type passed to `error()` */
  errors = {
    'overflow': 'Overflow: input needs wider integers to process',
    'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
    'invalid-input': 'Invalid input'
  },

  /** Convenience shortcuts */
  baseMinusTMin = base - tMin,
  floor = Math.floor,
  stringFromCharCode = String.fromCharCode,

  /** Temporary variable */
  key;
  49. /*--------------------------------------------------------------------------*/
  /**
   * A generic error utility function.
   * @private
   * @param {String} type The error type; used as a key into `errors`.
   * @returns {Error} Never returns normally: the function always throws.
   * @throws {RangeError} Carrying the message stored in `errors` for `type`.
   */
  function error(type) {
    throw new RangeError(errors[type]);
  }
  /**
   * A generic `Array#map` utility function.
   * @private
   * @param {Array} array The array to iterate over.
   * @param {Function} fn The function that gets called for every array item;
   * it receives the item as its only argument.
   * @returns {Array} A new array of values returned by `fn`, in the same
   * positions as the corresponding input items.
   */
  function map(array, fn) {
    var length = array.length;
    var result = [];
    // Items are visited from the last index down to the first.
    while (length--) {
      result[length] = fn(array[length]);
    }
    return result;
  }
  /**
   * A simple `Array#map`-like wrapper to work with domain name strings or email
   * addresses.
   * @private
   * @param {String} string The domain name or email address.
   * @param {Function} fn The function that gets called for every label (the
   * dot-separated parts of the domain name).
   * @returns {String} A new string built from the labels returned by `fn`,
   * joined with `.` and prefixed with the untouched local part, if any.
   */
  function mapDomain(string, fn) {
    var result = '';
    // In email addresses, only the domain name should be punycoded. Leave
    // the local part (i.e. everything up to and including the last `@`)
    // intact. Splitting on the last `@` keeps inputs containing several `@`
    // characters from being truncated.
    var atIndex = string.lastIndexOf('@');
    if (atIndex > -1) {
      result = string.slice(0, atIndex + 1);
      string = string.slice(atIndex + 1);
    }
    // Avoid `split(regex)` for IE8 compatibility. See #17.
    string = string.replace(regexSeparators, '\x2E');
    var labels = string.split('.');
    var encoded = map(labels, fn).join('.');
    return result + encoded;
  }
  /**
   * Creates an array containing the numeric code points of each Unicode
   * character in the string. While JavaScript uses UCS-2 internally,
   * this function will convert a pair of surrogate halves (each of which
   * UCS-2 exposes as separate characters) into a single code point,
   * matching UTF-16. Unpaired surrogates are emitted unchanged as their own
   * entries.
   * @see `punycode.ucs2.encode`
   * @see <https://mathiasbynens.be/notes/javascript-encoding>
   * @memberOf punycode.ucs2
   * @name decode
   * @param {String} string The Unicode input string (UCS-2).
   * @returns {Array} The new array of code points.
   */
  function ucs2decode(string) {
    var output = [],
        counter = 0,
        length = string.length,
        value,
        extra;
    while (counter < length) {
      value = string.charCodeAt(counter++);
      if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
        // high surrogate, and there is a next character
        extra = string.charCodeAt(counter++);
        if ((extra & 0xFC00) === 0xDC00) { // low surrogate
          output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
        } else {
          // unmatched surrogate; only append this code unit, in case the next
          // code unit is the high surrogate of a surrogate pair
          output.push(value);
          counter--;
        }
      } else {
        output.push(value);
      }
    }
    return output;
  }
  /**
   * Creates a string based on an array of numeric code points. Code points
   * above 0xFFFF are written as a high/low surrogate pair.
   * @see `punycode.ucs2.decode`
   * @memberOf punycode.ucs2
   * @name encode
   * @param {Array} array The array of numeric code points.
   * @returns {String} The new Unicode string (UCS-2).
   */
  function ucs2encode(array) {
    return map(array, function(value) {
      var output = '';
      if (value > 0xFFFF) {
        value -= 0x10000;
        // Top 10 bits go into the high surrogate...
        output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
        // ...and the low 10 bits into the low surrogate.
        value = 0xDC00 | value & 0x3FF;
      }
      output += stringFromCharCode(value);
      return output;
    }).join('');
  }
  /**
   * Converts a basic code point into a digit/integer.
   * @see `digitToBasic()`
   * @private
   * @param {Number} codePoint The basic numeric code point value.
   * @returns {Number} The numeric value of a basic code point (for use in
   * representing integers) in the range `0` to `base - 1`, or `base` if
   * the code point does not represent a value.
   */
  function basicToDigit(codePoint) {
    // Both bounds are checked explicitly: a bare `codePoint - 48 < 10` is also
    // true for every code point below `0` (e.g. `-` or `!`), which would map
    // non-digits to bogus values instead of `base`.
    if (codePoint >= 48 && codePoint < 58) {
      // ASCII 0..9 map to 26..35
      return codePoint - 22;
    }
    if (codePoint >= 65 && codePoint < 91) {
      // ASCII A..Z map to 0..25
      return codePoint - 65;
    }
    if (codePoint >= 97 && codePoint < 123) {
      // ASCII a..z map to 0..25
      return codePoint - 97;
    }
    return base;
  }
  /**
   * Converts a digit/integer into a basic code point.
   * @see `basicToDigit()`
   * @private
   * @param {Number} digit The numeric value of a basic code point.
   * @param {*} flag Selects the letter case: any value for which `flag != 0`
   * holds yields the uppercase form.
   * @returns {Number} The basic code point whose value (when used for
   * representing integers) is `digit`, which needs to be in the range
   * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
   * used; else, the lowercase form is used. The behavior is undefined
   * if `flag` is non-zero and `digit` has no uppercase form.
   */
  function digitToBasic(digit, flag) {
    //  0..25 map to ASCII a..z or A..Z
    // 26..35 map to ASCII 0..9
    var codePoint = digit < 26 ? digit + 97 : digit + 22;
    // Uppercase letters sit 32 (1 << 5) code points below the lowercase ones.
    return codePoint - ((flag != 0) << 5);
  }
  /**
   * Bias adaptation function as per section 3.4 of RFC 3492.
   * https://tools.ietf.org/html/rfc3492#section-3.4
   * @private
   * @param {Number} delta The delta that was just encoded or decoded.
   * @param {Number} numPoints Divisor applied to the scaled delta; callers
   * pass the number of code points handled so far, plus one.
   * @param {Boolean} firstTime When truthy, `delta` is scaled down by `damp`;
   * otherwise it is halved.
   * @returns {Number} The new bias.
   */
  function adapt(delta, numPoints, firstTime) {
    var k = 0;
    delta = firstTime ? floor(delta / damp) : delta >> 1;
    delta += floor(delta / numPoints);
    for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
      delta = floor(delta / baseMinusTMin);
    }
    return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
  }
  /**
   * Converts a Punycode string of ASCII-only symbols to a string of Unicode
   * symbols.
   * @memberOf punycode
   * @param {String} input The Punycode string of ASCII-only symbols.
   * @returns {String} The resulting string of Unicode symbols.
   * @throws {RangeError} `not-basic` when a code unit before the last
   * delimiter is >= 0x80; `invalid-input` when the input ends in the middle
   * of a variable-length integer or contains a symbol that is not a digit;
   * `overflow` when a value would exceed `maxInt`.
   */
  function decode(input) {
    // Don't use UCS-2: `output` collects numeric code points, which are only
    // turned back into a string by `ucs2encode` at the very end.
    var output = [],
        inputLength = input.length,
        out,
        i = 0,
        n = initialN,
        bias = initialBias,
        basic,
        j,
        index,
        oldi,
        w,
        k,
        digit,
        t,
        /** Cached calculation results */
        baseMinusT;

    // Handle the basic code points: let `basic` be the number of input code
    // points before the last delimiter, or `0` if there is none, then copy
    // the first basic code points to the output.
    basic = input.lastIndexOf(delimiter);
    if (basic < 0) {
      basic = 0;
    }

    for (j = 0; j < basic; ++j) {
      // if it's not a basic code point
      if (input.charCodeAt(j) >= 0x80) {
        error('not-basic');
      }
      output.push(input.charCodeAt(j));
    }

    // Main decoding loop: start just after the last delimiter if any basic code
    // points were copied; start at the beginning otherwise.
    for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

      // `index` is the index of the next character to be consumed.
      // Decode a generalized variable-length integer into `delta`,
      // which gets added to `i`. The overflow checking is easier
      // if we increase `i` as we go, then subtract off its starting
      // value at the end to obtain `delta`.
      for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

        if (index >= inputLength) {
          error('invalid-input');
        }

        digit = basicToDigit(input.charCodeAt(index++));

        // A symbol that is not a digit is malformed input, not an overflow,
        // so the two conditions are reported separately.
        if (digit >= base) {
          error('invalid-input');
        }
        if (digit > floor((maxInt - i) / w)) {
          error('overflow');
        }

        i += digit * w;
        t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

        if (digit < t) {
          break;
        }

        baseMinusT = base - t;
        if (w > floor(maxInt / baseMinusT)) {
          error('overflow');
        }

        w *= baseMinusT;

      }

      out = output.length + 1;
      bias = adapt(i - oldi, out, oldi === 0);

      // `i` was supposed to wrap around from `out` to `0`,
      // incrementing `n` each time, so we'll fix that now:
      if (floor(i / out) > maxInt - n) {
        error('overflow');
      }

      n += floor(i / out);
      i %= out;

      // Insert `n` at position `i` of the output
      output.splice(i++, 0, n);

    }

    return ucs2encode(output);
  }
  /**
   * Converts a string of Unicode symbols (e.g. a domain name label) to a
   * Punycode string of ASCII-only symbols.
   * @memberOf punycode
   * @param {String} input The string of Unicode symbols.
   * @returns {String} The resulting Punycode string of ASCII-only symbols.
   * @throws {RangeError} `overflow` when `delta` would exceed `maxInt`.
   */
  function encode(input) {
    var n,
        delta,
        handledCPCount,
        basicLength,
        bias,
        j,
        m,
        q,
        k,
        t,
        currentValue,
        output = [],
        /** `inputLength` will hold the number of code points in `input`. */
        inputLength,
        /** Cached calculation results */
        handledCPCountPlusOne,
        baseMinusT,
        qMinusT;

    // Convert the input in UCS-2 to Unicode
    input = ucs2decode(input);

    // Cache the length
    inputLength = input.length;

    // Initialize the state
    n = initialN;
    delta = 0;
    bias = initialBias;

    // Handle the basic code points
    for (j = 0; j < inputLength; ++j) {
      currentValue = input[j];
      if (currentValue < 0x80) {
        output.push(stringFromCharCode(currentValue));
      }
    }

    handledCPCount = basicLength = output.length;

    // `handledCPCount` is the number of code points that have been handled;
    // `basicLength` is the number of basic code points.

    // Finish the basic string - if it is not empty - with a delimiter
    if (basicLength) {
      output.push(delimiter);
    }

    // Main encoding loop:
    while (handledCPCount < inputLength) {

      // All non-basic code points < n have been handled already. Find the next
      // larger one:
      for (m = maxInt, j = 0; j < inputLength; ++j) {
        currentValue = input[j];
        if (currentValue >= n && currentValue < m) {
          m = currentValue;
        }
      }

      // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
      // but guard against overflow
      handledCPCountPlusOne = handledCPCount + 1;
      if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
        error('overflow');
      }

      delta += (m - n) * handledCPCountPlusOne;
      n = m;

      for (j = 0; j < inputLength; ++j) {
        currentValue = input[j];

        if (currentValue < n && ++delta > maxInt) {
          error('overflow');
        }

        if (currentValue === n) {
          // Represent delta as a generalized variable-length integer
          for (q = delta, k = base; /* no condition */; k += base) {
            t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
            if (q < t) {
              break;
            }
            qMinusT = q - t;
            baseMinusT = base - t;
            output.push(
              stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
            );
            q = floor(qMinusT / baseMinusT);
          }

          // The remaining `q` is the final (most significant) digit.
          output.push(stringFromCharCode(digitToBasic(q, 0)));
          bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
          delta = 0;
          ++handledCPCount;
        }
      }

      ++delta;
      ++n;

    }
    return output.join('');
  }
  /**
   * Converts a Punycode string representing a domain name or an email address
   * to Unicode. Only the Punycoded parts of the input will be converted, i.e.
   * it doesn't matter if you call it on a string that has already been
   * converted to Unicode.
   * @memberOf punycode
   * @param {String} input The Punycoded domain name or email address to
   * convert to Unicode.
   * @returns {String} The Unicode representation of the given Punycode
   * string.
   */
  function toUnicode(input) {
    return mapDomain(input, function(string) {
      // Labels matching `regexPunycode` have their 4-character prefix
      // stripped and the remainder lowercased before decoding; every other
      // label is passed through untouched.
      return regexPunycode.test(string)
        ? decode(string.slice(4).toLowerCase())
        : string;
    });
  }
  /**
   * Converts a Unicode string representing a domain name or an email address to
   * Punycode. Only the non-ASCII parts of the domain name will be converted,
   * i.e. it doesn't matter if you call it with a domain that's already in
   * ASCII.
   * @memberOf punycode
   * @param {String} input The domain name or email address to convert, as a
   * Unicode string.
   * @returns {String} The Punycode representation of the given domain name or
   * email address.
   */
  function toASCII(input) {
    return mapDomain(input, function(string) {
      // Labels matching `regexNonASCII` are encoded and prefixed with `xn--`;
      // every other label is passed through untouched.
      return regexNonASCII.test(string)
        ? 'xn--' + encode(string)
        : string;
    });
  }
  421. /*--------------------------------------------------------------------------*/
  /** Define the public API */
  punycode = {
    /**
     * A string representing the current Punycode.js version number.
     * @memberOf punycode
     * @type String
     */
    'version': '1.4.1',
    /**
     * An object of methods to convert from JavaScript's internal character
     * representation (UCS-2) to Unicode code points, and back.
     * @see <https://mathiasbynens.be/notes/javascript-encoding>
     * @memberOf punycode
     * @type Object
     */
    'ucs2': {
      'decode': ucs2decode,
      'encode': ucs2encode
    },
    'decode': decode,
    'encode': encode,
    'toASCII': toASCII,
    'toUnicode': toUnicode
  };
  /**
   * Expose `punycode` through whichever module system was detected: an AMD
   * `define`, a CommonJS-style `module`/`exports` pair, or, failing both, a
   * property on `root`.
   */
  // Some AMD build optimizers, like r.js, check for specific condition patterns
  // like the following:
  if (
    typeof define == 'function' &&
    typeof define.amd == 'object' &&
    define.amd
  ) {
    define('punycode', function() {
      return punycode;
    });
  } else if (freeExports && freeModule) {
    if (module.exports == freeExports) {
      // in Node.js, io.js, or RingoJS v0.8.0+
      freeModule.exports = punycode;
    } else {
      // in Narwhal or RingoJS v0.7.0-
      // Copy the own properties of `punycode` onto `exports` one by one.
      for (key in punycode) {
        punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]);
      }
    }
  } else {
    // in Rhino or a web browser
    root.punycode = punycode;
  }
  471. }(this));