shim.js 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. /* eslint no-bitwise: "off", max-statements: "off", max-lines: "off" */
  2. // Taken from: https://github.com/walling/unorm/blob/master/lib/unorm.js
  3. /*
  4. * UnicodeNormalizer 1.0.0
  5. * Copyright (c) 2008 Matsuza
  6. * Dual licensed under the MIT (MIT-LICENSE.txt) and
  7. * GPL (GPL-LICENSE.txt) licenses.
  8. * $Date: 2008-06-05 16:44:17 +0200 (Thu, 05 Jun 2008) $
  9. * $Rev: 13309 $
  10. */
  11. "use strict";
  12. var primitiveSet = require("../../../object/primitive-set")
  13. , validValue = require("../../../object/valid-value")
  14. , data = require("./_data");
var floor = Math.floor
// Lookup of the accepted form names; the exported function rejects any name not found here.
, forms = primitiveSet("NFC", "NFD", "NFKC", "NFKD")
// Feature record used for code points that have no data of their own. Slot 0 is read as the
// decomposition (getDecomp), slot 1 as the flag / combining-class bits, and slot 2 as the
// map of composites keyed by the following code point (getComposite).
, DEFAULT_FEATURE = [null, 0, {}]
// fromCache only starts storing results for a counter bucket once that bucket has been hit
// more than this many times.
, CACHE_THRESHOLD = 10
// Constants for the arithmetic Hangul handling in fromRuleBasedJamo.
// NOTE(review): names and values appear to follow the Hangul syllable algorithm of the
// Unicode Standard — confirm.
, SBase = 0xac00
, LBase = 0x1100
, VBase = 0x1161
, TBase = 0x11a7
, LCount = 19
, VCount = 21
, TCount = 28
, NCount = VCount * TCount
, SCount = LCount * NCount
// Constructor wrapping one code point together with its feature record.
, UChar
// Code point -> UChar memo filled by fromCache.
, cache = {}
// Hit counters used by fromCache, indexed by (cp >> 8) & 0xff.
, cacheCounter = []
// Lookup strategies; each takes (next, cp, needFeature) and they are chained into
// UChar.fromCharCode.
, fromCache
, fromData
, fromCpOnly
, fromRuleBasedJamo
, fromCpFilter
, strategies
// Iterator stages; createIterator nests them according to the requested form.
, UCharIterator
, RecursDecompIterator
, DecompIterator
, CompIterator
, createIterator
, normalize;
  43. UChar = function (cp, feature) {
  44. this.codepoint = cp;
  45. this.feature = feature;
  46. };
  47. // Strategies
  48. (function () { for (var i = 0; i <= 0xff; ++i) cacheCounter[i] = 0; })();
  49. fromCache = function (nextStep, cp, needFeature) {
  50. var ret = cache[cp];
  51. if (!ret) {
  52. ret = nextStep(cp, needFeature);
  53. if (Boolean(ret.feature) && ++cacheCounter[(cp >> 8) & 0xff] > CACHE_THRESHOLD) {
  54. cache[cp] = ret;
  55. }
  56. }
  57. return ret;
  58. };
  59. fromData = function (next, cp) {
  60. var hash = cp & 0xff00, dunit = UChar.udata[hash] || {}, feature = dunit[cp];
  61. return feature ? new UChar(cp, feature) : new UChar(cp, DEFAULT_FEATURE);
  62. };
  63. fromCpOnly = function (next, cp, needFeature) {
  64. return needFeature ? next(cp, needFeature) : new UChar(cp, null);
  65. };
  66. fromRuleBasedJamo = function (next, cp, needFeature) {
  67. var char, base, i, arr, SIndex, TIndex, feature, j;
  68. if (cp < LBase || (LBase + LCount <= cp && cp < SBase) || SBase + SCount < cp) {
  69. return next(cp, needFeature);
  70. }
  71. if (LBase <= cp && cp < LBase + LCount) {
  72. char = {};
  73. base = (cp - LBase) * VCount;
  74. for (i = 0; i < VCount; ++i) {
  75. char[VBase + i] = SBase + TCount * (i + base);
  76. }
  77. arr = new Array(3);
  78. arr[2] = char;
  79. return new UChar(cp, arr);
  80. }
  81. SIndex = cp - SBase;
  82. TIndex = SIndex % TCount;
  83. feature = [];
  84. if (TIndex === 0) {
  85. feature[0] = [LBase + floor(SIndex / NCount), VBase + floor((SIndex % NCount) / TCount)];
  86. feature[2] = {};
  87. for (j = 1; j < TCount; ++j) {
  88. feature[2][TBase + j] = cp + j;
  89. }
  90. } else {
  91. feature[0] = [SBase + SIndex - TIndex, TBase + TIndex];
  92. }
  93. return new UChar(cp, feature);
  94. };
  95. fromCpFilter = function (next, cp, needFeature) {
  96. return cp < 60 || (cp > 13311 && cp < 42607)
  97. ? new UChar(cp, DEFAULT_FEATURE)
  98. : next(cp, needFeature);
  99. };
  100. strategies = [fromCpFilter, fromCache, fromCpOnly, fromRuleBasedJamo, fromData];
  101. UChar.fromCharCode = strategies.reduceRight(function (next, strategy) {
  102. return function (cp, needFeature) { return strategy(next, cp, needFeature); };
  103. }, null);
  104. UChar.isHighSurrogate = function (cp) { return cp >= 0xd800 && cp <= 0xdbff; };
  105. UChar.isLowSurrogate = function (cp) { return cp >= 0xdc00 && cp <= 0xdfff; };
  106. UChar.prototype.prepFeature = function () {
  107. if (!this.feature) {
  108. this.feature = UChar.fromCharCode(this.codepoint, true).feature;
  109. }
  110. };
  111. UChar.prototype.toString = function () {
  112. var num;
  113. if (this.codepoint < 0x10000) return String.fromCharCode(this.codepoint);
  114. num = this.codepoint - 0x10000;
  115. return String.fromCharCode(floor(num / 0x400) + 0xd800, (num % 0x400) + 0xdc00);
  116. };
  117. UChar.prototype.getDecomp = function () {
  118. this.prepFeature();
  119. return this.feature[0] || null;
  120. };
  121. UChar.prototype.isCompatibility = function () {
  122. this.prepFeature();
  123. return Boolean(this.feature[1]) && this.feature[1] & (1 << 8);
  124. };
  125. UChar.prototype.isExclude = function () {
  126. this.prepFeature();
  127. return Boolean(this.feature[1]) && this.feature[1] & (1 << 9);
  128. };
  129. UChar.prototype.getCanonicalClass = function () {
  130. this.prepFeature();
  131. return this.feature[1] ? this.feature[1] & 0xff : 0;
  132. };
  133. UChar.prototype.getComposite = function (following) {
  134. var cp;
  135. this.prepFeature();
  136. if (!this.feature[2]) return null;
  137. cp = this.feature[2][following.codepoint];
  138. return cp ? UChar.fromCharCode(cp) : null;
  139. };
  140. UCharIterator = function (str) {
  141. this.str = str;
  142. this.cursor = 0;
  143. };
  144. UCharIterator.prototype.next = function () {
  145. if (Boolean(this.str) && this.cursor < this.str.length) {
  146. var cp = this.str.charCodeAt(this.cursor++), d;
  147. if (
  148. UChar.isHighSurrogate(cp) &&
  149. this.cursor < this.str.length &&
  150. UChar.isLowSurrogate((d = this.str.charCodeAt(this.cursor)))
  151. ) {
  152. cp = (cp - 0xd800) * 0x400 + (d - 0xdc00) + 0x10000;
  153. ++this.cursor;
  154. }
  155. return UChar.fromCharCode(cp);
  156. }
  157. this.str = null;
  158. return null;
  159. };
  160. RecursDecompIterator = function (it, cano) {
  161. this.it = it;
  162. this.canonical = cano;
  163. this.resBuf = [];
  164. };
  165. RecursDecompIterator.prototype.next = function () {
  166. var recursiveDecomp, uchar;
  167. recursiveDecomp = function (cano, ucharLoc) {
  168. var decomp = ucharLoc.getDecomp(), ret, i, a, j;
  169. if (Boolean(decomp) && !(cano && ucharLoc.isCompatibility())) {
  170. ret = [];
  171. for (i = 0; i < decomp.length; ++i) {
  172. a = recursiveDecomp(cano, UChar.fromCharCode(decomp[i]));
  173. // Ret.concat(a); //<-why does not this work?
  174. // following block is a workaround.
  175. for (j = 0; j < a.length; ++j) ret.push(a[j]);
  176. }
  177. return ret;
  178. }
  179. return [ucharLoc];
  180. };
  181. if (this.resBuf.length === 0) {
  182. uchar = this.it.next();
  183. if (!uchar) return null;
  184. this.resBuf = recursiveDecomp(this.canonical, uchar);
  185. }
  186. return this.resBuf.shift();
  187. };
  188. DecompIterator = function (it) {
  189. this.it = it;
  190. this.resBuf = [];
  191. };
  192. DecompIterator.prototype.next = function () {
  193. var cc, uchar, inspt, uchar2, cc2;
  194. if (this.resBuf.length === 0) {
  195. do {
  196. uchar = this.it.next();
  197. if (!uchar) break;
  198. cc = uchar.getCanonicalClass();
  199. inspt = this.resBuf.length;
  200. if (cc !== 0) {
  201. for (inspt; inspt > 0; --inspt) {
  202. uchar2 = this.resBuf[inspt - 1];
  203. cc2 = uchar2.getCanonicalClass();
  204. // eslint-disable-next-line max-depth
  205. if (cc2 <= cc) break;
  206. }
  207. }
  208. this.resBuf.splice(inspt, 0, uchar);
  209. } while (cc !== 0);
  210. }
  211. return this.resBuf.shift();
  212. };
  213. CompIterator = function (it) {
  214. this.it = it;
  215. this.procBuf = [];
  216. this.resBuf = [];
  217. this.lastClass = null;
  218. };
  219. CompIterator.prototype.next = function () {
  220. var uchar, starter, composite, cc;
  221. while (this.resBuf.length === 0) {
  222. uchar = this.it.next();
  223. if (!uchar) {
  224. this.resBuf = this.procBuf;
  225. this.procBuf = [];
  226. break;
  227. }
  228. if (this.procBuf.length === 0) {
  229. this.lastClass = uchar.getCanonicalClass();
  230. this.procBuf.push(uchar);
  231. } else {
  232. starter = this.procBuf[0];
  233. composite = starter.getComposite(uchar);
  234. cc = uchar.getCanonicalClass();
  235. if (Boolean(composite) && (this.lastClass < cc || this.lastClass === 0)) {
  236. this.procBuf[0] = composite;
  237. } else {
  238. if (cc === 0) {
  239. this.resBuf = this.procBuf;
  240. this.procBuf = [];
  241. }
  242. this.lastClass = cc;
  243. this.procBuf.push(uchar);
  244. }
  245. }
  246. }
  247. return this.resBuf.shift();
  248. };
  249. createIterator = function (mode, str) {
  250. switch (mode) {
  251. case "NFD":
  252. return new DecompIterator(new RecursDecompIterator(new UCharIterator(str), true));
  253. case "NFKD":
  254. return new DecompIterator(new RecursDecompIterator(new UCharIterator(str), false));
  255. case "NFC":
  256. return new CompIterator(
  257. new DecompIterator(new RecursDecompIterator(new UCharIterator(str), true))
  258. );
  259. case "NFKC":
  260. return new CompIterator(
  261. new DecompIterator(new RecursDecompIterator(new UCharIterator(str), false))
  262. );
  263. default:
  264. throw new Error(mode + " is invalid");
  265. }
  266. };
  267. normalize = function (mode, str) {
  268. var it = createIterator(mode, str), ret = "", uchar;
  269. while ((uchar = it.next())) ret += uchar.toString();
  270. return ret;
  271. };
/* Unicode data: the table loaded from ./_data, which fromData reads through UChar.udata */
UChar.udata = data;
  274. module.exports = function (/* Form*/) {
  275. var str = String(validValue(this)), form = arguments[0];
  276. if (form === undefined) form = "NFC";
  277. else form = String(form);
  278. if (!forms[form]) throw new RangeError("Invalid normalization form: " + form);
  279. return normalize(form, str);
  280. };