regjsgen.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. /*!
  2. * regjsgen 0.5.2
  3. * Copyright 2014-2020 Benjamin Tan <https://ofcr.se/>
  4. * Available under the MIT license <https://github.com/bnjmnt4n/regjsgen/blob/master/LICENSE-MIT.txt>
  5. */
  6. ;(function() {
  7. 'use strict';
  // `typeof` results that identify a value of the language type `Object`.
  var objectTypes = {
    'object': true,
    'function': true
  };

  // The global object: `window` when one is available, otherwise the `this`
  // value the enclosing wrapper was invoked with.
  var root = (objectTypes[typeof window] && window) || this;

  // The free variable `exports`, provided it exists and has no truthy
  // `nodeType` property; a falsy value otherwise.
  var freeExports = objectTypes[typeof exports] && exports && !exports.nodeType && exports;

  // Truthy when a free variable `module` exists and has no truthy `nodeType`.
  var hasFreeModule = objectTypes[typeof module] && module && !module.nodeType;

  // The free variable `global` from Node.js or Browserified code. It replaces
  // `root` only when it refers back to itself through one of its own
  // `global`, `window`, or `self` properties.
  var freeGlobal = freeExports && hasFreeModule && typeof global == 'object' && global;
  if (freeGlobal) {
    if (
      freeGlobal.global === freeGlobal ||
      freeGlobal.window === freeGlobal ||
      freeGlobal.self === freeGlobal
    ) {
      root = freeGlobal;
    }
  }

  // Cached so own-property checks do not rely on the inspected object itself.
  var hasOwnProperty = {}.hasOwnProperty;
  26. /*--------------------------------------------------------------------------*/
  // Converts a single Unicode code point (the first argument, coerced with
  // `Number`) into the string that encodes it. Throws a `RangeError` when the
  // value is not an integer in the range 0..0x10FFFF.
  // Based on https://mths.be/fromcodepoint by @mathias.
  function fromCodePoint() {
    var value = Number(arguments[0]);
    var isValid =
      isFinite(value) && // rules out `NaN`, `+Infinity`, and `-Infinity`
      value >= 0 &&
      value <= 0x10FFFF &&
      Math.floor(value) == value; // must be an integer

    if (!isValid) {
      throw RangeError('Invalid code point: ' + value);
    }

    // BMP code points fit in a single UTF-16 code unit.
    if (value <= 0xFFFF) {
      return String.fromCharCode(value);
    }

    // Astral code points are encoded as a high/low surrogate pair.
    // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
    var offset = value - 0x10000;
    return String.fromCharCode(
      0xD800 + (offset >> 10),
      0xDC00 + (offset % 0x400)
    );
  }
  51. /*--------------------------------------------------------------------------*/
  // Ensures that nodes have the correct types. Expectations that list several
  // alternatives (`a|b|c`) are compiled to an anchored regular expression once
  // and cached in this map, keyed by the expectation string.
  var assertTypeRegexMap = {};

  // Returns `undefined` when `type` satisfies `expected`; throws an `Error`
  // otherwise. `expected` is either a single type name or a `|`-separated list.
  function assertType(type, expected) {
    var hasAlternatives = expected.indexOf('|') != -1;

    if (!hasAlternatives) {
      if (type != expected) {
        throw Error('Invalid node type: ' + type + '; expected type: ' + expected);
      }
      return;
    }

    if (!hasOwnProperty.call(assertTypeRegexMap, expected)) {
      assertTypeRegexMap[expected] = RegExp('^(?:' + expected + ')$');
    }

    var matcher = assertTypeRegexMap[expected];
    if (!matcher.test(type)) {
      // The message reports the compiled pattern rather than the raw list.
      throw Error('Invalid node type: ' + type + '; expected types: ' + matcher);
    }
  }
  69. /*--------------------------------------------------------------------------*/
  // Generates a regular expression string from an AST node by dispatching to
  // the generator registered for `node.type`. Throws an `Error` when no
  // generator is registered for that type.
  function generate(node) {
    var nodeType = node.type;

    if (!hasOwnProperty.call(generators, nodeType)) {
      throw Error('Invalid node type: ' + nodeType);
    }

    return generators[nodeType](node);
  }
  // Builds a string by concatenating the output of `generator` for each term,
  // in order.
  function generateSequence(generator, terms) {
    var total = terms.length;
    var output = '';

    for (var index = 0; index < total; index++) {
      var current = terms[index];
      var following = terms[index + 1];

      // A `\0` null escape directly followed by a decimal digit symbol
      // (code points 48..57) is written as `\000`, so that the pair is not
      // treated as a backreference.
      var needsLongNull =
        index + 1 < total &&
        current.type == 'value' &&
        current.kind == 'null' &&
        following.type == 'value' &&
        following.kind == 'symbol' &&
        following.codePoint >= 48 &&
        following.codePoint <= 57;

      if (needsLongNull) {
        output += '\\000';
      } else {
        output += generator(current);
      }
    }

    return output;
  }
  104. /*--------------------------------------------------------------------------*/
  // Generates the source for an `alternative` node by emitting each entry of
  // `node.body` as a term, in order.
  function generateAlternative(node) {
    assertType(node.type, 'alternative');

    var terms = node.body;
    return generateSequence(generateTerm, terms);
  }
  // Generates the source for an `anchor` node: `^`, `$`, `\b`, or `\B`,
  // selected by `node.kind`. Throws an `Error` for any other kind.
  function generateAnchor(node) {
    assertType(node.type, 'anchor');

    var kind = node.kind;

    if (kind === 'start') {
      return '^';
    }
    if (kind === 'end') {
      return '$';
    }
    if (kind === 'boundary') {
      return '\\b';
    }
    if (kind === 'not-boundary') {
      return '\\B';
    }

    throw Error('Invalid assertion');
  }
  // Generates the source for a node used as an atom (the operand of a
  // quantifier). Throws an `Error` when the node type is not a valid atom.
  //
  // `unicodePropertyEscape` is accepted here because it is a term with a
  // registered generator; without it a quantified property escape such as
  // `\p{L}+` would be rejected before reaching its generator.
  function generateAtom(node) {
    assertType(node.type, 'anchor|characterClass|characterClassEscape|dot|group|reference|unicodePropertyEscape|value');

    return generate(node);
  }
  // Generates the source for a `characterClass` node: `[` (or `[^` when
  // `node.negative` is truthy), the class atoms from `node.body`, then `]`.
  function generateCharacterClass(node) {
    assertType(node.type, 'characterClass');

    var opening = node.negative ? '[^' : '[';
    var members = generateSequence(generateClassAtom, node.body);

    return opening + members + ']';
  }
  // Generates the source for a `characterClassEscape` node: a backslash
  // followed by `node.value`.
  function generateCharacterClassEscape(node) {
    assertType(node.type, 'characterClassEscape');

    var escapeName = node.value;
    return '\\' + escapeName;
  }
  // Generates the source for a `characterClassRange` node as `min-max`.
  // Throws an `Error` when either endpoint is itself a range.
  function generateCharacterClassRange(node) {
    assertType(node.type, 'characterClassRange');

    var lower = node.min;
    var upper = node.max;
    var hasNestedRange =
      lower.type == 'characterClassRange' || upper.type == 'characterClassRange';

    if (hasNestedRange) {
      throw Error('Invalid character class range');
    }

    var lowerSource = generateClassAtom(lower);
    var upperSource = generateClassAtom(upper);
    return lowerSource + '-' + upperSource;
  }
  // Generates the source for a node that appears inside a character class.
  // Throws an `Error` when the node type is not a valid class atom.
  //
  // `unicodePropertyEscape` is accepted here because it has a registered
  // generator; without it a class such as `[\p{L}]` would be rejected before
  // reaching that generator.
  function generateClassAtom(node) {
    assertType(node.type, 'anchor|characterClassEscape|characterClassRange|dot|unicodePropertyEscape|value');

    return generate(node);
  }
  // Generates the source for a `disjunction` node: the source of each entry
  // of `node.body`, separated by `|`.
  function generateDisjunction(node) {
    assertType(node.type, 'disjunction');

    var alternatives = node.body;
    var pieces = [];

    for (var index = 0, total = alternatives.length; index < total; index++) {
      pieces.push(generate(alternatives[index]));
    }

    return pieces.join('|');
  }
  // Generates the source for a `dot` node, which is always `.`.
  function generateDot(node) {
    var dotSource = '.';

    assertType(node.type, 'dot');
    return dotSource;
  }
  // Generates the source for a `group` node: an opening parenthesis, the
  // prefix selected by `node.behavior`, the generated contents of `node.body`,
  // and a closing parenthesis. Throws an `Error` when `node.behavior` is not
  // one of the recognised values.
  function generateGroup(node) {
    assertType(node.type, 'group');

    var result = '';

    switch (node.behavior) {
      case 'normal':
        // Only named capturing groups carry a prefix.
        if (node.name) {
          result += '?<' + generateIdentifier(node.name) + '>';
        }
        break;
      case 'ignore':
        result += '?:';
        break;
      case 'lookahead':
        result += '?=';
        break;
      case 'negativeLookahead':
        result += '?!';
        break;
      case 'lookbehind':
        result += '?<=';
        break;
      case 'negativeLookbehind':
        result += '?<!';
        break;
      default:
        // Report the property that was actually inspected (`behavior`), so the
        // message shows the offending value instead of `undefined`.
        throw Error('Invalid behaviour: ' + node.behavior);
    }

    result += generateSequence(generate, node.body);

    return '(' + result + ')';
  }
  // Generates the source for an `identifier` node, which is `node.value`
  // returned unchanged.
  function generateIdentifier(node) {
    assertType(node.type, 'identifier');

    var identifierName = node.value;
    return identifierName;
  }
  // Generates the source for a `quantifier` node: the atom in `node.body[0]`
  // followed by the shortest spelling of its `min`/`max` bounds, plus a
  // trailing `?` when the quantifier is not greedy.
  function generateQuantifier(node) {
    assertType(node.type, 'quantifier');

    var lower = node.min;
    var upper = node.max;
    var suffix;

    if (upper == null) {
      // No upper bound: `*`, `+`, or `{min,}`.
      if (lower == 0) {
        suffix = '*';
      } else if (lower == 1) {
        suffix = '+';
      } else {
        suffix = '{' + lower + ',}';
      }
    } else if (lower == upper) {
      // Exact repetition count.
      suffix = '{' + lower + '}';
    } else if (lower == 0 && upper == 1) {
      suffix = '?';
    } else {
      suffix = '{' + lower + ',' + upper + '}';
    }

    if (!node.greedy) {
      suffix += '?';
    }

    return generateAtom(node.body[0]) + suffix;
  }
  // Generates the source for a `reference` node: `\N` when `node.matchIndex`
  // is truthy, otherwise `\k<name>` when `node.name` is truthy. Throws an
  // `Error` when the node has neither.
  function generateReference(node) {
    assertType(node.type, 'reference');

    if (!node.matchIndex) {
      if (!node.name) {
        throw new Error('Unknown reference type');
      }
      return '\\k<' + generateIdentifier(node.name) + '>';
    }

    return '\\' + node.matchIndex;
  }
  // Generates the source for a node used as a term of an alternative. Throws
  // an `Error` when the node type is not in the accepted list.
  function generateTerm(node) {
    var nodeType = node.type;

    assertType(nodeType, 'anchor|characterClass|characterClassEscape|empty|group|quantifier|reference|unicodePropertyEscape|value|dot');
    return generate(node);
  }
  // Generates the source for a `unicodePropertyEscape` node: `\p{value}`, or
  // `\P{value}` when `node.negative` is truthy.
  function generateUnicodePropertyEscape(node) {
    assertType(node.type, 'unicodePropertyEscape');

    var letter = node.negative ? 'P' : 'p';
    return '\\' + letter + '{' + node.value + '}';
  }
  // Generates the source for a `value` node according to `node.kind`.
  // Throws an `Error` when `node.codePoint` is not a number, when the kind is
  // not supported, or when a `singleEscape` code point has no known escape.
  function generateValue(node) {
    assertType(node.type, 'value');

    var kind = node.kind,
        codePoint = node.codePoint;

    if (typeof codePoint != 'number') {
      throw new Error('Invalid code point: ' + codePoint);
    }

    // Formats one UTF-16 code unit as a four-digit `\uXXXX` escape.
    function unicodeEscape(codeUnit) {
      return '\\u' + ('0000' + codeUnit.toString(16).toUpperCase()).slice(-4);
    }

    switch (kind) {
      case 'controlLetter':
        // Control letters are stored as the control code; adding 64 yields the
        // letter written after `\c`.
        return '\\c' + fromCodePoint(codePoint + 64);
      case 'hexadecimalEscape':
        return '\\x' + ('00' + codePoint.toString(16).toUpperCase()).slice(-2);
      case 'identifier':
        return '\\' + fromCodePoint(codePoint);
      case 'null':
        return '\\' + codePoint;
      case 'octal':
        return '\\' + ('000' + codePoint.toString(8)).slice(-3);
      case 'singleEscape':
        switch (codePoint) {
          case 0x0008:
            return '\\b';
          case 0x0009:
            return '\\t';
          case 0x000A:
            return '\\n';
          case 0x000B:
            return '\\v';
          case 0x000C:
            return '\\f';
          case 0x000D:
            return '\\r';
          case 0x002D:
            return '\\-';
          default:
            throw Error('Invalid code point: ' + codePoint);
        }
      case 'symbol':
        return fromCodePoint(codePoint);
      case 'unicodeEscape':
        // A code point above 0xFFFF does not fit in four hexadecimal digits.
        // Emit it as a surrogate-pair escape instead of truncating it to its
        // last four digits, which would denote a different character.
        if (codePoint > 0xFFFF) {
          var offset = codePoint - 0x10000;
          return unicodeEscape((offset >> 10) + 0xD800) +
            unicodeEscape((offset % 0x400) + 0xDC00);
        }
        return unicodeEscape(codePoint);
      case 'unicodeCodePointEscape':
        return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
      default:
        throw Error('Unsupported node kind: ' + kind);
    }
  }
  294. /*--------------------------------------------------------------------------*/
  // Dispatch table consulted by `generate`: maps each supported node type to
  // the function that produces its source string.
  var generators = {
    alternative: generateAlternative,
    anchor: generateAnchor,
    characterClass: generateCharacterClass,
    characterClassEscape: generateCharacterClassEscape,
    characterClassRange: generateCharacterClassRange,
    disjunction: generateDisjunction,
    dot: generateDot,
    group: generateGroup,
    quantifier: generateQuantifier,
    reference: generateReference,
    unicodePropertyEscape: generateUnicodePropertyEscape,
    value: generateValue
  };
  310. /*--------------------------------------------------------------------------*/
  // The public API object.
  var regjsgen = { generate: generate };

  // Some AMD build optimizers, like r.js, look for this exact condition
  // pattern, so it is kept in its literal form.
  if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
    // Registered anonymously so the module can be aliased through path mapping.
    define(function() {
      return regjsgen;
    });
    // Also exposed on the global object.
    root.regjsgen = regjsgen;
  }
  // `exports` is checked after `define` in case a build optimizer adds an
  // `exports` object.
  else if (freeExports && hasFreeModule) {
    // CommonJS: attach `generate` directly to `exports`.
    freeExports.generate = generate;
  }
  else {
    // Neither module system is present: export to the global object.
    root.regjsgen = regjsgen;
  }
  332. }.call(this));