TokenizeUtil.js.flow 1.3 KB

12345678910111213141516171819202122232425262728293031323334
  1. /**
  2. * Copyright (c) 2013-present, Facebook, Inc.
  3. *
  4. * This source code is licensed under the MIT license found in the
  5. * LICENSE file in the root directory of this source tree.
  6. *
  7. * @providesModule TokenizeUtil
  8. * @typechecks
  9. * @stub
  10. * @flow
  11. */
  12. 'use strict';
  13. // \u00a1-\u00b1\u00b4-\u00b8\u00ba\u00bb\u00bf
  14. // is latin supplement punctuation except fractions and superscript
  15. // numbers
  16. // \u2010-\u2027\u2030-\u205e
  17. // is punctuation from the general punctuation block:
  18. // weird quotes, commas, bullets, dashes, etc.
  19. // \u30fb\u3001\u3002\u3008-\u3011\u3014-\u301f
  20. // is CJK punctuation
  21. // \uff1a-\uff1f\uff01-\uff0f\uff3b-\uff40\uff5b-\uff65
  22. // is some full-width/half-width punctuation
  23. // \u2E2E\u061f\u066a-\u066c\u061b\u060c\u060d\uFD3e\uFD3F
  24. // is some Arabic punctuation marks
  25. // \u1801\u0964\u104a\u104b
  26. // is misc. other language punctuation marks
  27. var PUNCTUATION = '[.,+*?$|#{}()\'\\^\\-\\[\\]\\\\\\/!@%"~=<>_:;' + '\u30fb\u3001\u3002\u3008-\u3011\u3014-\u301f\uff1a-\uff1f\uff01-\uff0f' + '\uff3b-\uff40\uff5b-\uff65\u2E2E\u061f\u066a-\u066c\u061b\u060c\u060d' + '\uFD3e\uFD3F\u1801\u0964\u104a\u104b\u2010-\u2027\u2030-\u205e' + '\u00a1-\u00b1\u00b4-\u00b8\u00ba\u00bb\u00bf]';
  28. module.exports = {
  29. getPunctuation: (): string => PUNCTUATION
  30. };