UnicodeUtilsExtra.js 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. /**
  2. * Copyright (c) 2013-present, Facebook, Inc.
  3. *
  4. * This source code is licensed under the MIT license found in the
  5. * LICENSE file in the root directory of this source tree.
  6. *
  7. * @typechecks
  8. */
  9. /**
  10. * Unicode-enabled extra utility functions not always needed.
  11. */
  12. 'use strict';
  13. var UnicodeUtils = require('./UnicodeUtils');
  /**
   * Formats a number as upper-case hexadecimal, left-padded with zeros.
   *
   * @param {number} codePoint Valid Unicode code-point
   * @param {number} len Minimum width of the result; shorter values are
   *   padded on the left with '0', longer values are returned unshortened
   * @return {string} Upper-case hexadecimal digits (e.g. 0041 for len 4)
   */
  function zeroPaddedHex(codePoint, len) {
    var hex = codePoint.toString(16).toUpperCase();
    // Prepend one zero at a time until the requested width is reached.
    while (hex.length < len) {
      hex = '0' + hex;
    }
    return hex;
  }
  /**
   * Renders a code-point in "U+" notation.
   *
   * @param {number} codePoint Valid Unicode code-point
   * @return {string} A formatted Unicode code-point string
   *   of the format U+XXXX, U+XXXXX, or U+XXXXXX
   */
  function formatCodePoint(codePoint) {
    // Falsy input (including NaN) is rendered as code-point 0.
    var value = codePoint || 0;
    // Values up to 0xFFFF are padded to four digits; larger values are
    // emitted with however many digits they have.
    var digits = value <= 0xFFFF
      ? zeroPaddedHex(value, 4)
      : value.toString(16).toUpperCase();
    return 'U+' + digits;
  }
  /**
   * Converts a string into the formatted notation of each of its code-points.
   *
   * @param {string} str Valid Unicode string
   * @return {array<string>} One formatted code-point string for every entry
   *   returned by UnicodeUtils.getCodePoints, in the same order
   */
  function getCodePointsFormatted(str) {
    return UnicodeUtils.getCodePoints(str).map(formatCodePoint);
  }
  /**
   * Backslash escapes keyed by code-point, listed in ascending code-point
   * order. Each value is the two-character sequence (a backslash followed by
   * one character) that the escaping functions below emit for that
   * code-point.
   */
  var specialEscape = {
    0x07: '\\a', // BEL
    0x08: '\\b', // backspace
    0x09: '\\t', // horizontal tab
    0x0A: '\\n', // line feed
    0x0B: '\\v', // vertical tab
    0x0C: '\\f', // form feed
    0x0D: '\\r', // carriage return
    0x22: '\\"', // double quote
    0x5C: '\\\\' // backslash
  };
  /**
   * Escapes that PHP recognises inside a double-quoted string, keyed by
   * code-point.
   *
   * PHP has no \a or \b escape (it keeps them as a literal backslash plus
   * letter), so BEL and backspace are deliberately absent here and fall
   * through to the \u{...} form. '$' is escaped so the output can never
   * start variable interpolation.
   */
  var phpSpecialEscape = {
    0x09: '\\t',
    0x0A: '\\n',
    0x0B: '\\v',
    0x0C: '\\f',
    0x0D: '\\r',
    0x22: '\\"',
    0x24: '\\$',
    0x5C: '\\\\'
  };

  /**
   * Returns a double-quoted PHP string with all non-printable and
   * non-US-ASCII sequences escaped.
   *
   * Code-points without a dedicated escape are written as \u{XXXX}
   * (up to U+FFFF) or \u{XXXXXX} (above U+FFFF); PHP accepts that syntax
   * from version 7.0 onwards.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function phpEscape(s) {
    var codePoints = UnicodeUtils.getCodePoints(s);
    var result = '"';
    for (var i = 0; i < codePoints.length; i++) {
      var cp = codePoints[i];
      var special = phpSpecialEscape[cp];
      if (special !== undefined) {
        result += special;
      } else if (cp >= 0x20 && cp <= 0x7e) {
        // Printable US-ASCII is copied through unchanged.
        result += String.fromCodePoint(cp);
      } else if (cp <= 0xFFFF) {
        result += '\\u{' + zeroPaddedHex(cp, 4) + '}';
      } else {
        result += '\\u{' + zeroPaddedHex(cp, 6) + '}';
      }
    }
    result += '"';
    return result;
  }
  /**
   * Escapes that are valid in both Java and JavaScript string literals,
   * keyed by UTF-16 code unit.
   *
   * BEL and vertical tab are deliberately absent: JavaScript reads \a as the
   * plain letter "a", and Java accepts neither \a nor \v. Both therefore
   * fall through to the \uXXXX form, which each language understands.
   */
  var javaScriptSpecialEscape = {
    0x08: '\\b',
    0x09: '\\t',
    0x0A: '\\n',
    0x0C: '\\f',
    0x0D: '\\r',
    0x22: '\\"',
    0x5C: '\\\\'
  };

  /**
   * Returns a double-quoted Java or JavaScript string with all
   * non-printable and non-US-ASCII sequences escaped.
   *
   * The string is processed one UTF-16 code unit at a time, so a code-point
   * above U+FFFF is written as two \uXXXX escapes (its surrogate pair).
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function jsEscape(s) {
    var result = '"';
    for (var i = 0; i < s.length; i++) {
      var unit = s.charCodeAt(i);
      var special = javaScriptSpecialEscape[unit];
      if (special !== undefined) {
        result += special;
      } else if (unit >= 0x20 && unit <= 0x7e) {
        // Printable US-ASCII is copied through unchanged.
        result += s.charAt(i);
      } else {
        result += '\\u' + zeroPaddedHex(unit, 4);
      }
    }
    result += '"';
    return result;
  }
  /**
   * Escapes a single code-point for c11Escape.
   *
   * @param {number} cp Code-point to escape
   * @return {string} The escape from specialEscape if one exists, the
   *   character itself for printable US-ASCII (0x20-0x7E), \uXXXX for other
   *   code-points up to U+FFFF, and \UXXXXXXXX for anything larger
   */
  function c11EscapeCodePoint(cp) {
    var special = specialEscape[cp];
    if (special !== undefined) {
      return special;
    }
    if (cp >= 0x20 && cp <= 0x7e) {
      return String.fromCodePoint(cp);
    }
    if (cp <= 0xFFFF) {
      return '\\u' + zeroPaddedHex(cp, 4);
    }
    return '\\U' + zeroPaddedHex(cp, 8);
  }

  /**
   * Escapes the contents of a string with \uXXXX / \UXXXXXXXX sequences.
   * No surrounding quotes are added; callers wrap the result themselves.
   *
   * NOTE(review): C11 appears to restrict which code-points below U+00A0 a
   * \u escape may name, so control characters escaped this way may not be
   * accepted by a C compiler -- confirm for the C / Objective-C callers.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Escaped string contents, without enclosing quotes
   */
  function c11Escape(s) {
    var codePoints = UnicodeUtils.getCodePoints(s);
    var pieces = [];
    for (var i = 0; i < codePoints.length; i++) {
      pieces.push(c11EscapeCodePoint(codePoints[i]));
    }
    return pieces.join('');
  }
  /**
   * Returns a C string literal with the u8 prefix (u8"...") in which all
   * non-printable and non-US-ASCII sequences are escaped.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function cEscape(s) {
    var escaped = c11Escape(s);
    return ['u8"', escaped, '"'].join('');
  }
  /**
   * Returns an Objective-C string literal (@"...") in which all
   * non-printable and non-US-ASCII sequences are escaped.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function objcEscape(s) {
    var escaped = c11Escape(s);
    return ['@"', escaped, '"'].join('');
  }
  /**
   * Returns a Python unicode string literal (u"...") in which all
   * non-printable and non-US-ASCII sequences are escaped.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function pyEscape(s) {
    var escaped = c11Escape(s);
    return ['u"', escaped, '"'].join('');
  }
  // Public API of this module, attached one function at a time in the order
  // in which the properties are exposed.
  var UnicodeUtilsExtra = {};
  UnicodeUtilsExtra.formatCodePoint = formatCodePoint;
  UnicodeUtilsExtra.getCodePointsFormatted = getCodePointsFormatted;
  UnicodeUtilsExtra.zeroPaddedHex = zeroPaddedHex;
  UnicodeUtilsExtra.phpEscape = phpEscape;
  UnicodeUtilsExtra.jsEscape = jsEscape;
  UnicodeUtilsExtra.cEscape = cEscape;
  UnicodeUtilsExtra.objcEscape = objcEscape;
  UnicodeUtilsExtra.pyEscape = pyEscape;

  module.exports = UnicodeUtilsExtra;