UnicodeUtilsExtra.js.flow 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. /**
  2. * Copyright (c) 2013-present, Facebook, Inc.
  3. *
  4. * This source code is licensed under the MIT license found in the
  5. * LICENSE file in the root directory of this source tree.
  6. *
  7. * @providesModule UnicodeUtilsExtra
  8. * @typechecks
  9. */
  10. /**
  11. * Unicode-enabled extra utility functions not always needed.
  12. */
  13. 'use strict';
  14. const UnicodeUtils = require('./UnicodeUtils');
  /**
   * Renders a code point as upper-case hexadecimal, left-padded with zeros.
   *
   * @param {number} codePoint Valid Unicode code-point
   * @param {number} len Minimum width of the result; shorter values are
   *   left-padded with '0', longer values are returned as-is
   * @return {string} Upper-case hexadecimal string of at least `len` characters
   */
  function zeroPaddedHex(codePoint, len) {
    const hexDigits = codePoint.toString(16).toUpperCase();
    let padding = '';
    // Grow the padding until padding + digits reaches the requested width.
    while (padding.length + hexDigits.length < len) {
      padding += '0';
    }
    return padding + hexDigits;
  }
  /**
   * Formats a code point in "U+" notation.
   *
   * Code points up to 0xFFFF are zero-padded to four hex digits; larger ones
   * are printed with as many digits as they need.
   *
   * @param {number} codePoint Valid Unicode code-point
   * @return {string} A formatted Unicode code-point string
   *   of the format U+XXXX, U+XXXXX, or U+XXXXXX
   */
  function formatCodePoint(codePoint) {
    // Any falsy input (NaN included) is formatted as code point 0.
    const cp = codePoint || 0;
    const hex = cp <= 0xFFFF
      ? zeroPaddedHex(cp, 4)
      : cp.toString(16).toUpperCase();
    return 'U+' + hex;
  }
  /**
   * Converts a string into the list of its code points, each formatted by
   * `formatCodePoint`.
   *
   * @param {string} str Valid Unicode string
   * @return {array<string>} A list of formatted code-point strings
   */
  function getCodePointsFormatted(str) {
    return UnicodeUtils.getCodePoints(str).map(formatCodePoint);
  }
  // Short backslash escapes keyed by code point. Each value is the literal
  // text written to the output (a backslash followed by one character).
  const specialEscape = {
    0x07: '\\a', // BEL
    0x08: '\\b', // backspace
    0x09: '\\t', // horizontal tab
    0x0A: '\\n', // line feed
    0x0B: '\\v', // vertical tab
    0x0C: '\\f', // form feed
    0x0D: '\\r', // carriage return
    0x22: '\\"', // double quote
    0x5C: '\\\\', // backslash
  };
  /**
   * Returns a double-quoted PHP string literal with all non-printable and
   * non-US-ASCII code points escaped.
   *
   * PHP double-quoted strings differ from the shared escape table in two ways
   * that are handled here:
   * - `\a` and `\b` are not PHP escape sequences (PHP keeps the backslash and
   *   the letter verbatim), so BEL and backspace are written as `\u{0007}`
   *   and `\u{0008}`.
   * - `$` starts variable interpolation, so it is written as `\$`.
   *
   * The `\u{...}` syntax requires PHP 7.0 or later.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function phpEscape(s) {
    let result = '"';
    for (const cp of UnicodeUtils.getCodePoints(s)) {
      // Skip the table entries PHP does not understand (BEL, backspace).
      const special =
        cp === 0x07 || cp === 0x08 ? undefined : specialEscape[cp];
      if (cp === 0x24) {
        // '$' is printable ASCII but must not be left bare in a PHP string.
        result += '\\$';
      } else if (special !== undefined) {
        result += special;
      } else if (cp >= 0x20 && cp <= 0x7e) {
        result += String.fromCodePoint(cp);
      } else if (cp <= 0xFFFF) {
        result += '\\u{' + zeroPaddedHex(cp, 4) + '}';
      } else {
        result += '\\u{' + zeroPaddedHex(cp, 6) + '}';
      }
    }
    result += '"';
    return result;
  }
  /**
   * Returns a double-quoted Java or JavaScript string literal with all
   * non-printable and non-US-ASCII sequences escaped.
   *
   * The input is walked one UTF-16 code unit at a time, so characters outside
   * the BMP come out as a pair of `\uXXXX` surrogate escapes.
   *
   * Only short escapes understood by both target languages are used. BEL and
   * vertical tab are written as `\u0007` and `\u000B`: `\a` is not an escape
   * in JavaScript (it evaluates to the letter "a") and neither `\a` nor `\v`
   * is accepted by Java.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function jsEscape(s) {
    let result = '"';
    for (let i = 0; i < s.length; i++) {
      const unit = s.charCodeAt(i);
      // Skip the table entries that are not portable to Java and JavaScript.
      const special =
        unit === 0x07 || unit === 0x0B ? undefined : specialEscape[unit];
      if (special !== undefined) {
        result += special;
      } else if (unit >= 0x20 && unit <= 0x7e) {
        result += s.charAt(i);
      } else {
        result += '\\u' + zeroPaddedHex(unit, 4);
      }
    }
    result += '"';
    return result;
  }
  /**
   * Escapes a single code point for `c11Escape`.
   *
   * @param {number} cp Unicode code-point
   * @return {string} The escaped (or verbatim) text for that code point
   */
  function c11EscapeCodePoint(cp) {
    const shortForm = specialEscape[cp];
    if (shortForm !== undefined) {
      return shortForm;
    }
    // Printable US-ASCII passes through unchanged.
    if (cp >= 0x20 && cp <= 0x7e) {
      return String.fromCodePoint(cp);
    }
    // \uXXXX for the BMP, \UXXXXXXXX for everything above it.
    return cp <= 0xFFFF
      ? '\\u' + zeroPaddedHex(cp, 4)
      : '\\U' + zeroPaddedHex(cp, 8);
  }

  /**
   * Escapes the contents of a string using C-style escapes, without adding
   * surrounding quotes. `cEscape`, `objcEscape` and `pyEscape` wrap the
   * result in their own prefix and quotes.
   *
   * @param {string} s Valid Unicode string
   * @return {string} The escaped string body (no quotes)
   */
  function c11Escape(s) {
    let escaped = '';
    for (const cp of UnicodeUtils.getCodePoints(s)) {
      escaped += c11EscapeCodePoint(cp);
    }
    return escaped;
  }
  /**
   * Returns a C string literal of the form u8"..." whose contents are the
   * input escaped by `c11Escape`.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function cEscape(s) {
    const body = c11Escape(s);
    return `u8"${body}"`;
  }
  /**
   * Returns an Objective-C string literal of the form @"..." whose contents
   * are the input escaped by `c11Escape`.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function objcEscape(s) {
    const body = c11Escape(s);
    return `@"${body}"`;
  }
  /**
   * Returns a Python string literal of the form u"..." whose contents are
   * the input escaped by `c11Escape`.
   *
   * @param {string} s Valid Unicode string
   * @return {string} Double-quoted string with Unicode sequences escaped
   */
  function pyEscape(s) {
    const body = c11Escape(s);
    return `u"${body}"`;
  }
  // Public API of this module: each key is bound to the function of the same
  // name defined earlier in this file.
  const UnicodeUtilsExtra = {
    formatCodePoint,
    getCodePointsFormatted,
    zeroPaddedHex,
    phpEscape,
    jsEscape,
    cEscape,
    objcEscape,
    pyEscape,
  };

  module.exports = UnicodeUtilsExtra;