index.js 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. /* eslint no-use-before-define:0 */
  2. 'use strict';
  3. // Import
  4. var pathUtil = require('path');
  5. var textExtensions = require('textextensions');
  6. var binaryExtensions = require('binaryextensions');
  /**
   * Is Text (Synchronous)
   * Determine whether a file is a text or a binary file.
   * Extensions are checked first; only when no extension is recognised do we fall back on encoding detection.
   * We do that as encoding detection cannot guarantee everything, especially for chars between utf8 and utf16.
   * @param {?string} filename - the filename for the file/buffer if available
   * @param {?Buffer} buffer - the buffer for the file if available
   * @returns {?boolean} `true` for text, `false` for binary, or `null` when neither the filename nor the buffer was able to decide
   */
  function isTextSync(filename, buffer) {
    // Stays null until one of the checks below reaches a verdict
    var isText = null;

    // Test extensions
    if (filename) {
      // Every dot-separated segment of the basename is a candidate, tried from the last one backwards
      var parts = pathUtil.basename(filename).split('.').reverse();
      for (var i = 0; i < parts.length; ++i) {
        var extension = parts[i];
        if (textExtensions.indexOf(extension) !== -1) {
          isText = true;
          break;
        }
        if (binaryExtensions.indexOf(extension) !== -1) {
          isText = false;
          break;
        }
      }
    }

    // Fallback to encoding if extension check was not enough
    if (buffer && isText === null) {
      isText = getEncodingSync(buffer) === 'utf8';
    }

    // Return our result
    return isText;
  }
  /**
   * Is Text
   * Callback flavour of `isTextSync`: the verdict is computed synchronously and handed to `next`.
   * @param {?string} filename - forwarded to `isTextSync`
   * @param {?Buffer} buffer - forwarded to `isTextSync`
   * @param {Function} next - accepts arguments: (error: Error, result: Boolean)
   * @returns {nothing}
   */
  function isText(filename, buffer, next) {
    var outcome = isTextSync(filename, buffer);

    // Anything that is not an Error instance is reported as a successful result
    if (!(outcome instanceof Error)) {
      next(null, outcome);
      return;
    }

    next(outcome);
  }
  /**
   * Is Binary (Synchronous)
   * Negates the verdict of `isTextSync`.
   * Note that any falsy verdict, including `null`, negates to `true`.
   * @param {?string} filename - forwarded to `isTextSync`
   * @param {?Buffer} buffer - forwarded to `isTextSync`
   * @returns {Error|boolean} an Error instance is passed through untouched, anything else is negated
   */
  function isBinarySync(filename, buffer) {
    var textVerdict = isTextSync(filename, buffer);

    // Errors are handed back as-is rather than negated
    if (textVerdict instanceof Error) {
      return textVerdict;
    }

    return !textVerdict;
  }
  /**
   * Is Binary
   * Callback flavour of the binary check: delegates to `isText` and negates its result.
   * @param {?string} filename - forwarded to `isText`
   * @param {?Buffer} buffer - forwarded to `isText`
   * @param {Function} next - accepts arguments: (error: Error, result: Boolean)
   * @returns {nothing}
   */
  function isBinary(filename, buffer, next) {
    // Pass errors straight through, otherwise flip the text verdict
    function relay(err, result) {
      return err ? next(err) : next(null, !result);
    }

    isText(filename, buffer, relay);
  }
  /**
   * Move a chunk's first byte index back onto the lead byte of the UTF-8 character it falls inside.
   * UTF-8 continuation bytes look like 10xxxxxx and a character has at most three of them,
   * so we never step back more than three bytes.
   * @param {Buffer} buffer
   * @param {number} begin - index of the first byte of the chunk
   * @returns {number} the adjusted index, or `begin` itself when it is not on a continuation byte
   */
  function alignChunkBegin(buffer, begin) {
    var aligned = begin;
    var limit = Math.max(0, begin - 3);
    while (aligned > limit && (buffer[aligned] & 0xC0) === 0x80) {
      --aligned;
    }
    return aligned;
  }
  /**
   * Move a chunk's exclusive end index forward past any UTF-8 continuation bytes (10xxxxxx),
   * so that the last character of the chunk is not cut in half. Never moves more than three bytes.
   * @param {Buffer} buffer
   * @param {number} end - index one past the last byte of the chunk
   * @returns {number} the adjusted index, or `end` itself when the chunk already stops on a boundary
   */
  function alignChunkEnd(buffer, end) {
    var aligned = end;
    var limit = Math.min(buffer.length, end + 3);
    while (aligned < limit && (buffer[aligned] & 0xC0) === 0x80) {
      ++aligned;
    }
    return aligned;
  }
  /**
   * Get the encoding of a buffer.
   * Without `opts`, three chunks are checked: one at the start, one ending at the midpoint and one at the end of the buffer.
   * We check all three, as doing only start was not enough, and doing only middle was not enough, so better safe than sorry.
   * With `opts`, only the single chunk described by `opts` is checked.
   * A chunk is widened by up to three bytes on either side so that it never splits a multi-byte UTF-8 character;
   * a split character would decode to the replacement character and make valid text look binary.
   * @param {Buffer} buffer
   * @param {?Object} [opts]
   * @param {?number} [opts.chunkLength = 24]
   * @param {?number} [opts.chunkBegin = 0]
   * @returns {string} "utf8" when every inspected chunk looks like text, otherwise "binary"
   */
  function getEncodingSync(buffer, opts) {
    // Prepare
    var textEncoding = 'utf8';
    var binaryEncoding = 'binary';
    var chunkLength;
    var chunkBegin;

    // Discover
    if (opts == null) {
      // Start
      chunkLength = 24;
      var encoding = getEncodingSync(buffer, { chunkLength: chunkLength });
      if (encoding === textEncoding) {
        // Middle
        chunkBegin = Math.max(0, Math.floor(buffer.length / 2) - chunkLength);
        encoding = getEncodingSync(buffer, { chunkLength: chunkLength, chunkBegin: chunkBegin });
        if (encoding === textEncoding) {
          // End
          chunkBegin = Math.max(0, buffer.length - chunkLength);
          encoding = getEncodingSync(buffer, { chunkLength: chunkLength, chunkBegin: chunkBegin });
        }
      }
      // Return
      return encoding;
    }

    // Extract
    chunkLength = opts.chunkLength === undefined ? 24 : opts.chunkLength;
    chunkBegin = opts.chunkBegin === undefined ? 0 : opts.chunkBegin;
    var chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);

    // Decode whole characters only: snap both edges of the window to character boundaries
    var contentChunkUTF8 = buffer.toString(
      textEncoding,
      alignChunkBegin(buffer, chunkBegin),
      alignChunkEnd(buffer, chunkEnd)
    );

    // Detect encoding
    for (var i = 0; i < contentChunkUTF8.length; ++i) {
      var charCode = contentChunkUTF8.charCodeAt(i);
      // 8 and below are control characters (e.g. backspace, null, eof, etc.)
      // 65533 is the unknown character
      if (charCode === 65533 || charCode <= 8) {
        return binaryEncoding;
      }
    }

    // Return
    return textEncoding;
  }
  /**
   * Get the encoding of a buffer
   * Callback flavour of `getEncodingSync`: the encoding is computed synchronously and handed to `next`.
   * @param {Buffer} buffer - forwarded to `getEncodingSync`
   * @param {Object} opts - forwarded to `getEncodingSync`
   * @param {Function} next - accepts arguments: (error: Error, result: string), where result is whatever `getEncodingSync` returned
   * @returns {nothing}
   */
  function getEncoding(buffer, opts, next) {
    var detected = getEncodingSync(buffer, opts);

    // Anything that is not an Error instance is reported as a successful result
    if (!(detected instanceof Error)) {
      next(null, detected);
      return;
    }

    next(detected);
  }
  176. // Export
  177. module.exports = { isTextSync: isTextSync, isText: isText, isBinarySync: isBinarySync, isBinary: isBinary, getEncodingSync: getEncodingSync, getEncoding: getEncoding };