index.js 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. /* eslint no-use-before-define:0 */
  2. 'use strict'
  3. // Import
  4. const pathUtil = require('path')
  5. const textExtensions = require('textextensions')
  6. const binaryExtensions = require('binaryextensions')
  /**
   * Is Text (Synchronous)
   * Decide whether a file holds text or binary content.
   * The basename is split on `.` and each segment is checked, last segment first, against the
   * known text extensions and then the known binary extensions; the first segment that matches decides.
   * Encoding detection on the buffer is only the fallback for unrecognised names,
   * as it cannot guarantee everything, especially for chars between utf8 and utf16.
   * @param {?string} filename - the filename for the file/buffer if available
   * @param {?Buffer} buffer - the buffer for the file if available
   * @returns {?boolean} `true` for text, `false` for binary, `null` if the name was not recognised and no buffer was given
   */
  function isTextSync (filename, buffer) {
    // Extension checks take priority over content sniffing
    if (filename) {
      const segments = pathUtil.basename(filename).split('.')
      // Walk from the last segment to the first, so the final extension is considered first
      for (let index = segments.length - 1; index >= 0; --index) {
        const segment = segments[index]
        if (textExtensions.includes(segment)) {
          return true
        }
        if (binaryExtensions.includes(segment)) {
          return false
        }
      }
    }

    // Unknown (or missing) filename: sniff the content when we have some
    if (buffer) {
      return getEncodingSync(buffer) === 'utf8'
    }

    // Nothing to go on
    return null
  }
  /**
   * Is Text
   * Callback flavour of `isTextSync`: runs it and hands the outcome to `next` before returning.
   * @param {?string} filename - forwarded to `isTextSync`
   * @param {?Buffer} buffer - forwarded to `isTextSync`
   * @param {Function} next - accepts arguments: (error: Error, result: Boolean)
   * @returns {nothing}
   */
  function isText (filename, buffer, next) {
    const outcome = isTextSync(filename, buffer)

    // An Error instance is reported through the first callback argument only
    if (outcome instanceof Error) {
      next(outcome)
      return
    }

    next(null, outcome)
  }
  /**
   * Is Binary (Synchronous)
   * Logical negation of `isTextSync`.
   * Note that any falsy result from `isTextSync`, including an undetermined `null`, negates to `true`.
   * @param {?string} filename - forwarded to `isTextSync`
   * @param {?Buffer} buffer - forwarded to `isTextSync`
   * @returns {Error|boolean} an Error instance is passed through untouched, anything else is negated
   */
  function isBinarySync (filename, buffer) {
    const textResult = isTextSync(filename, buffer)

    // Pass errors through rather than negating them
    if (textResult instanceof Error) {
      return textResult
    }

    return !textResult
  }
  /**
   * Is Binary
   * Callback flavour of the binary check: asks `isText` and reports the negated answer.
   * @param {?string} filename - forwarded to `isText`
   * @param {?Buffer} buffer - forwarded to `isText`
   * @param {Function} next - accepts arguments: (error: Error, result: Boolean)
   * @returns {nothing}
   */
  function isBinary (filename, buffer, next) {
    // Errors are forwarded as-is, otherwise the text result is flipped
    isText(filename, buffer, (error, textResult) => (
      error ? next(error) : next(null, !textResult)
    ))
  }
  /**
   * Get the encoding of a buffer (Synchronous)
   * Without `opts`, a chunk from the start, the middle and the end of the buffer is inspected,
   * as checking only the start or only the middle was not enough; the first binary chunk ends the search.
   * With `opts`, only the single requested chunk is inspected.
   * A chunk is binary if its utf8 decoding contains the replacement character (65533)
   * or a control character with a char code of 8 or below.
   * Chunk boundaries are byte offsets, so a non-empty chunk is first widened to whole utf8 characters;
   * otherwise a multi-byte character cut by a boundary would decode to the replacement character
   * and valid text would be reported as binary.
   * @param {Buffer} buffer
   * @param {?Object} [opts]
   * @param {?number} [opts.chunkLength = 24] - byte length of the chunk, `null` means the default
   * @param {?number} [opts.chunkBegin = 0] - byte offset of the chunk, `null` means the default
   * @returns {string} either "utf8" or "binary"
   */
  function getEncodingSync (buffer, opts) {
    // Prepare
    const textEncoding = 'utf8'
    const binaryEncoding = 'binary'
    const defaultChunkLength = 24

    // Discover: no options means probe the start, middle and end
    if (opts == null) {
      const chunkLength = defaultChunkLength
      const probeOffsets = [
        0,
        Math.max(0, Math.floor(buffer.length / 2) - chunkLength),
        Math.max(0, buffer.length - chunkLength)
      ]
      for (const chunkBegin of probeOffsets) {
        if (getEncodingSync(buffer, { chunkLength, chunkBegin }) === binaryEncoding) {
          return binaryEncoding
        }
      }
      return textEncoding
    }

    // Extract: the options are documented as nullable, so null falls back to the default like undefined does
    const chunkLength = opts.chunkLength == null ? defaultChunkLength : opts.chunkLength
    const requestedBegin = opts.chunkBegin == null ? 0 : opts.chunkBegin
    const requestedEnd = Math.min(buffer.length, requestedBegin + chunkLength)
    const chunk = alignChunkToCharacters(buffer, requestedBegin, requestedEnd)
    const contentChunkUTF8 = buffer.toString(textEncoding, chunk.begin, chunk.end)

    // Detect encoding
    for (let i = 0; i < contentChunkUTF8.length; ++i) {
      const charCode = contentChunkUTF8.charCodeAt(i)
      // 8 and below are control characters (e.g. backspace, null, eof, etc.)
      // 65533 is the unknown character
      if (charCode === 65533 || charCode <= 8) {
        return binaryEncoding
      }
    }
    return textEncoding
  }

  /**
   * Widen the byte range [begin, end) so that it neither starts nor stops inside a multi-byte utf8 character.
   * Empty ranges are returned untouched. Invalid utf8 is not repaired, it still decodes to the replacement character.
   * @param {Buffer} buffer
   * @param {number} begin - requested first byte (inclusive)
   * @param {number} end - requested last byte (exclusive)
   * @returns {{begin: number, end: number}}
   */
  function alignChunkToCharacters (buffer, begin, end) {
    if (end <= begin) {
      return { begin, end }
    }
    const isContinuationByte = (byte) => (byte & 0xC0) === 0x80

    // A character has at most 3 continuation bytes, so step back at most 3 bytes to reach its lead byte
    let alignedBegin = begin
    while (alignedBegin > 0 && begin - alignedBegin < 3 && isContinuationByte(buffer[alignedBegin])) {
      --alignedBegin
    }

    // Find the last lead byte inside the chunk and extend the end if its character runs past it
    let alignedEnd = end
    for (let back = 1; back <= 3 && end - back >= alignedBegin; ++back) {
      const byte = buffer[end - back]
      if (isContinuationByte(byte)) {
        continue
      }
      const sequenceLength = utf8SequenceLength(byte)
      if (sequenceLength > back) {
        alignedEnd = Math.min(buffer.length, end - back + sequenceLength)
      }
      break
    }

    return { begin: alignedBegin, end: alignedEnd }
  }

  /**
   * Number of bytes in the utf8 sequence announced by a lead byte (1 for ASCII and anything unrecognised).
   * @param {number} leadByte
   * @returns {number}
   */
  function utf8SequenceLength (leadByte) {
    if (leadByte >= 0xF0) return 4
    if (leadByte >= 0xE0) return 3
    if (leadByte >= 0xC0) return 2
    return 1
  }
  /**
   * Get the encoding of a buffer
   * Callback flavour of `getEncodingSync`: runs it and hands the outcome to `next` before returning.
   * @param {Buffer} buffer - forwarded to `getEncodingSync`
   * @param {Object} opts - forwarded to `getEncodingSync`
   * @param {Function} next - accepts arguments: (error: Error, result: string), where result is what `getEncodingSync` returned
   * @returns {nothing}
   */
  function getEncoding (buffer, opts, next) {
    const outcome = getEncodingSync(buffer, opts)

    // An Error instance is reported through the first callback argument only
    if (outcome instanceof Error) {
      next(outcome)
      return
    }

    next(null, outcome)
  }
  157. // Export
  158. module.exports = { isTextSync, isText, isBinarySync, isBinary, getEncodingSync, getEncoding }