match.js 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. 'use strict'
  2. const check = require('check-types')
  3. const DataStream = require('./datastream')
  4. const events = require('./events')
  5. const Hoopy = require('hoopy')
  6. const walk = require('./walk')
  7. const DEFAULT_BUFFER_LENGTH = 1024
  8. module.exports = match
  /**
   * Public function `match`.
   *
   * Asynchronously parses a stream of JSON data, returning a stream of items
   * that match the argument. Note that if a value is `null`, it won't be matched
   * because `null` is used to signify end-of-stream in node.
   *
   * @param stream:         Readable instance representing the incoming JSON.
   *
   * @param selector:       Regular expression, string or predicate function used to
   *                        identify matches. If a regular expression or string is
   *                        passed, only property keys are tested. If a predicate is
   *                        passed, it is called with the key, the value and the
   *                        depth (number of enclosing arrays/objects) as
   *                        arguments. Regular expressions carrying the `g` or `y`
   *                        flag have their `lastIndex` reset before every test, so
   *                        each key is always tested from its start.
   *
   * @option minDepth:      Number indicating the minimum depth to apply the selector
   *                        to. The default is `0`, but setting it to a higher value
   *                        can improve performance and reduce memory usage by
   *                        eliminating the need to actualise top-level items.
   *
   * @option numbers:       Boolean, indicating whether numerical keys (e.g. array
   *                        indices) should be coerced to strings before testing the
   *                        match. Only applies if the `selector` argument is a string
   *                        or regular expression.
   *
   * @option ndjson:        Set this to true to parse newline-delimited JSON,
   *                        default is `false`.
   *
   * @option yieldRate:     The number of data items to process per timeslice,
   *                        default is 16384.
   *
   * @option bufferLength:  The length of the match buffer, default is 1024.
   *
   * @option highWaterMark: If set, will be passed to the readable stream constructor
   *                        as the value for the highWaterMark option.
   *
   * @option Promise:       The promise constructor to use, defaults to bluebird.
   *
   * @returns The `DataStream` instance that matched items are pushed to.
   *
   * @throws If `minDepth` is negative, or if `selector` is neither a function,
   *         a non-empty string nor a `RegExp` (via `check.assert`).
   **/
  function match (stream, selector, options = {}) {
    // Containers currently being assembled, innermost last.
    const scopes = []
    // Property names seen but not yet paired with their value.
    const properties = []
    const emitter = walk(stream, options)
    // Buffer of matches that have not been pushed to `results` yet.
    const matches = new Hoopy(options.bufferLength || DEFAULT_BUFFER_LENGTH)

    let streamOptions
    const { highWaterMark } = options
    if (highWaterMark) {
      streamOptions = { highWaterMark }
    }
    const results = new DataStream(read, streamOptions)

    // Exactly one of these three is set, depending on the selector's type.
    let selectorFunction, selectorString, selectorRegex
    // Set to the function returned by `emitter.pause()` while paused.
    let resume
    let coerceNumbers = false
    // True until the first `read()`; nothing is pushed to `results` before then.
    let awaitPush = true
    let isEnded = false
    // `length` buffered matches are stored in `matches` starting at `index`.
    let length = 0
    let index = 0

    const minDepth = options.minDepth || 0
    check.assert.greaterOrEqual(minDepth, 0)

    if (check.function(selector)) {
      selectorFunction = selector
    } else {
      coerceNumbers = !! options.numbers

      if (check.string(selector)) {
        check.assert.nonEmptyString(selector)
        selectorString = selector
      } else {
        check.assert.instanceStrict(selector, RegExp)
        selectorRegex = selector
      }
    }

    emitter.on(events.array, array)
    emitter.on(events.object, object)
    emitter.on(events.property, property)
    emitter.on(events.endArray, endScope)
    emitter.on(events.endObject, endScope)
    emitter.on(events.string, value)
    emitter.on(events.number, value)
    emitter.on(events.literal, value)
    emitter.on(events.end, end)
    emitter.on(events.error, error)
    emitter.on(events.dataError, dataError)

    return results

    // Read callback handed to `DataStream`: flushes buffered matches and
    // resumes the emitter if it was paused.
    function read () {
      if (awaitPush) {
        awaitPush = false

        if (isEnded) {
          // The input finished before the first read: flush, then end.
          // NOTE(review): `after()` stops flushing as soon as `results.push`
          // returns false, yet the end-of-stream marker is still pushed right
          // away, so matches left in the buffer look like they would be
          // dropped in that case. Confirm against DataStream's behaviour.
          if (length > 0) {
            after()
          }

          return endResults()
        }
      }

      if (resume) {
        // Clear `resume` before calling it so that `after()` is not blocked.
        const resumeCopy = resume
        resume = null
        resumeCopy()
        after()
      }
    }

    // Pushes buffered matches to `results` until the buffer is empty or a
    // push reports back-pressure, then updates `index`/`length` to describe
    // whatever remains.
    function after () {
      if (awaitPush || resume) {
        return
      }

      let i

      for (i = 0; i < length && ! resume; ++i) {
        if (! results.push(matches[i + index])) {
          // Setting `resume` here terminates the loop.
          pause()
        }
      }

      if (i === length) {
        index = length = 0
      } else {
        length -= i
        index += i
      }
    }

    // Pauses the emitter, keeping the function that undoes the pause.
    function pause () {
      resume = emitter.pause()
    }

    // Signals end-of-stream on `results`, unless no read has happened yet.
    function endResults () {
      if (! awaitPush) {
        results.push(null)
      }
    }

    function array () {
      scopes.push([])
    }

    function object () {
      scopes.push({})
    }

    // Remembers a property name until its value arrives. Names belonging to
    // objects shallower than `minDepth` are ignored, mirroring `value`.
    function property (name) {
      if (scopes.length < minDepth) {
        return
      }

      properties.push(name)
    }

    // A finished array/object is handled as a value of its parent scope.
    function endScope () {
      value(scopes.pop())
    }

    // Stores `v` in the enclosing container (if any) and buffers it as a
    // match when the selector accepts it.
    function value (v) {
      let key

      if (scopes.length < minDepth) {
        return
      }

      if (scopes.length > 0) {
        const scope = scopes[scopes.length - 1]

        if (Array.isArray(scope)) {
          // The next free index of the array being assembled.
          key = scope.length
        } else {
          key = properties.pop()
        }

        scope[key] = v
      }

      // `null` can't be emitted, it would signal end-of-stream.
      if (v === null) {
        return
      }

      if (selectorFunction) {
        if (selectorFunction(key, v, scopes.length)) {
          push(v)
        }

        return
      }

      if (coerceNumbers && typeof key === 'number') {
        key = key.toString()
      }

      if (selectorString) {
        if (selectorString === key) {
          push(v)
        }
      } else if (selectorRegex) {
        // `test()` on a global or sticky expression starts from `lastIndex`
        // left behind by the previous key, which makes matches depend on
        // earlier keys. Rewind so every key is tested from its beginning.
        if (selectorRegex.global || selectorRegex.sticky) {
          selectorRegex.lastIndex = 0
        }

        // Note that `test()` converts its argument to a string itself.
        if (selectorRegex.test(key)) {
          push(v)
        }
      }
    }

    // Buffers a match, pausing the emitter when this match fills the buffer,
    // then tries to flush the buffer to `results`.
    function push (v) {
      if (length + 1 === matches.length) {
        pause()
      }

      matches[index + length++] = v

      after()
    }

    function end () {
      isEnded = true
      endResults()
    }

    function error (e) {
      results.emit('error', e)
    }

    function dataError (e) {
      results.emit('dataError', e)
    }
  }