indexOf.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. 'use strict'
  2. var tape = require('tape')
  3. , BufferList = require('../')
  4. , Buffer = require('safe-buffer').Buffer
  // Single-byte needles: a hit inside the first chunk, a hit in a later chunk
  // once the offset skips the first one, and -1 when no occurrence remains.
  tape('indexOf single byte needle', (t) => {
    const list = new BufferList(['abcdefg', 'abcdefg', '12345'])

    // Each entry is [arguments passed to indexOf, expected index].
    const expectations = [
      [['e'], 4],
      [['e', 5], 11],
      [['e', 12], -1],
      [['5'], 18]
    ]

    expectations.forEach((entry) => {
      // apply() keeps the argument count identical to a direct call
      const found = list.indexOf.apply(list, entry[0])
      t.equal(found, entry[1])
    })

    t.end()
  })
  // A two-byte needle that lies entirely inside a single chunk.
  tape('indexOf multiple byte needle', (t) => {
    const haystack = new BufferList(['abcdefg', 'abcdefg'])

    const firstHit = haystack.indexOf('ef')
    t.equal(firstHit, 4)

    // starting past the first occurrence must land on the one in the 2nd chunk
    const secondHit = haystack.indexOf('ef', 5)
    t.equal(secondHit, 11)

    t.end()
  })
  // The needle begins at the tail of the first chunk and ends in the second.
  tape('indexOf multiple byte needles across buffer boundaries', (t) => {
    const chunks = ['abcdefg', 'abcdefg']
    const haystack = new BufferList(chunks)

    const position = haystack.indexOf('fgabc')
    t.equal(position, 5)

    t.end()
  })
  // The needle itself may be a BufferList rather than a string or Buffer.
  tape('indexOf takes a buffer list search', (t) => {
    const haystack = new BufferList(['abcdefg', 'abcdefg'])
    const needle = new BufferList('fgabc')

    const position = haystack.indexOf(needle)
    t.equal(position, 5)

    t.end()
  })
  // An empty needle is expected to match at the requested offset, with offsets
  // beyond the end reported as the list length. The same four checks run for
  // an empty string and then for an empty Buffer.
  tape('indexOf a zero byte needle', (t) => {
    const list = new BufferList('abcdef')
    const emptyNeedles = ['', Buffer.from('')]

    emptyNeedles.forEach((needle) => {
      t.equal(list.indexOf(needle), 0)
      t.equal(list.indexOf(needle, 1), 1)
      t.equal(list.indexOf(needle, list.length + 1), list.length)
      t.equal(list.indexOf(needle, Infinity), list.length)
    })

    t.end()
  })
  // The haystack mixes chunks that are shorter and longer than the 5-byte
  // needle, so every match has to be assembled from several chunks.
  tape('indexOf buffers smaller and larger than the needle', (t) => {
    const chunks = ['abcdefg', 'a', 'bcdefg', 'a', 'bcfgab']
    const haystack = new BufferList(chunks)
    const needle = 'fgabc'

    // Each entry is [start offset or undefined for "no offset", expected index].
    const expectations = [
      [undefined, 5],
      [6, 12],
      [13, -1]
    ]

    expectations.forEach((entry) => {
      const offset = entry[0]
      const found = offset === undefined
        ? haystack.indexOf(needle)
        : haystack.indexOf(needle, offset)
      t.equal(found, entry[1])
    })

    t.end()
  })
  // only present in node 6+, so the test is registered only when the major
  // version parsed from process.version is at least 6
  if (Number(process.version.slice(1).split('.')[0]) >= 6) {
    tape('indexOf latin1 and binary encoding', (t) => {
      const b = new BufferList('abcdef')

      // The same five checks run for 'latin1' first and then for 'binary'.
      ;['latin1', 'binary'].forEach((encoding) => {
        // Fresh BufferList built from `b` round-tripped through the encoding.
        const roundTripped = () =>
          new BufferList(Buffer.from(b.toString(encoding), encoding))
        // Fresh single-chunk BufferList holding `text` in the encoding.
        const encoded = (text) => new BufferList(Buffer.from(text, encoding))

        // explicit offset plus encoding, string needle then Buffer needle
        t.equal(roundTripped().indexOf('d', 0, encoding), 3)
        t.equal(
          roundTripped().indexOf(Buffer.from('d', encoding), 0, encoding),
          3
        )

        // encoding passed in the offset position
        t.equal(encoded('aa\u00e8aa').indexOf('\u00e8', encoding), 2)
        t.equal(encoded('\u00e8').indexOf('\u00e8', encoding), 0)
        t.equal(
          encoded('\u00e8').indexOf(Buffer.from('\u00e8', encoding), encoding),
          0
        )
      })

      t.end()
    })
  }
  // Per its name, this mirrors the node.js 10 Buffer#indexOf test suite.
  // Checks that go through `b` (and the final Uint8Array check) exercise a
  // BufferList; the remaining checks call indexOf directly on plain Buffers.
  tape('indexOf the entire nodejs10 buffer test suite', t => {
    const b = new BufferList('abcdef')
    const buf_a = Buffer.from('a')
    const buf_bc = Buffer.from('bc')
    const buf_f = Buffer.from('f')
    const buf_z = Buffer.from('z')
    const stringComparison = 'abcdef'

    // string needles
    t.equal(b.indexOf('a'), 0)
    t.equal(b.indexOf('a', 1), -1)
    t.equal(b.indexOf('a', -1), -1)
    t.equal(b.indexOf('a', -4), -1)
    t.equal(b.indexOf('a', -b.length), 0)
    t.equal(b.indexOf('a', NaN), 0)
    t.equal(b.indexOf('a', -Infinity), 0)
    t.equal(b.indexOf('a', Infinity), -1)
    t.equal(b.indexOf('bc'), 1)
    t.equal(b.indexOf('bc', 2), -1)
    t.equal(b.indexOf('bc', -1), -1)
    t.equal(b.indexOf('bc', -3), -1)
    t.equal(b.indexOf('bc', -5), 1)
    t.equal(b.indexOf('bc', NaN), 1)
    t.equal(b.indexOf('bc', -Infinity), 1)
    t.equal(b.indexOf('bc', Infinity), -1)
    t.equal(b.indexOf('f'), b.length - 1)
    t.equal(b.indexOf('z'), -1)

    // Buffer needles (same expectations as the string needles above)
    t.equal(b.indexOf(buf_a), 0)
    t.equal(b.indexOf(buf_a, 1), -1)
    t.equal(b.indexOf(buf_a, -1), -1)
    t.equal(b.indexOf(buf_a, -4), -1)
    t.equal(b.indexOf(buf_a, -b.length), 0)
    t.equal(b.indexOf(buf_a, NaN), 0)
    t.equal(b.indexOf(buf_a, -Infinity), 0)
    t.equal(b.indexOf(buf_a, Infinity), -1)
    t.equal(b.indexOf(buf_bc), 1)
    t.equal(b.indexOf(buf_bc, 2), -1)
    t.equal(b.indexOf(buf_bc, -1), -1)
    t.equal(b.indexOf(buf_bc, -3), -1)
    t.equal(b.indexOf(buf_bc, -5), 1)
    t.equal(b.indexOf(buf_bc, NaN), 1)
    t.equal(b.indexOf(buf_bc, -Infinity), 1)
    t.equal(b.indexOf(buf_bc, Infinity), -1)
    t.equal(b.indexOf(buf_f), b.length - 1)
    t.equal(b.indexOf(buf_z), -1)

    // numeric needles (0x61 is the byte value of 'a')
    t.equal(b.indexOf(0x61), 0)
    t.equal(b.indexOf(0x61, 1), -1)
    t.equal(b.indexOf(0x61, -1), -1)
    t.equal(b.indexOf(0x61, -4), -1)
    t.equal(b.indexOf(0x61, -b.length), 0)
    t.equal(b.indexOf(0x61, NaN), 0)
    t.equal(b.indexOf(0x61, -Infinity), 0)
    t.equal(b.indexOf(0x61, Infinity), -1)
    t.equal(b.indexOf(0x0), -1)

    // test offsets
    t.equal(b.indexOf('d', 2), 3)
    t.equal(b.indexOf('f', 5), 5)
    t.equal(b.indexOf('f', -1), 5)
    t.equal(b.indexOf('f', 6), -1)
    t.equal(b.indexOf(Buffer.from('d'), 2), 3)
    t.equal(b.indexOf(Buffer.from('f'), 5), 5)
    t.equal(b.indexOf(Buffer.from('f'), -1), 5)
    t.equal(b.indexOf(Buffer.from('f'), 6), -1)
    t.equal(Buffer.from('ff').indexOf(Buffer.from('f'), 1, 'ucs2'), -1)

    // test invalid and uppercase encoding
    t.equal(b.indexOf('b', 'utf8'), 1)
    t.equal(b.indexOf('b', 'UTF8'), 1)
    t.equal(b.indexOf('62', 'HEX'), 1)
    t.throws(() => b.indexOf('bad', 'enc'), TypeError)

    // test hex encoding
    t.equal(
      Buffer.from(b.toString('hex'), 'hex')
        .indexOf('64', 0, 'hex'),
      3
    )
    t.equal(
      Buffer.from(b.toString('hex'), 'hex')
        .indexOf(Buffer.from('64', 'hex'), 0, 'hex'),
      3
    )

    // test base64 encoding
    t.equal(
      Buffer.from(b.toString('base64'), 'base64')
        .indexOf('ZA==', 0, 'base64'),
      3
    )
    t.equal(
      Buffer.from(b.toString('base64'), 'base64')
        .indexOf(Buffer.from('ZA==', 'base64'), 0, 'base64'),
      3
    )

    // test ascii encoding
    t.equal(
      Buffer.from(b.toString('ascii'), 'ascii')
        .indexOf('d', 0, 'ascii'),
      3
    )
    t.equal(
      Buffer.from(b.toString('ascii'), 'ascii')
        .indexOf(Buffer.from('d', 'ascii'), 0, 'ascii'),
      3
    )

    // test optional offset with passed encoding
    t.equal(Buffer.from('aaaa0').indexOf('30', 'hex'), 4)
    t.equal(Buffer.from('aaaa00a').indexOf('3030', 'hex'), 4)

    {
      // test ucs2 encoding
      const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2')
      t.equal(8, twoByteString.indexOf('\u0395', 4, 'ucs2'))
      t.equal(6, twoByteString.indexOf('\u03a3', -4, 'ucs2'))
      t.equal(4, twoByteString.indexOf('\u03a3', -6, 'ucs2'))
      t.equal(4, twoByteString.indexOf(
        Buffer.from('\u03a3', 'ucs2'), -6, 'ucs2'))
      t.equal(-1, twoByteString.indexOf('\u03a3', -2, 'ucs2'))
    }

    const mixedByteStringUcs2 =
      Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2')
    t.equal(6, mixedByteStringUcs2.indexOf('bc', 0, 'ucs2'))
    t.equal(10, mixedByteStringUcs2.indexOf('\u03a3', 0, 'ucs2'))
    t.equal(-1, mixedByteStringUcs2.indexOf('\u0396', 0, 'ucs2'))
    t.equal(
      6, mixedByteStringUcs2.indexOf(Buffer.from('bc', 'ucs2'), 0, 'ucs2'))
    t.equal(
      10, mixedByteStringUcs2.indexOf(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2'))
    t.equal(
      -1, mixedByteStringUcs2.indexOf(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2'))

    {
      const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2')

      // Test single char pattern
      t.equal(0, twoByteString.indexOf('\u039a', 0, 'ucs2'))
      let index = twoByteString.indexOf('\u0391', 0, 'ucs2')
      t.equal(2, index, `Alpha - at index ${index}`)
      index = twoByteString.indexOf('\u03a3', 0, 'ucs2')
      t.equal(4, index, `First Sigma - at index ${index}`)
      index = twoByteString.indexOf('\u03a3', 6, 'ucs2')
      t.equal(6, index, `Second Sigma - at index ${index}`)
      index = twoByteString.indexOf('\u0395', 0, 'ucs2')
      t.equal(8, index, `Epsilon - at index ${index}`)
      index = twoByteString.indexOf('\u0392', 0, 'ucs2')
      t.equal(-1, index, `Not beta - at index ${index}`)

      // Test multi-char pattern
      index = twoByteString.indexOf('\u039a\u0391', 0, 'ucs2')
      t.equal(0, index, `Lambda Alpha - at index ${index}`)
      index = twoByteString.indexOf('\u0391\u03a3', 0, 'ucs2')
      t.equal(2, index, `Alpha Sigma - at index ${index}`)
      index = twoByteString.indexOf('\u03a3\u03a3', 0, 'ucs2')
      t.equal(4, index, `Sigma Sigma - at index ${index}`)
      index = twoByteString.indexOf('\u03a3\u0395', 0, 'ucs2')
      t.equal(6, index, `Sigma Epsilon - at index ${index}`)
    }

    const mixedByteStringUtf8 = Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395')
    t.equal(5, mixedByteStringUtf8.indexOf('bc'))
    t.equal(5, mixedByteStringUtf8.indexOf('bc', 5))
    t.equal(5, mixedByteStringUtf8.indexOf('bc', -8))
    t.equal(7, mixedByteStringUtf8.indexOf('\u03a3'))
    t.equal(-1, mixedByteStringUtf8.indexOf('\u0396'))

    // Test complex string indexOf algorithms. Only trigger for long strings.
    // Long string that isn't a simple repeat of a shorter string.
    let longString = 'A'
    for (let i = 66; i < 76; i++) { // from 'B' to 'K'
      longString = longString + String.fromCharCode(i) + longString
    }
    const longBufferString = Buffer.from(longString)

    // pattern of 15 chars, repeated every 16 chars in long
    let pattern = 'ABACABADABACABA'
    for (let i = 0; i < longBufferString.length - pattern.length; i += 7) {
      const index = longBufferString.indexOf(pattern, i)
      // (i + 15) & ~0xf rounds i up to the next multiple of 16
      t.equal((i + 15) & ~0xf, index,
        `Long ABACABA...-string at index ${i}`)
    }

    let index = longBufferString.indexOf('AJABACA')
    t.equal(510, index, `Long AJABACA, First J - at index ${index}`)
    index = longBufferString.indexOf('AJABACA', 511)
    t.equal(1534, index, `Long AJABACA, Second J - at index ${index}`)

    pattern = 'JABACABADABACABA'
    index = longBufferString.indexOf(pattern)
    t.equal(511, index, `Long JABACABA..., First J - at index ${index}`)
    index = longBufferString.indexOf(pattern, 512)
    t.equal(
      1535, index, `Long JABACABA..., Second J - at index ${index}`)

    // Search for a non-ASCII string in a pure ASCII string.
    const asciiString = Buffer.from(
      'arglebargleglopglyfarglebargleglopglyfarglebargleglopglyf')
    t.equal(-1, asciiString.indexOf('\x2061'))
    t.equal(3, asciiString.indexOf('leb', 0))

    // Search in string containing many non-ASCII chars.
    const allCodePoints = []
    for (let i = 0; i < 65536; i++) allCodePoints[i] = i
    const allCharsString = String.fromCharCode.apply(String, allCodePoints)
    const allCharsBufferUtf8 = Buffer.from(allCharsString)
    const allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2')

    // Search for string long enough to trigger complex search with ASCII pattern
    // and UC16 subject.
    t.equal(-1, allCharsBufferUtf8.indexOf('notfound'))
    t.equal(-1, allCharsBufferUcs2.indexOf('notfound'))

    // Needle is longer than haystack, but only because it's encoded as UTF-16
    t.equal(Buffer.from('aaaa').indexOf('a'.repeat(4), 'ucs2'), -1)
    t.equal(Buffer.from('aaaa').indexOf('a'.repeat(4), 'utf8'), 0)
    t.equal(Buffer.from('aaaa').indexOf('你好', 'ucs2'), -1)

    // Haystack has odd length, but the needle is UCS2.
    t.equal(Buffer.from('aaaaa').indexOf('b', 'ucs2'), -1)

    {
      // Find substrings in Utf8.
      const lengths = [1, 3, 15] // Single char, simple and complex.
      const indices = [0x5, 0x60, 0x400, 0x680, 0x7ee, 0xFF02, 0x16610, 0x2f77b]
      for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) {
        for (let i = 0; i < indices.length; i++) {
          const index = indices[i]
          let length = lengths[lengthIndex]

          if (index + length > 0x7F) {
            length = 2 * length
          }

          if (index + length > 0x7FF) {
            length = 3 * length
          }

          if (index + length > 0xFFFF) {
            length = 4 * length
          }

          const patternBufferUtf8 = allCharsBufferUtf8.slice(index, index + length)
          t.equal(index, allCharsBufferUtf8.indexOf(patternBufferUtf8))

          const patternStringUtf8 = patternBufferUtf8.toString()
          t.equal(index, allCharsBufferUtf8.indexOf(patternStringUtf8))
        }
      }
    }

    {
      // Find substrings in UCS2.
      const lengths = [2, 4, 16] // Single char, simple and complex.
      const indices = [0x5, 0x65, 0x105, 0x205, 0x285, 0x2005, 0x2085, 0xfff0]
      for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) {
        for (let i = 0; i < indices.length; i++) {
          // the indices are multiplied by 2 to get byte offsets into the
          // ucs2-encoded buffer
          const index = indices[i] * 2
          const length = lengths[lengthIndex]

          const patternBufferUcs2 =
            allCharsBufferUcs2.slice(index, index + length)
          t.equal(
            index, allCharsBufferUcs2.indexOf(patternBufferUcs2, 0, 'ucs2'))

          const patternStringUcs2 = patternBufferUcs2.toString('ucs2')
          t.equal(
            index, allCharsBufferUcs2.indexOf(patternStringUcs2, 0, 'ucs2'))
        }
      }
    }

    // Needles that are neither string, number nor buffer-like must be rejected
    // with a TypeError. (A stray `debugger` statement used to sit in this
    // callback; it has been removed so the run never pauses under an inspector.)
    ;[
      () => {},
      {},
      []
    ].forEach(val => {
      t.throws(() => b.indexOf(val), TypeError, `"${JSON.stringify(val)}" should throw`)
    })

    // Test weird offset arguments.
    // The following offsets coerce to NaN or 0, searching the whole Buffer
    t.equal(b.indexOf('b', undefined), 1)
    t.equal(b.indexOf('b', {}), 1)
    t.equal(b.indexOf('b', 0), 1)
    t.equal(b.indexOf('b', null), 1)
    t.equal(b.indexOf('b', []), 1)

    // The following offset coerces to 2, in other words +[2] === 2
    t.equal(b.indexOf('b', [2]), -1)

    // Behavior should match String.indexOf()
    t.equal(
      b.indexOf('b', undefined),
      stringComparison.indexOf('b', undefined))
    t.equal(
      b.indexOf('b', {}),
      stringComparison.indexOf('b', {}))
    t.equal(
      b.indexOf('b', 0),
      stringComparison.indexOf('b', 0))
    t.equal(
      b.indexOf('b', null),
      stringComparison.indexOf('b', null))
    t.equal(
      b.indexOf('b', []),
      stringComparison.indexOf('b', []))
    t.equal(
      b.indexOf('b', [2]),
      stringComparison.indexOf('b', [2]))

    // test truncation of Number arguments to uint8
    {
      const buf = Buffer.from('this is a test')
      t.equal(buf.indexOf(0x6973), 3)
      t.equal(buf.indexOf(0x697320), 4)
      t.equal(buf.indexOf(0x69732069), 2)
      t.equal(buf.indexOf(0x697374657374), 0)
      t.equal(buf.indexOf(0x69737374), 0)
      t.equal(buf.indexOf(0x69737465), 11)
      t.equal(buf.indexOf(0x69737465), 11)
      t.equal(buf.indexOf(-140), 0)
      t.equal(buf.indexOf(-152), 1)
      t.equal(buf.indexOf(0xff), -1)
      t.equal(buf.indexOf(0xffff), -1)
    }

    // Test that Uint8Array arguments are okay.
    {
      const needle = new Uint8Array([ 0x66, 0x6f, 0x6f ])
      const haystack = new BufferList(Buffer.from('a foo b foo'))
      t.equal(haystack.indexOf(needle), 2)
    }

    t.end()
  })