index.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. /*
  2. * md ast - pluggable markdown parser
  3. */
  /**
   * Tag table that drives the parser. Each key is a tag name; the order of
   * keys matters because `findNearest` keeps the FIRST tag among those that
   * match at the same position (e.g. `bold` must precede `italic`).
   *
   * Fields read by the parser code in this file:
   *  - startRegexp / regexp: pattern that opens the tag (`regexp` is used by
   *    the indent-based list tags and is prefixed with an indent capture).
   *  - endRegexp: pattern that closes the tag.
   *  - recursive: when false, the content is taken verbatim up to the closing
   *    match and no nested tags are searched for.
   *  - content.start: where the content begins relative to the opening match
   *    (`point: 'end'` adds the opening match length, then `offset` is added).
   *  - content.end: where the content ends relative to the closing match
   *    (`point: 'end'` adds the closing match length, then `offset` is added).
   *  - begin: added to the opening match index when slicing the plain text
   *    that precedes the tag.
   *  - forward: how far the cursor moves past the closing match
   *    (`point: 'endEnd'` adds the closing match length, then `offset`).
   *  - title: capture group of the opening match exposed as the node title
   *    (`index` defaults to 1; `recursive` parses it as markdown).
   *  - indent / childName: indent-delimited tags (lists) and the tag name
   *    given to their item nodes.
   *
   * `paired` and `onbuild` are declared here but are not read or invoked by
   * the parser code visible in this file.
   */
  const syntax = {
    bold: {
      paired: true,
      recursive: true,
      startRegexp: /\*\*\S.*/,
      endRegexp: /\*\*\W/,
      content: {
        start: { point: 'start', offset: 2 },
        end: { point: 'start', offset: 0 },
      },
      begin: 0,
      forward: { point: 'endEnd', offset: -1 }, // start, startEnd, end, endEnd
    },
    bold2: {
      paired: true,
      recursive: true,
      startRegexp: /\s__\S.*/,
      endRegexp: /__\W/,
      content: {
        start: { point: 'start', offset: 3 },
        end: { point: 'start', offset: 0 },
      },
      begin: 1,
      forward: { point: 'endEnd', offset: -1 },
    },
    italic: {
      paired: true,
      recursive: true,
      startRegexp: /\*\S.*/,
      endRegexp: /\S\*[^*]/,
      content: {
        start: { point: 'start', offset: 1 },
        end: { point: 'start', offset: 1 },
      },
      begin: 0,
      forward: { point: 'endEnd', offset: -1 },
    },
    italic2: {
      paired: true,
      recursive: true,
      startRegexp: /\s_\S.*/,
      endRegexp: /\S_\W/,
      content: {
        start: { point: 'start', offset: 2 },
        end: { point: 'start', offset: 1 },
      },
      begin: 1,
      forward: { point: 'endEnd', offset: -1 },
    },
    root: {
      paired: true,
      recursive: true,
      startRegexp: /^/,
      endRegexp: /$/,
      content: {
        start: { point: 'start', offset: 1 },
        end: { point: 'end', offset: 0 },
      },
      begin: 0,
      forward: { point: 'endEnd', offset: -1 },
    },
    heading6: {
      paired: true,
      recursive: true,
      startRegexp: /\n######[ \t]*(.*)\n/,
      // A level-6 section ends at ANY following heading (levels 1-6), in line
      // with heading5..heading2 which use `#{1,N}`.
      endRegexp: /\n#{1,6}\s/,
      content: {
        start: { point: 'end', offset: -1 },
        end: { point: 'start', offset: 0 },
      },
      begin: 0,
      forward: { point: 'end', offset: 0 },
      title: {
        // index: 1,
        recursive: true,
      },
      onbuild(md, mdTags, buildAST) { // this = {tag: }
      },
    },
    heading5: {
      paired: true,
      recursive: true,
      startRegexp: /\n#####[ \t]*(.*)\n/,
      endRegexp: /\n#{1,5}\s/,
      content: {
        start: { point: 'end', offset: -1 },
        end: { point: 'start', offset: 0 },
      },
      begin: 0,
      forward: { point: 'end', offset: 0 },
      title: {
        // index: 1,
        recursive: true,
      },
      onbuild(md, mdTags, buildAST) { // this = {tag: }
      },
    },
    heading4: {
      paired: true,
      recursive: true,
      startRegexp: /\n####[ \t]*(.*)\n/,
      endRegexp: /\n#{1,4}\s/,
      content: {
        start: { point: 'end', offset: -1 },
        end: { point: 'start', offset: 0 },
      },
      begin: 0,
      forward: { point: 'end', offset: 0 },
      title: {
        // index: 1,
        recursive: true,
      },
      onbuild(md, mdTags, buildAST) { // this = {tag: }
      },
    },
    heading3: {
      paired: true,
      recursive: true,
      startRegexp: /\n###[ \t]*(.*)\n/,
      endRegexp: /\n#{1,3}\s/,
      content: {
        start: { point: 'end', offset: -1 },
        end: { point: 'start', offset: 0 },
      },
      begin: 0,
      forward: { point: 'end', offset: 0 },
      title: {
        // index: 1,
        recursive: true,
      },
      onbuild(md, mdTags, buildAST) { // this = {tag: }
      },
    },
    heading2: {
      paired: true,
      recursive: true,
      startRegexp: /\n##[ \t]*(.*)\n/,
      endRegexp: /\n#{1,2}\s/,
      content: {
        start: { point: 'end', offset: -1 },
        end: { point: 'start', offset: 0 },
      },
      begin: 0,
      forward: { point: 'end', offset: 0 },
      title: {
        // index: 1,
        recursive: true,
      },
      onbuild(md, mdTags, buildAST) { // this = {tag: }
      },
    },
    heading1: {
      paired: true,
      recursive: true,
      startRegexp: /\n#[ \t]*(.*)\n/,
      endRegexp: /\n#\s/,
      content: {
        start: { point: 'end', offset: -1 },
        end: { point: 'start', offset: 0 },
      },
      begin: -1,
      forward: { point: 'end', offset: 0 },
      title: {
        // index: 1,
        recursive: true,
      },
      onbuild(md, mdTags, buildAST) { // this = {tag: }
      },
    },
    code: {
      paired: true,
      recursive: false,
      startRegexp: /`/,
      endRegexp: /`/,
      content: {
        start: { point: 'start', offset: 1 },
        end: { point: 'start', offset: 0 },
      },
      begin: 0,
      forward: { point: 'end', offset: 1 },
    },
    codeMultiLine: {
      paired: true,
      recursive: false,
      startRegexp: /\n```\s*\n/,
      endRegexp: /\n```\s*\n/,
      content: {
        start: { point: 'end', offset: 0 },
        end: { point: 'start', offset: 0 },
      },
      begin: 1,
      forward: { point: 'endEnd', offset: 0 },
    },
    codeLanguage: {
      paired: true,
      recursive: false,
      startRegexp: /\n```(\w+)\s*\n/,
      endRegexp: /\n```\s*\n/,
      title: {
        recursive: false,
      },
      content: {
        start: { point: 'end', offset: 0 },
        end: { point: 'start', offset: 0 },
      },
      begin: 1,
      forward: { point: 'endEnd', offset: 0 },
    },
    unOrderedList: {
      indent: true,
      childName: 'unOrderedListItem',
      // paired: true,
      recursive: true,
      regexp: /-\s*\S/,
      content: {
        start: { point: 'end', offset: -1 },
        end: { point: 'start', offset: 0 },
      },
      begin: 1,
      forward: { point: 'end', offset: 0 },
    },
    orderedList: {
      indent: true,
      childName: 'orderedListItem',
      // paired: true,
      recursive: true,
      regexp: /\d+\.\s*\S/,
      content: {
        start: { point: 'end', offset: -1 },
        end: { point: 'start', offset: 0 },
      },
      begin: 1,
      forward: { point: 'end', offset: 0 },
    },
  };
  /**
   * Prefixes a tag pattern with "newline + captured indentation".
   *
   * @param {RegExp} regexp - Pattern of the tag itself (e.g. a list bullet).
   * @param {number} [count] - Exact number of whitespace characters required;
   *   when omitted (or null) any amount of whitespace is accepted.
   * @returns {RegExp} New pattern whose capture group 1 is the indentation.
   *   The flags of `regexp` are carried over.
   */
  const indentRegexp = (regexp, count) => {
    const quantifier = count == null ? '*' : `{${count}}`;
    // `source` is used instead of slicing `toString()`: slicing breaks as soon
    // as the pattern carries flags ("/x/i" would become "x/").
    return new RegExp(`\\n(\\s${quantifier})${regexp.source}`, regexp.flags);
  };
  /**
   * Builds the pattern that marks the end of an indented block: a newline,
   * captured indentation, then the first non-whitespace character.
   *
   * @param {number} [count] - Exact number of whitespace characters required;
   *   when omitted (or null) any amount of whitespace is accepted.
   * @returns {RegExp} Pattern whose capture group 1 is the indentation.
   */
  const indentEndRegexp = (count) => {
    // null is treated like "not given"; it used to yield the literal `{null}`.
    const quantifier = count == null ? '*' : `{${count}}`;
    return new RegExp(`\\n(\\s${quantifier})\\S`);
  };
  /**
   * Finds the tag whose opening pattern matches closest to `offset`.
   *
   * Tags are tried in the key order of `mdTags`; on equal positions the
   * earlier key wins. The `root` tag is never a candidate.
   *
   * @param {string} md - Markdown text being parsed.
   * @param {Object} mdTags - Tag table (`startRegexp` or `regexp`, optional
   *   `indent` flag per tag).
   * @param {number} [offset=0] - Position in `md` to search from.
   * @returns {[string|undefined, Object]} Tag name and its match; the match
   *   index is relative to `offset`. When nothing matches the pair is
   *   `[undefined, {index: Infinity}]`.
   */
  function findNearest(md, mdTags, offset = 0) {
    // Slice once instead of once per tag.
    const rest = md.slice(offset);
    let nearest;
    let nearestMatch = { index: Infinity };
    for (const [mdTag, { startRegexp, regexp, indent }] of Object.entries(mdTags)) {
      if (mdTag === 'root') continue;
      const opening = startRegexp || regexp;
      // A tag without an opening pattern can never start; `match(undefined)`
      // would otherwise match the empty string at index 0.
      if (!opening) continue;
      const pattern = indent ? indentRegexp(opening) : opening;
      const match = rest.match(pattern);
      if (match && match.index < nearestMatch.index) {
        nearestMatch = match;
        nearest = mdTag;
      }
    }
    return [nearest, nearestMatch];
  }
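  // AST node shape:
  // {
  //   tag: 'keyFromSyntax',       // key of the matching entry in `syntax`
  //   children: [String | Node],  // plain-text chunks and nested nodes
  //   parent: Node                // enclosing node (the root node has none)
  // }
  //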
  /**
   * String#offsetMatch(offset, ...params): runs `String.prototype.match` on
   * the part of the string that starts at `offset`. The returned match index
   * is therefore relative to `offset`, not to the whole string.
   *
   * Installed as a non-enumerable property so that it does not appear when
   * string objects are enumerated with `for...in`.
   */
  Object.defineProperty(String.prototype, 'offsetMatch', {
    value: function offsetMatch(offset, ...params) {
      return this.slice(offset).match(...params);
    },
    writable: true,
    configurable: true,
    enumerable: false,
  });
  /**
   * Array#last(amount = -1): returns the element `amount` positions from the
   * end (`-1` is the last element, `-2` the one before it). Returns
   * `undefined` when that position does not exist.
   *
   * Installed as a non-enumerable property: a plain assignment to
   * `Array.prototype` would show up in every `for...in` over an array.
   */
  Object.defineProperty(Array.prototype, 'last', {
    value: function last(amount = -1) {
      return this[this.length + amount];
    },
    writable: true,
    configurable: true,
    enumerable: false,
  });
  /**
   * String#cutIndent(indent): removes the first `indent` characters from
   * every line whose first `indent` characters are all whitespace. Lines
   * with other characters inside that prefix are left untouched.
   *
   * Installed as a non-enumerable property so that it does not appear when
   * string objects are enumerated with `for...in`.
   */
  Object.defineProperty(String.prototype, 'cutIndent', {
    value: function cutIndent(indent) {
      return this.split('\n')
        .map((line) => (/^\s*$/.test(line.slice(0, indent)) ? line.slice(indent) : line))
        .join('\n');
    },
    writable: true,
    configurable: true,
    enumerable: false,
  });
  /**
   * Parses markdown into a tree of nodes.
   *
   * The function is recursive: the node being filled is the last entry of
   * `stack`, or `tree` when the stack is empty. When the node is the root the
   * text is wrapped in newlines first, because the heading, list and fenced
   * code patterns all start with `\n`.
   *
   * Fields written on nodes: `children`, `title` (tags with a `title`
   * config), `endContent`, `endOffset`, `endMatch`, and `indentLength` for
   * indent-based (list) tags.
   *
   * @param {string} md - Markdown text.
   * @param {Object} [mdTags=syntax] - Tag table, see `syntax`.
   * @param {number} [offset=0] - Position in `md` where the content of the
   *   current node starts.
   * @param {Object} [tree={tag: 'root'}] - Root node of the tree being built.
   * @param {Object[]} [stack=[]] - Chain of open nodes; the last one is filled.
   * @returns {Object} The node that was filled. For list tags this can be a
   *   previous sibling list that the new item was merged into.
   */
  function buildAST(md, mdTags = syntax, offset = 0, tree = { tag: 'root' }, stack = []) {
    // Sets `parent` on nested nodes only; text chunks are primitive strings,
    // and assigning a property to them throws in strict mode.
    const adopt = (items, parent) => {
      items.forEach((item) => {
        if (item !== null && typeof item === 'object') item.parent = parent;
      });
    };

    let currentNode = stack.last() || tree;
    if (currentNode.tag === 'root') md = '\n' + md + '\n';
    currentNode.children = currentNode.children || [];
    // NOTE: stays bound to the node taken from the stack, even if the list
    // branch below re-points `currentNode` to a previous sibling.
    const { children } = currentNode;
    let {
      indent,
      childName,
      title,
      recursive,
      endRegexp,
      content: { end: { offset: offsetEnd, point } },
      forward,
    } = mdTags[currentNode.tag];

    if (indent) {
      if (currentNode.parent.tag !== currentNode.tag) { // li
        const { parent: { children: siblings } } = currentNode;
        // Consecutive items of the same list: drop the freshly created list
        // node and continue filling the previous one.
        if (siblings.length > 1 && siblings.last(-2).tag === currentNode.tag) {
          siblings.pop();
          currentNode = siblings.last();
        }
        const { children: listChildren } = currentNode;
        const indentLength = currentNode.startMatch[1].length;
        currentNode.indentLength = indentLength;
        // The item ends at the next line that has exactly the same indent.
        endRegexp = indentEndRegexp(indentLength);
        const itemEnd = md.offsetMatch(offset, endRegexp) || { index: md.length + 1, 0: 'zzz' };
        const listMD = md
          .slice(offset, itemEnd.index + offset)
          .cutIndent(currentNode.startMatch[0].length - 1);
        const newNode = {
          tag: childName,
          startOffset: offset,
          parent: currentNode,
          startMatch: currentNode.startMatch,
        };
        listChildren.push(newNode);
        newNode.children = buildAST(listMD, mdTags).children;
        adopt(newNode.children, currentNode);
        offset = newNode.endOffset = currentNode.endOffset = itemEnd.index + offset;
      }
    }

    if (title) {
      const { index = 1, recursive: parseTitle } = title;
      const { [index]: titleContent } = currentNode.startMatch;
      if (titleContent && parseTitle) {
        currentNode.title = buildAST(titleContent, mdTags).children;
        adopt(currentNode.title, currentNode);
      } else {
        currentNode.title = [titleContent];
      }
    }

    while (offset < md.length) {
      const [nearest, nearestMatch] = findNearest(md, mdTags, offset);
      let endMatch = md.offsetMatch(offset, endRegexp);
      if (!recursive || endMatch) { // if we (should) find closing tag
        if (!recursive || !nearest || endMatch.index <= nearestMatch.index) { // closing tag is closer than a new nested tag
          // Non-recursive tag without a closing match: consume the rest.
          endMatch = endMatch || { index: md.length - offset, 0: 'zzz' };
          currentNode.endContent = offset + endMatch.index + offsetEnd
            + (point === 'end' ? endMatch[0].length : 0);
          if (offset !== currentNode.endContent) {
            children.push(md.slice(offset, currentNode.endContent));
          }
          offset += endMatch.index + forward.offset
            + (forward.point === 'endEnd' ? endMatch[0].length : 0);
          currentNode.endOffset = offset;
          currentNode.endMatch = endMatch;
          return currentNode;
        }
      }
      if (nearest) { // new nested tag
        const { begin, content: { start } } = mdTags[nearest];
        if (nearestMatch.index) { // plain text before the nested tag
          if (nearestMatch.index + begin > 0) {
            children.push(md.slice(offset, offset + nearestMatch.index + begin));
          }
          offset += nearestMatch.index;
        } else { // new tag right under the cursor (offset)
          let newNode = { tag: nearest, startOffset: offset, parent: currentNode, startMatch: nearestMatch };
          children.push(newNode);
          const contentStart = offset + start.offset
            + (start.point === 'end' ? nearestMatch[0].length : 0);
          newNode = buildAST(md, mdTags, contentStart, tree, [...stack, newNode]);
          offset = newNode.endOffset;
        }
      } else { // no nearest tag: the rest of the text becomes a text child
        children.push(md.slice(offset));
        offset = md.length;
      }
    }
    return currentNode;
  }
  /**
   * Renders a heading node as `<hN>` (title) followed by a `<div>` (section
   * body), wrapped in a fragment. The level N is the last character of the
   * tag name ("heading3" -> h3).
   *
   * @param {Object} props
   * @param {Object} props.react - React-like object (`createElement`, `Fragment`).
   * @param {Array} [props.children] - Rendered section body.
   * @param {Array} [props.title] - Rendered heading title.
   * @param {{tag: string}} props.node - AST node being rendered.
   * @returns {*} Element created by `react.createElement`.
   * @throws {SyntaxError} When the tag name does not end with a digit.
   */
  const Heading = ({ react: React, children, title, node: { tag } }) => {
    const level = Number(tag.slice(-1));
    if (Number.isNaN(level)) throw new SyntaxError('wrong heading name');
    const _ = React.createElement.bind(React);
    // A node without a title or body must not crash the spread below.
    const titleParts = title || [];
    const bodyParts = children || [];
    return _(React.Fragment, null,
      _(`h${level}`, null, ...titleParts),
      _('div', null, ...bodyParts)
    );
  };
  /**
   * Default mapping from AST tag names to React components.
   * A string value is a host element name, a function is a custom component,
   * and the empty string stands for `React.Fragment` (see `toReact`).
   * Tags missing from the map are rendered as `<span>`.
   */
  const defaultMapMDToComponents = {
    heading1: Heading,
    heading2: Heading,
    heading3: Heading,
    heading4: Heading,
    heading5: Heading,
    heading6: Heading,
    // The parser emits `bold` / `italic` (see `syntax`); without these two
    // entries `**x**` and `*x*` fell through to the `<span>` fallback.
    bold: 'strong',
    italic: 'i',
    // Kept so that existing code relying on the old key names still works.
    bold1: 'strong',
    bold2: 'strong',
    italic1: 'i',
    italic2: 'i',
    unOrderedList: 'ul',
    orderedList: 'ol',
    unOrderedListItem: 'li',
    orderedListItem: 'li',
    code: 'code',
    codeMultiLine: 'pre',
    codeLanguage: 'pre',
    root: '',
  };
  /**
   * Converts an AST node (and, recursively, its children) to a React element.
   *
   * Component lookup: `mapMDToComponents[ast.tag]`; an empty string means
   * `React.Fragment`, a missing entry means `"span"`.
   *
   * Host elements (string types) and fragments receive only `key` and their
   * children. Custom components additionally receive `node` (the AST node),
   * `children` (always an array), `title` (rendered title, if any) and
   * `react` (the React object passed in).
   *
   * @param {Object} ast - AST node with `tag`, `children` and optional `title`.
   * @param {Object} React - React-like object (`createElement`, `Fragment`).
   * @param {Object} [mapMDToComponents=defaultMapMDToComponents] - Tag to
   *   component map.
   * @param {string|number} [key] - Element key; set internally to the child's
   *   position so keys are stable between renders.
   * @returns {*} Element created by `React.createElement`.
   */
  function toReact(ast, React, mapMDToComponents = defaultMapMDToComponents, key) {
    const mapped = mapMDToComponents[ast.tag];
    const RenderComponent = mapped || (mapped === '' ? React.Fragment : 'span');
    const _ = React.createElement.bind(React);
    // Text chunks pass through untouched; nested nodes are rendered with their
    // position as key (a random key would remount them on every render).
    const childToReact = (child, index) =>
      (child === null || typeof child !== 'object'
        ? child
        : toReact(child, React, mapMDToComponents, index));
    const children = ast.children.map(childToReact);
    const title = ast.title && ast.title.map(childToReact);

    if (typeof RenderComponent === 'string' || RenderComponent === React.Fragment) {
      // `node`, `react` and `title` are not valid DOM / Fragment props.
      return _(RenderComponent, { key }, ...children);
    }
    return _(RenderComponent, { node: ast, key, children, title, react: React });
  }
  // Export for CommonJS-style environments. `typeof` is used because reading
  // an undeclared global (`window` in Node, `module` in a plain browser
  // script) throws a ReferenceError instead of yielding undefined.
  typeof module !== 'undefined' && (module.exports = {
    buildAST,
    toReact,
  });
  // Diagnostic output: prints the names of all registered syntax tags once,
  // when the script is loaded.
  console.log(Object.keys(syntax));
  // Usage example:
  // const md =
  // `
  // # heading1
  // some _text_
  // # heading2
  // and **what** is interesting here?)))
  // `;
  // console.log(buildAST(md).children);