index.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. /*
  2. * md ast - pluggable markdown parser
  3. */
  /**
   * Built-in tag table used as the default `mdTags` of buildAST().
   *
   * Keys are tag names. Entry order is significant: findNearest() walks the
   * entries in order and only replaces its candidate on a strictly smaller
   * match index, so among equally-near matches the earlier key wins (e.g.
   * `bold` is tried before `italic`).
   *
   * Fields of one tag definition, as consumed by the parser in this file:
   * - `startRegexp` / `regexp`: pattern that opens the tag (`startRegexp` is
   *   preferred when both exist).
   * - `endRegexp`: pattern buildAST() searches for to close the tag.
   * - `content.start`: where the tag's content begins; `offset` is added to the
   *   opening position, plus the start-match length when `point` is 'end'.
   * - `content.end`: where the content stops relative to the end match;
   *   `offset` is added, plus the end-match length when `point` is 'end'.
   * - `begin`: how many leading characters of the start match are still pushed
   *   to the parent as plain text.
   * - `forward`: where parsing resumes after the end match; `offset` is added,
   *   plus the end-match length when `point` is 'endEnd'. (The original author
   *   listed the point names as: start, startEnd, end, endEnd.)
   * - `recursive`: when false, buildAST() closes the tag at the end match
   *   without descending into nested tags.
   * - `title`: when present, capture group `title.index` (default 1) of the
   *   start match becomes `node.title`; it is parsed as markdown when
   *   `title.recursive` is true.
   * - `indent` / `childName`: marks an indentation-delimited tag whose opening
   *   pattern is wrapped by indentRegexp(); its items get the tag `childName`.
   * - `paired`: destructured by findNearest() but not otherwise read by the
   *   code visible in this file.
   */
  const syntax = (() => {
    // One { point, offset } position descriptor.
    const anchor = (point, offset) => ({ point, offset });

    // Inline emphasis markers differ only in patterns, content offsets and `begin`.
    const emphasis = (startRegexp, endRegexp, contentStartOffset, contentEndOffset, begin) => ({
      paired: true,
      recursive: true,
      startRegexp,
      endRegexp,
      content: {
        start: anchor('start', contentStartOffset),
        end: anchor('start', contentEndOffset),
      },
      begin,
      forward: anchor('endEnd', -1),
    });

    // Content span shared by block tags: from the end of the start match to
    // the start of the end match.
    const betweenMatches = () => ({
      start: anchor('end', 0),
      end: anchor('start', 0),
    });

    return {
      bold: emphasis(/\*\*\S.*/, /\*\*\W/, 2, 0, 0),
      bold2: emphasis(/\s__\S.*/, /__\W/, 3, 0, 1),
      italic: emphasis(/\*\S.*/, /\S\*[^*]/, 1, 1, 0),
      italic2: emphasis(/\s_\S.*/, /\S_\W/, 2, 1, 1),
      root: {
        paired: true,
        recursive: true,
        startRegexp: /^/,
        endRegexp: /$/,
        content: {
          start: anchor('start', 1),
          end: anchor('end', 0),
        },
        begin: 0,
        forward: anchor('endEnd', -1),
      },
      heading1: {
        paired: true,
        recursive: true,
        startRegexp: /\n#[ \t]*(.*)\n/,
        endRegexp: /\n#/,
        content: betweenMatches(),
        begin: 1,
        forward: anchor('end', 0),
        title: {
          // index: 1,
          recursive: true,
        },
        // Empty hook; nothing in the visible code calls it. The original note
        // says `this` is meant to be `{tag: ...}`.
        onbuild(md, mdTags, buildAST) {
        },
      },
      code: {
        paired: true,
        recursive: false,
        startRegexp: /`/,
        endRegexp: /`/,
        content: {
          start: anchor('start', 1),
          end: anchor('start', 0),
        },
        begin: 0,
        forward: anchor('end', 1),
      },
      codeMultiLine: {
        paired: true,
        recursive: false,
        startRegexp: /\n```\s*\n/,
        endRegexp: /\n```\s*\n/,
        content: betweenMatches(),
        begin: 1,
        forward: anchor('endEnd', 0),
      },
      codeLanguage: {
        paired: true,
        recursive: false,
        startRegexp: /\n```(\w+)\s*\n/,
        endRegexp: /\n```\s*\n/,
        title: {
          recursive: false,
        },
        content: betweenMatches(),
        begin: 1,
        forward: anchor('endEnd', 0),
      },
      unOrderedList: {
        indent: true,
        childName: 'unOrderedListItem',
        // paired: true,
        recursive: true,
        regexp: /-\s*\S/,
        content: {
          start: anchor('end', -1),
          end: anchor('start', 0),
        },
        begin: 1,
        forward: anchor('end', 0),
      },
    };
  })();
  /**
   * Wraps a tag pattern so it only matches at the start of a line, capturing
   * the line's leading whitespace as group 1.
   *
   * @param {RegExp} regexp - Pattern of the tag itself (e.g. the list bullet).
   * @param {number} [count] - Exact number of leading whitespace characters
   *   required; when omitted, any amount (`*`) is accepted.
   * @returns {RegExp} `\n(\s<quantifier>)` followed by the original pattern,
   *   carrying over the original flags.
   */
  const indentRegexp = (regexp, count) => {
    const quantifier = count === undefined ? '*' : `{${count}}`;
    // `source` is the pattern without delimiters or flags. The previous
    // `toString().slice(1, -1)` left a stray "/" (and dropped the flags) for any
    // regexp that had flags, e.g. /x/i became the pattern "x/".
    return new RegExp(`\\n(\\s${quantifier})${regexp.source}`, regexp.flags);
  };
  /**
   * Builds the pattern that finds the next line starting with the given amount
   * of leading whitespace followed by a non-whitespace character. Group 1
   * captures that whitespace.
   *
   * @param {number} [count] - Exact number of whitespace characters; when
   *   omitted, any amount (`*`) is accepted.
   * @returns {RegExp} `\n(\s<quantifier>)\S`
   */
  const indentEndRegexp = (count) => {
    let quantifier = '*';
    if (count !== undefined) {
      quantifier = `{${count}}`;
    }
    const pattern = ['\\n(\\s', quantifier, ')\\S'].join('');
    return new RegExp(pattern);
  };
  /**
   * Finds which tag opens soonest in `md` at or after `offset`.
   *
   * Every entry of `mdTags` except `root` is tried with its `startRegexp`
   * (falling back to `regexp`); tags flagged `indent` have their pattern wrapped
   * by indentRegexp(). Entries are visited in order and a candidate is replaced
   * only by a strictly nearer match, so earlier entries win ties.
   *
   * @param {string} md - Markdown text being parsed.
   * @param {Object} mdTags - Tag table (same shape as `syntax`).
   * @param {number} [offset=0] - Position in `md` to search from.
   * @returns {[string|undefined, Object]} The winning tag name and its match.
   *   `match.index` is relative to `offset`. When nothing matches, the tag is
   *   `undefined` and the match is the placeholder `{index: Infinity}`.
   */
  function findNearest(md, mdTags, offset = 0) {
    // The remaining text is the same for every tag, so slice it once.
    const rest = md.slice(offset);
    let nearest;
    let nearestMatch = { index: Infinity };

    for (const [mdTag, { startRegexp, regexp, indent }] of Object.entries(mdTags)) {
      if (mdTag === 'root') continue;

      const openPattern = startRegexp || regexp;
      // A tag without an opening pattern must be skipped: match(undefined)
      // matches the empty string at index 0 and would always be "nearest".
      if (!openPattern) continue;

      const pattern = indent ? indentRegexp(openPattern) : openPattern;
      const match = rest.match(pattern);
      if (match && match.index < nearestMatch.index) {
        nearestMatch = match;
        nearest = mdTag;
      }
    }

    return [nearest, nearestMatch];
  }
  244. //node:
  245. //{
  246. // tag: 'keyFromSyntax',
  247. // children: [String, Node]
  248. // parent: node
  249. //}
  250. //
  /**
   * Adds `String.prototype.offsetMatch(offset, ...params)`: runs
   * `String.prototype.match` on the part of the string starting at `offset`.
   * Any `index` in the result is therefore relative to `offset`, not to the
   * whole string.
   *
   * Defined as a non-enumerable property (like built-in methods) so it does not
   * show up in `for...in` loops over strings; a plain assignment would make it
   * enumerable. It stays writable and configurable, as an assignment would.
   */
  Object.defineProperty(String.prototype, 'offsetMatch', {
    value: function offsetMatch(offset, ...params) {
      return this.slice(offset).match(...params);
    },
    writable: true,
    configurable: true,
    enumerable: false,
  });
  /**
   * Adds `Array.prototype.last(amount = -1)`: returns the element `amount`
   * positions from the end (`-1` is the last element, `-2` the one before it).
   * Yields `undefined` when that position does not exist.
   *
   * Defined as a non-enumerable property so it does not appear as an extra key
   * in `for...in` loops over arrays; a plain assignment would make it
   * enumerable. It stays writable and configurable, as an assignment would.
   */
  Object.defineProperty(Array.prototype, 'last', {
    value: function last(amount = -1) {
      return this[this.length + amount];
    },
    writable: true,
    configurable: true,
    enumerable: false,
  });
  /**
   * Adds `String.prototype.cutIndent(indent)`: removes the first `indent`
   * characters from every line whose first `indent` characters are all
   * whitespace. Lines with other characters in that prefix are left untouched.
   *
   * Defined as a non-enumerable property so it does not show up in `for...in`
   * loops over strings; a plain assignment would make it enumerable. It stays
   * writable and configurable, as an assignment would.
   */
  Object.defineProperty(String.prototype, 'cutIndent', {
    value: function cutIndent(indent) {
      const blankPrefix = /^\s*$/;
      return this.split('\n')
        .map((line) => (blankPrefix.test(line.slice(0, indent)) ? line.slice(indent) : line))
        .join('\n');
    },
    writable: true,
    configurable: true,
    enumerable: false,
  });
  /**
   * Recursively parses markdown into a tree of nodes.
   *
   * A node is `{tag, children, parent, startOffset, startMatch, ...}` where
   * `children` mixes plain strings and nested nodes. While parsing, the function
   * also records `endContent`, `endOffset` and `endMatch` on the node it closes,
   * `title` for tags that define one, and `indentLength` for indented tags.
   *
   * The node being filled is the last entry of `stack`, or `tree` when the
   * stack is empty. For the root node the text is wrapped in newlines first, so
   * patterns anchored on `\n` can match on the first and last line.
   *
   * @param {string} md - Markdown text.
   * @param {Object} [mdTags=syntax] - Tag table (same shape as `syntax`).
   * @param {number} [offset=0] - Position in `md` where this node's content starts.
   * @param {Object} [tree={tag: 'root'}] - Root node, passed through to nested calls.
   * @param {Object[]} [stack=[]] - Chain of open nodes; the last one is parsed.
   * @returns {Object} The node that was parsed. For indented tags this can be
   *   the previous sibling node the new item was merged into.
   */
  function buildAST(md, mdTags = syntax, offset = 0, tree = { tag: 'root' }, stack = []) {
    let currentNode = stack.last() || tree;
    if (currentNode.tag === 'root') md = '\n' + md + '\n';
    currentNode.children = currentNode.children || [];
    // Captured before `currentNode` can be swapped in the indent branch below;
    // the main loop keeps pushing into this array.
    const { children } = currentNode;
    let {
      indent,
      childName,
      title,
      recursive,
      endRegexp,
      content: { end: { offset: offsetEnd, point } },
      forward,
    } = mdTags[currentNode.tag];

    // Sets `parent` on node children only. Children may also be plain strings,
    // and assigning a property to a string primitive throws a TypeError in
    // strict mode / ES modules (it is silently ignored in sloppy mode).
    const adopt = (items, parent) => {
      items.forEach((item) => {
        if (item !== null && typeof item === 'object') item.parent = parent;
      });
    };

    if (indent) {
      if (currentNode.parent.tag !== currentNode.tag) { // list item
        const { parent: { children: siblings } } = currentNode;
        // If the previous sibling has the same tag, drop the node that was just
        // pushed and continue inside that previous sibling instead.
        if (siblings.length > 1 && siblings.last(-2).tag === currentNode.tag) {
          siblings.pop();
          currentNode = siblings.last();
        }
        const { children: itemSiblings } = currentNode;
        const indentLength = currentNode.startMatch[1].length;
        currentNode.indentLength = indentLength;
        // The item ends at the next line indented by exactly `indentLength`.
        endRegexp = indentEndRegexp(indentLength);
        // Placeholder match past the end of the text when no such line exists.
        const endMatch = md.offsetMatch(offset, endRegexp) || { index: md.length + 1, 0: 'zzz' };
        const listMD = md
          .slice(offset, endMatch.index + offset)
          .cutIndent(currentNode.startMatch[0].length - 1);
        const newNode = {
          tag: childName,
          startOffset: offset,
          parent: currentNode,
          startMatch: currentNode.startMatch,
        };
        itemSiblings.push(newNode);
        // The item's body is parsed as an independent markdown document.
        newNode.children = buildAST(listMD, mdTags).children;
        adopt(newNode.children, currentNode);
        offset = newNode.endOffset = currentNode.endOffset = endMatch.index + offset;
      }
    }

    if (title) {
      const { index = 1, recursive: parseTitle } = title;
      const { [index]: titleContent } = currentNode.startMatch;
      if (titleContent && parseTitle) {
        currentNode.title = buildAST(titleContent, mdTags).children;
        adopt(currentNode.title, currentNode);
      } else {
        currentNode.title = [titleContent];
      }
    }

    while (offset < md.length) {
      const [nearest, nearestMatch] = findNearest(md, mdTags, offset);
      let endMatch = md.offsetMatch(offset, endRegexp);

      if (!recursive || endMatch) { // a closing tag exists (or must be assumed)
        if (!recursive || !nearest || endMatch.index <= nearestMatch.index) { // it comes before any nested tag
          // Non-recursive tag without a closing match: treat the end of the
          // text as the closing position.
          endMatch = endMatch || { index: md.length - offset, 0: 'zzz' };
          currentNode.endContent = offset + endMatch.index + offsetEnd
            + (point === 'end' ? endMatch[0].length : 0);
          if (offset !== currentNode.endContent) {
            children.push(md.slice(offset, currentNode.endContent));
          }
          offset += endMatch.index + forward.offset
            + (forward.point === 'endEnd' ? endMatch[0].length : 0);
          currentNode.endOffset = offset;
          currentNode.endMatch = endMatch;
          return currentNode;
        }
      }

      if (nearest) { // a nested tag opens before this one closes
        const { begin, content: { start } } = mdTags[nearest];
        if (nearestMatch.index) { // plain text precedes the nested tag
          if (nearestMatch.index + begin > 0) {
            children.push(md.slice(offset, offset + nearestMatch.index + begin));
          }
          offset += nearestMatch.index;
        } else { // the nested tag starts right at the cursor
          let newNode = {
            tag: nearest,
            startOffset: offset,
            parent: currentNode,
            startMatch: nearestMatch,
          };
          children.push(newNode);
          newNode = buildAST(
            md,
            mdTags,
            offset + start.offset + (start.point === 'end' ? nearestMatch[0].length : 0),
            tree,
            [...stack, newNode],
          );
          offset = newNode.endOffset;
        }
      } else { // nothing opens any more: the rest is plain text
        children.push(md.slice(offset));
        offset = md.length;
      }
    }

    return currentNode;
  }
  333. //const md =
  334. //`
  335. //# heading1
  336. //какой-то _текст_
  337. //# heading2
  338. //а тут **шо** цикавого?)))
  339. //`;
  340. //console.log( buildAST(md).children)