123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350 |
/*
 * md ast - pluggable markdown parser
 */
/**
 * Tag definitions for the markdown parser, keyed by tag name.
 *
 * Declaration order matters: when two tags match at the same position the
 * one declared first is chosen by `findNearest`.
 *
 * Fields read by the parser code in this file:
 *  - startRegexp / regexp: pattern that opens the tag.
 *  - endRegexp: pattern that closes the tag.
 *  - recursive: when false, the content is taken verbatim up to the end
 *    match; when true, nested tags are parsed inside it.
 *  - indent: the opening pattern is matched after a newline plus captured
 *    indentation (see `indentRegexp`).
 *  - content.start: where the content begins, counted from the start match
 *    (`offset`, plus the length of the start match when `point` is 'end').
 *  - content.end: where the content stops, counted from the end match
 *    (`offset`, plus the length of the end match when `point` is 'end').
 *  - begin: number of leading characters of the start match that are kept
 *    in the text fragment preceding the tag.
 *  - forward: how far parsing resumes past the end match (`offset`, plus the
 *    length of the end match when `point` is 'endEnd').
 *  - title: capture group of the start match stored as the node title
 *    (`index` defaults to 1; `recursive` parses it as markdown).
 *
 * The point names listed by the original author are: start, startEnd, end,
 * endEnd. `paired`, `childName` and `onbuild` are not read by the code
 * visible in this file; presumably they are reserved for other consumers.
 */
const syntax = (() => {
  // Builds one `{point, offset}` position descriptor.
  const at = (point, offset) => ({ point, offset });

  return {
    bold: {
      paired: true,
      recursive: true,
      startRegexp: /\*\*\S.*/,
      endRegexp: /\*\*\W/,
      content: { start: at('start', 2), end: at('start', 0) },
      begin: 0,
      forward: at('endEnd', -1),
    },

    bold2: {
      paired: true,
      recursive: true,
      startRegexp: /\s__\S.*/,
      endRegexp: /__\W/,
      content: { start: at('start', 3), end: at('start', 0) },
      begin: 1,
      forward: at('endEnd', -1),
    },

    italic: {
      paired: true,
      recursive: true,
      startRegexp: /\*\S.*/,
      endRegexp: /\S\*[^*]/,
      content: { start: at('start', 1), end: at('start', 1) },
      begin: 0,
      forward: at('endEnd', -1),
    },

    italic2: {
      paired: true,
      recursive: true,
      startRegexp: /\s_\S.*/,
      endRegexp: /\S_\W/,
      content: { start: at('start', 2), end: at('start', 1) },
      begin: 1,
      forward: at('endEnd', -1),
    },

    root: {
      paired: true,
      recursive: true,
      startRegexp: /^/,
      endRegexp: /$/,
      content: { start: at('start', 1), end: at('end', 0) },
      begin: 0,
      forward: at('endEnd', -1),
    },

    heading1: {
      paired: true,
      recursive: true,
      startRegexp: /\n#[ \t]*(.*)\n/,
      endRegexp: /\n#/,
      content: { start: at('end', 0), end: at('start', 0) },
      begin: 1,
      forward: at('end', 0),
      title: {
        // `index` is left unset, so the parser falls back to capture group 1.
        recursive: true,
      },
      // Empty hook; the original note says `this` is meant to be `{tag: }`.
      onbuild(md, mdTags, buildAST) {},
    },

    code: {
      paired: true,
      recursive: false,
      startRegexp: /`/,
      endRegexp: /`/,
      content: { start: at('start', 1), end: at('start', 0) },
      begin: 0,
      forward: at('end', 1),
    },

    codeMultiLine: {
      paired: true,
      recursive: false,
      startRegexp: /\n```\s*\n/,
      endRegexp: /\n```\s*\n/,
      content: { start: at('end', 0), end: at('start', 0) },
      begin: 1,
      forward: at('endEnd', 0),
    },

    codeLanguage: {
      paired: true,
      recursive: false,
      startRegexp: /\n```(\w+)\s*\n/,
      endRegexp: /\n```\s*\n/,
      title: {
        recursive: false,
      },
      content: { start: at('end', 0), end: at('start', 0) },
      begin: 1,
      forward: at('endEnd', 0),
    },

    unOrderedList: {
      indent: true,
      childName: 'unOrderedListItem',
      // `paired` is intentionally not set for lists.
      recursive: true,
      regexp: /-\s*/,
      content: { start: at('end', 0), end: at('start', 0) },
      begin: 1,
      forward: at('end', -1),
    },
  };
})();
/**
 * Wraps a tag pattern so it only matches at the start of a line, after a
 * newline and a run of leading whitespace that is captured as group 1.
 *
 * @param {RegExp} regexp - Pattern to prefix. Only its source is used; flags
 *   are not carried over to the result.
 * @param {number} [count] - Exact number of whitespace characters required.
 *   When omitted, any amount (including none) is accepted.
 * @returns {RegExp} A new regexp of the form `\n(\s*)<source>` or
 *   `\n(\s{count})<source>`.
 */
const indentRegexp = (regexp, count) => {
  const quantifier = count === undefined ? '*' : `{${count}}`;
  // `\\s` is required: a bare `\s` inside a template literal collapses to the
  // letter "s". `regexp.source` (rather than slicing `toString()`) keeps the
  // pattern intact when the regexp carries flags.
  return new RegExp(`\\n(\\s${quantifier})${regexp.source}`);
};
/**
 * Finds the tag whose opening pattern matches earliest in `md`, searching
 * from `offset`.
 *
 * The `root` tag is never a candidate. For each other tag the opening pattern
 * is `startRegexp`, falling back to `regexp`; tags flagged `indent` are
 * matched through `indentRegexp`. When several tags match at the same index,
 * the one declared first in `mdTags` is kept.
 *
 * @param {string} md - Text being parsed.
 * @param {Object} mdTags - Tag definitions keyed by tag name.
 * @param {number} [offset=0] - Position in `md` where the search starts.
 * @returns {[string|undefined, Object]} The tag name and its match. The match
 *   index is relative to `offset`. When nothing matches, the result is
 *   `[undefined, {index: Infinity}]`.
 */
function findNearest(md, mdTags, offset = 0) {
  // The remaining text is the same for every tag, so slice it once.
  const rest = md.slice(offset);
  let nearest;
  let nearestMatch = { index: Infinity };

  for (const [mdTag, { startRegexp, regexp, indent }] of Object.entries(mdTags)) {
    if (mdTag === 'root') continue;

    const basePattern = startRegexp || regexp;
    // A tag without any opening pattern can never start: passing `undefined`
    // to `match` would match the empty string at index 0 and always win.
    if (!basePattern) continue;

    const pattern = indent ? indentRegexp(basePattern) : basePattern;
    const match = rest.match(pattern);
    if (match && match.index < nearestMatch.index) {
      nearestMatch = match;
      nearest = mdTag;
    }
  }

  return [nearest, nearestMatch];
}
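// AST node shape:
// {
//   tag: 'keyFromSyntax',       // key of the tag definition in `syntax`
//   children: [String | Node],  // text fragments and nested nodes, in order
//   parent: Node                // enclosing node (absent on the root)
// }
//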
/**
 * `String.prototype.match` applied to the part of the string starting at
 * `offset`. Indices in the returned match are relative to `offset`.
 *
 * Defined as non-enumerable so the helper does not show up in `for...in`
 * loops over strings.
 *
 * @param {number} offset - Position where matching starts.
 * @param {...*} params - Arguments forwarded to `String.prototype.match`.
 * @returns {Array|null} The match result, or `null` when nothing matches.
 */
Object.defineProperty(String.prototype, 'offsetMatch', {
  value: function offsetMatch(offset, ...params) {
    return this.slice(offset).match(...params);
  },
  writable: true,
  configurable: true,
  enumerable: false,
});
/**
 * Returns the last element of the array, or `undefined` when it is empty.
 *
 * Defined as non-enumerable: an enumerable property on `Array.prototype`
 * would appear in every `for...in` loop over any array.
 *
 * @returns {*} The last element, or `undefined`.
 */
Object.defineProperty(Array.prototype, 'last', {
  value: function last() {
    return this[this.length - 1];
  },
  writable: true,
  configurable: true,
  enumerable: false,
});
/**
 * Removes up to `indent` leading characters from every line whose first
 * `indent` characters are all whitespace. Other lines are left unchanged.
 *
 * Defined as non-enumerable so the helper does not show up in `for...in`
 * loops over strings.
 *
 * @param {number} indent - Number of leading characters to strip.
 * @returns {string} The text with the indentation removed, lines rejoined
 *   with `\n`.
 */
Object.defineProperty(String.prototype, 'cutIndent', {
  value: function cutIndent(indent) {
    return this.split('\n')
      .map((line) => (/^\s*$/.test(line.slice(0, indent)) ? line.slice(indent) : line))
      .join('\n');
  },
  writable: true,
  configurable: true,
  enumerable: false,
});
/**
 * Parses markdown text into a tree of nodes, driven by the tag definitions
 * in `mdTags`.
 *
 * The function works on the node at the top of `stack` (or on `tree` when the
 * stack is empty). It repeatedly looks for the nearest opening tag and for
 * the closing pattern of the current node, pushes plain text fragments and
 * child nodes into `children`, and recurses for every opened tag.
 *
 * Properties written on nodes: `children`, `title` (when the tag defines
 * one), `endContent`, `endOffset`, `endMatch`; child nodes additionally get
 * `tag`, `startOffset`, `parent` and `startMatch`.
 *
 * NOTE(review): handling of `indent` tags (lists) looks unfinished in the
 * original; only the closing pattern is derived from the indentation.
 *
 * @param {string} md - Markdown source. When the current node is the root,
 *   the text is wrapped in a leading and trailing newline before parsing.
 * @param {Object} [mdTags=syntax] - Tag definitions keyed by tag name.
 * @param {number} [offset=0] - Position where parsing of the node starts.
 * @param {Object} [tree={tag: 'root'}] - Root node of the tree being built.
 * @param {Object[]} [stack=[]] - Chain of open nodes; the last one is parsed.
 * @returns {Object} The node that was parsed (the root for external calls).
 */
function buildAST(md, mdTags = syntax, offset = 0, tree = { tag: 'root' }, stack = []) {
  const currentNode = stack.length > 0 ? stack[stack.length - 1] : tree;
  // Tag patterns rely on newlines as line anchors, so the root text is padded.
  const text = currentNode.tag === 'root' ? `\n${md}\n` : md;

  currentNode.children = currentNode.children || [];
  const { children } = currentNode;

  const tagDef = mdTags[currentNode.tag];
  const {
    indent,
    title,
    recursive,
    regexp,
    content: { end: { offset: offsetEnd, point } },
    forward,
  } = tagDef;
  let { endRegexp } = tagDef;

  // The outermost node of an indented construct ends where the same pattern
  // reappears at the indentation captured by its start match.
  if (indent && currentNode.parent.tag !== currentNode.tag) {
    endRegexp = indentRegexp(regexp, currentNode.startMatch[1].length);
  }

  if (title) {
    const { index = 1, recursive: isTitleRecursive } = title;
    const titleContent = currentNode.startMatch[index];
    if (titleContent && isTitleRecursive) {
      currentNode.title = buildAST(titleContent, mdTags).children;
      currentNode.title.forEach((item) => {
        // Children may be plain strings; setting a property on a primitive
        // throws in strict/module code, so only nodes are re-parented.
        if (item !== null && typeof item === 'object') item.parent = currentNode;
      });
    } else {
      currentNode.title = [titleContent];
    }
  }

  while (offset < text.length) {
    const [nearest, nearestMatch] = findNearest(text, mdTags, offset);
    const endMatch = text.slice(offset).match(endRegexp);

    // Close the node when it is non-recursive, or when its closing pattern
    // comes no later than the nearest opening tag.
    const closesHere =
      !recursive || (endMatch && (!nearest || endMatch.index <= nearestMatch.index));
    if (closesHere) {
      // No closing pattern found: treat the end of the text as the end, with
      // a placeholder match string whose length is used like a real match.
      const closing = endMatch || { index: text.length - offset, 0: 'zzz' };
      currentNode.endContent =
        offset + closing.index + offsetEnd + (point === 'end' ? closing[0].length : 0);
      children.push(text.slice(offset, currentNode.endContent));
      offset += closing.index + forward.offset + (forward.point === 'endEnd' ? closing[0].length : 0);
      currentNode.endOffset = offset;
      currentNode.endMatch = closing;
      return currentNode;
    }

    if (!nearest) {
      // Nothing opens or closes any more: the rest is plain text.
      children.push(text.slice(offset));
      offset = text.length;
    } else if (nearestMatch.index) {
      // Plain text precedes the next tag; `begin` characters of the start
      // match are kept in that text fragment.
      const { begin } = mdTags[nearest];
      children.push(text.slice(offset, offset + nearestMatch.index + begin));
      offset += nearestMatch.index;
    } else {
      const { content: { start } } = mdTags[nearest];
      const newNode = {
        tag: nearest,
        startOffset: offset,
        parent: currentNode,
        startMatch: nearestMatch,
      };
      children.push(newNode);
      const contentStart =
        offset + start.offset + (start.point === 'end' ? nearestMatch[0].length : 0);
      buildAST(text, mdTags, contentStart, tree, [...stack, newNode]);
      offset = newNode.endOffset;
    }
  }

  // Reached the end of the text without a closing match: record where parsing
  // stopped so the caller does not resume from an undefined offset.
  currentNode.endOffset = offset;
  return currentNode;
}
// Usage example:
//const md =
//`
//# heading1
//some _text_
//# heading2
//and **what** is interesting here?)))
//`;
//console.log( buildAST(md).children)
|