You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

137 lines
5.5 KiB

import * as marked from 'marked'
import {FrontMatter, isNamedSection, Link, MarkMark, Section} from './types'
/**
 * Parses a MarkMark document (markdown with a conventional layout) into a
 * structured `MarkMark` object.
 *
 * The layout recognised by the token walker in `parse`:
 *   - `[//]: #(key: value)` comment lines at the start form the front-matter
 *   - each heading opens a new section
 *   - a paragraph inside a titled section becomes that section's description
 *   - a list inside a section holds the links: the text of each item is the
 *     link title (with optional `#tags`), and its nested list holds the URLs
 */
export class Parser {
    /**
     * Parse the given markdown source into a `MarkMark` structure.
     *
     * The source is tokenised with the `marked` lexer and the resulting tokens
     * are walked exactly once. Nothing is registered on the global `marked`
     * instance, so repeated calls are independent of one another.
     *
     * @param content the raw markdown text
     * @returns the parsed document; sections without any links are omitted
     */
    public parse(content: string): MarkMark {
        const mm: MarkMark = {
            frontmatter: {
                syntax: 'v1',
            },
            sections: [],
        }

        let foundFrontmatter = false
        let currentSection: Section = { links: [] }
        let currentLink: Link|undefined

        // Both counters are stored as "items left + 1" so that 0 can mean
        // "not currently inside such a list" and 1 can mean "list just finished".
        let sectionListItemsRemaining = 0
        let linkListItemsRemaining = 0

        // Tokens that sit directly inside an <li> of a section-level list.
        // Tracked on the side rather than by mutating marked's token objects.
        const sectionLevelTokens = new WeakSet<object>()

        const walkTokens = (token: marked.marked.Token): void => {
            // Parse out the front-matter (only the first matching paragraph counts)
            if ( token.type === 'paragraph' && !foundFrontmatter && token.raw.trim().startsWith('[//]:') ) {
                mm.frontmatter = this.parseFrontmatter(token.raw.trim())
                foundFrontmatter = true
                return
            }

            // When we encounter a heading, flush the section collected so far
            // (if it has any links) and start a new, titled section
            if ( token.type === 'heading' ) {
                if ( currentSection.links.length ) mm.sections.push(currentSection)
                currentSection = {
                    title: token.text,
                    links: [],
                }
                return
            }

            // A non-frontmatter paragraph inside a titled section is taken to be
            // the description of that section
            if ( token.type === 'paragraph' && isNamedSection(currentSection) && !token.raw.trim().startsWith('[//]:') ) {
                currentSection.description = token.raw
                return
            }

            // If we're not currently parsing a section list and we encounter a list,
            // start parsing that list (grab the # of items in the list)
            if ( !sectionListItemsRemaining && token.type === 'list' ) {
                // Remember the top-level text/list tokens of each item as "section"
                // items to prevent double-counting: `marked` parses a text <li>
                // as a text token nested within a text token.
                for ( const listItem of token.items ) {
                    for ( const child of listItem.tokens ) {
                        sectionLevelTokens.add(child)
                    }
                }

                sectionListItemsRemaining = token.items.length + 1
                return // to avoid conflict with linkListItemsRemaining
            }

            // If we're parsing a section list and we're NOT parsing a link's URL list
            // and we encounter section-level text, it's the title of a new link
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'text' && sectionLevelTokens.has(token) ) {
                currentLink = {
                    // Everything before the first " #" is the title; the tags follow
                    title: token.text.split(' #')[0].trim(),
                    tags: this.parseTags(token.text),
                    urls: [],
                }
                sectionListItemsRemaining -= 1
                return
            }

            // If we're parsing a section list but not a link URL list and we encounter
            // a list, it's the inner list of link URLs: start counting its items.
            // (Deliberately no `return` here, matching the fall-through below.)
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'list' ) {
                linkListItemsRemaining = token.items.length + 1
            }

            // If we're parsing the URL list for a link and we encounter a link,
            // add its URL to the URLs for currentLink
            if ( currentLink && sectionListItemsRemaining && linkListItemsRemaining && token.type === 'link' ) {
                currentLink.urls.push(token.href)
                linkListItemsRemaining -= 1
            }

            // If we were parsing a link and we ran out of URLs for the link,
            // stop parsing that link and push it into the section
            if ( currentLink && linkListItemsRemaining === 1 ) {
                linkListItemsRemaining = 0
                currentSection.links.push(currentLink)
                currentLink = undefined

                // If that was the last link in the section, end the section
                if ( sectionListItemsRemaining === 1 ) {
                    mm.sections.push(currentSection)
                    sectionListItemsRemaining = 0
                    currentSection = { links: [] }
                }
            }
        }

        // Lex and walk locally. Registering the walker through `marked.use()`
        // would add it to marked's global options on every call, so walkers
        // (and the state they close over) would pile up across parses.
        marked.marked.walkTokens(marked.marked.lexer(content), walkTokens)

        // Flush whatever section was still open, then drop sections with no links
        mm.sections.push(currentSection)
        mm.sections = mm.sections.filter(s => s.links.length)
        return mm
    }

    /**
     * Extract the front-matter from a block of `[//]: #(key: value)` lines.
     *
     * Only the `markmark-author-name`, `markmark-author-email` and
     * `markmark-author-href` keys are read; each is copied to the result only
     * when its value is non-empty. If a key appears more than once, the last
     * occurrence wins. `syntax` is always `'v1'`.
     *
     * @param text the trimmed raw text of the front-matter paragraph
     * @returns the parsed front-matter
     */
    protected parseFrontmatter(text: string): FrontMatter {
        const fm: FrontMatter = {
            syntax: 'v1',
        }

        const matcher = /\[\/\/]:\s+#\(([a-zA-Z0-9_\-]+):\s+(.*)\)/g

        // Single pass over the matches; later entries overwrite earlier ones
        const rawFrontmatter = new Map<string, string>()
        for ( const match of text.matchAll(matcher) ) {
            rawFrontmatter.set(match[1], match[2])
        }

        const authorName = rawFrontmatter.get('markmark-author-name')
        const authorEmail = rawFrontmatter.get('markmark-author-email')
        const authorHref = rawFrontmatter.get('markmark-author-href')

        // Truthiness checks on purpose: an empty value is treated as absent
        if ( authorName ) fm.authorName = authorName
        if ( authorEmail ) fm.authorEmail = authorEmail
        if ( authorHref ) fm.authorHref = authorHref

        return fm
    }

    /**
     * Collect every `#tag` in the given text, in order of appearance.
     *
     * A tag is `#` followed by one or more letters, digits, underscores or dashes.
     *
     * @param text the text of a link's list item
     * @returns the tag names without the leading `#`
     */
    protected parseTags(text: string): string[] {
        const matcher = /#([a-zA-Z0-9_\-]+)/g
        return Array.from(text.matchAll(matcher), match => match[1])
    }
}