import * as marked from 'marked'
import {FrontMatter, isNamedSection, Link, MarkMark, Section} from './types'

/**
 * Parses a MarkMark document (Markdown text) into a `MarkMark` structure.
 *
 * The document is tokenized with `marked` and its tokens are walked once.
 * Front-matter, section headings, section descriptions, links, link tags and
 * link URLs are collected along the way.
 */
export class Parser {
    /**
     * Parse the given Markdown content.
     *
     * @param content - the raw Markdown text
     * @returns the parsed document; sections that contain no links are omitted
     */
    public parse(content: string): MarkMark {
        const mm: MarkMark = {
            frontmatter: {
                syntax: 'v1',
            },
            sections: [],
        }

        let foundFrontmatter = false
        let currentSection: Section = { links: [] }
        let currentLink: Link|undefined

        // Both counters are initialised to `items.length + 1`: the value 1 is
        // the "list finished" sentinel, while 0 means "not parsing a list".
        let sectionListItemsRemaining = 0
        let linkListItemsRemaining = 0

        // Direct children of the items of a section-level list. Tracked here
        // instead of tagging the tokens themselves, so that the tokens produced
        // by `marked` are never mutated.
        const sectionLevelTokens = new WeakSet<marked.marked.Token>()

        const walkTokens = (token: marked.marked.Token): void => {
            // Parse out the front-matter
            if (
                token.type === 'paragraph'
                && !foundFrontmatter
                && token.raw.trim().startsWith('[//]:')
            ) {
                mm.frontmatter = this.parseFrontmatter(token.raw.trim())
                foundFrontmatter = true
                return
            }

            // When we encounter a heading, start a new section
            if ( token.type === 'heading' ) {
                if ( currentSection.links.length ) mm.sections.push(currentSection)
                currentSection = { title: token.text, links: [] }
                return
            }

            // When we encounter a non-frontmatter paragraph and we're in a section,
            // assume it's the description for the section
            if (
                token.type === 'paragraph'
                && isNamedSection(currentSection)
                && !token.raw.trim().startsWith('[//]:')
            ) {
                currentSection.description = token.raw
                return
            }

            // If we're not currently parsing a section and we encounter a list,
            // start parsing that list (grab the # of items in the list)
            if ( !sectionListItemsRemaining && token.type === 'list' ) {
                // Explicitly mark the top-level text/list tokens as "section" items
                // to prevent double-counting. This is because `marked` parses text
                // list items as a text-w/in-a-text.
                for ( const listItem of token.items ) {
                    for ( const child of listItem.tokens ) {
                        sectionLevelTokens.add(child)
                    }
                }

                sectionListItemsRemaining = token.items.length + 1
                return  // to avoid conflict with linkListItemsRemaining
            }

            // If we're parsing a section list and we're NOT parsing a link's URL list
            // and we encounter some text, assume it's the name of a link and start parsing it
            if (
                sectionListItemsRemaining
                && !linkListItemsRemaining
                && token.type === 'text'
                && sectionLevelTokens.has(token)
            ) {
                currentLink = {
                    title: token.text.split(' #')[0].trim(),
                    tags: this.parseTags(token.text),
                    urls: [],
                }

                sectionListItemsRemaining -= 1
                return
            }

            // If we're parsing a section list but not a link URL list and we encounter a list,
            // assume it's the inner list of link URLs and start parsing it.
            // (No early return: the checks below still run for this token.)
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'list' ) {
                linkListItemsRemaining = token.items.length + 1
            }

            // If we're parsing the URL list for a link and we encounter a link,
            // add its URL to the URLs for currentLink
            if (
                currentLink
                && sectionListItemsRemaining
                && linkListItemsRemaining
                && token.type === 'link'
            ) {
                currentLink.urls.push(token.href)
                linkListItemsRemaining -= 1
            }

            // If we were parsing a link and we ran out of URLs for the link,
            // stop parsing that link and push it into the section
            if ( currentLink && linkListItemsRemaining === 1 ) {
                linkListItemsRemaining = 0
                currentSection.links.push(currentLink)
                currentLink = undefined

                // If that was the last link in the section, end the section
                if ( sectionListItemsRemaining === 1 ) {
                    mm.sections.push(currentSection)
                    sectionListItemsRemaining = 0
                    currentSection = { links: [] }
                }
            }
        }

        // Tokenize and walk the tokens directly rather than registering the
        // callback via `marked.use(...)`: `use` mutates the global `marked`
        // instance, so every call to parse() would leave another hook behind
        // that keeps firing on all later documents.
        const tokens = marked.marked.lexer(content)
        marked.marked.walkTokens(tokens, walkTokens)

        // Flush the section still being built, then drop sections without links
        mm.sections.push(currentSection)
        mm.sections = mm.sections.filter(s => s.links.length)
        return mm
    }

    /**
     * Build the front-matter from a block of `[//]: #(key: value)` entries.
     *
     * Only the `markmark-author-name`, `markmark-author-email` and
     * `markmark-author-href` keys are read; `syntax` is always `'v1'`.
     *
     * @param text - the trimmed raw text of the front-matter paragraph
     * @returns the front-matter, with an author field set for each non-empty key found
     */
    protected parseFrontmatter(text: string): FrontMatter {
        const fm: FrontMatter = {
            syntax: 'v1',
        }

        const matcher = /\[\/\/]:\s+#\(([a-zA-Z0-9_\-]+):\s+(.*)\)/g

        // Collapse all matched entries into one key -> value lookup;
        // when a key repeats, the last occurrence wins.
        const rawFrontmatter: Record<string, string> = [...text.matchAll(matcher)]
            .map(match => ({[match[1]]: match[2]}))
            .reduce((carry, current) => ({...carry, ...current}), {})

        if ( rawFrontmatter['markmark-author-name'] ) fm.authorName = rawFrontmatter['markmark-author-name']
        if ( rawFrontmatter['markmark-author-email'] ) fm.authorEmail = rawFrontmatter['markmark-author-email']
        if ( rawFrontmatter['markmark-author-href'] ) fm.authorHref = rawFrontmatter['markmark-author-href']

        return fm
    }

    /**
     * Extract the `#tag` names from a link's text.
     *
     * @param text - the text of a link list item
     * @returns the tag names without the leading `#`, in order of appearance
     */
    protected parseTags(text: string): string[] {
        const matcher = /#([a-zA-Z0-9_\-]+)/g
        return [...text.matchAll(matcher)].map(x => x[1])
    }
}