www/src/markmark/parser.ts

import * as marked from 'marked'
import {FrontMatter, isNamedSection, Link, MarkMark, Section} from './types'

/**
 * Parses a MarkMark document (Markdown following a few conventions) into a
 * structured {@link MarkMark} value.
 *
 * The document is tokenized with `marked` and the token tree is walked once.
 * The walker recognises:
 *   - the first paragraph starting with `[//]:` as the front-matter,
 *   - headings as the start of a new named section,
 *   - other paragraphs inside a named section as that section's description,
 *   - a list as a section's links, where each item's text is the link title
 *     (plus `#tags`) and a nested list holds the link's URLs.
 */
export class Parser {
    /**
     * Parse MarkMark source text.
     *
     * Each call tokenizes and walks `content` with a callback that is local to
     * the call. Nothing is registered on the global `marked` instance, so the
     * result of one call is never touched by a later call.
     *
     * @param content the raw MarkMark/Markdown text
     * @returns the front-matter plus every section that contains at least one link
     */
    public parse(content: string): MarkMark {
        const mm: MarkMark = {
            frontmatter: {
                syntax: 'v1',
            },
            sections: [],
        }

        let foundFrontmatter = false
        let currentSection: Section = { links: [] }
        let currentLink: Link|undefined

        // Both counters are initialised to (item count + 1) when a list starts:
        // 0 means "not inside such a list", 1 means "all items consumed".
        let sectionListItemsRemaining = 0
        let linkListItemsRemaining = 0

        // Direct children of the section list's items. `marked` nests a text
        // token inside the text token of an <li>, so only these top-level ones
        // may be treated as link titles (otherwise each title is counted twice).
        const sectionLevelTokens = new Set<marked.marked.Token>()

        const walkTokens = (token: marked.marked.Token): void => {
            // Parse out the front-matter
            if ( token.type === 'paragraph' && !foundFrontmatter && token.raw.trim().startsWith('[//]:') ) {
                mm.frontmatter = this.parseFrontmatter(token.raw.trim())
                foundFrontmatter = true
                return
            }

            // When we encounter a heading, start a new section
            // (keeping the previous one only if it collected links)
            if ( token.type === 'heading' ) {
                if ( currentSection.links.length ) mm.sections.push(currentSection)
                currentSection = {
                    title: token.text,
                    links: []
                }
                return
            }

            // When we encounter a non-frontmatter paragraph and we're in a section,
            // assume it's the description for the section
            if ( token.type === 'paragraph' && isNamedSection(currentSection) && !token.raw.trim().startsWith('[//]:') ) {
                currentSection.description = token.raw
                return
            }

            // If we're not currently parsing a section and we encounter a list,
            // start parsing that list (grab the # of items in the list)
            if ( !sectionListItemsRemaining && token.type === 'list' ) {
                for ( const listItem of token.items ) {
                    for ( const child of listItem.tokens ) {
                        sectionLevelTokens.add(child)
                    }
                }

                sectionListItemsRemaining = token.items.length + 1
                return  // to avoid conflict with linkListItemsRemaining
            }

            // If we're parsing a section list and we're NOT parsing a link's URL list
            // and we encounter some text, assume it's the name of a link and start parsing it
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'text' && sectionLevelTokens.has(token) ) {
                currentLink = {
                    title: token.text.split(' #')[0].trim(),
                    tags: this.parseTags(token.text),
                    urls: [],
                }

                sectionListItemsRemaining -= 1
                return
            }

            // If we're parsing a section list but not a link URL list and we encounter a list,
            // assume it's the inner list of link URLs and start parsing it
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'list' ) {
                linkListItemsRemaining = token.items.length + 1
            }

            // If we're parsing the URL list for a link and we encounter a link,
            // add its URL to the URLs for currentLink
            if ( currentLink && sectionListItemsRemaining && linkListItemsRemaining && token.type === 'link' ) {
                currentLink.urls.push(token.href)
                linkListItemsRemaining -= 1
            }

            // If we were parsing a link and we ran out of URLs for the link,
            // stop parsing that link and push it into the section
            if ( currentLink && linkListItemsRemaining === 1 ) {
                linkListItemsRemaining = 0
                currentSection.links.push(currentLink)
                currentLink = undefined

                // If that was the last link in the section, end the section
                if ( sectionListItemsRemaining === 1 ) {
                    mm.sections.push(currentSection)
                    sectionListItemsRemaining = 0
                    currentSection = { links: [] }
                }
            }
        }

        // Tokenize and walk directly. Registering the walker with `marked.use()`
        // would add it to the global defaults permanently, so walkers from
        // earlier calls would keep running and mutate results already returned.
        // This also skips rendering HTML that would only be thrown away.
        const tokens = marked.marked.lexer(content)
        marked.marked.walkTokens(tokens, walkTokens)

        // Flush the section still in progress, then drop sections without links
        mm.sections.push(currentSection)
        mm.sections = mm.sections.filter(s => s.links.length)

        return mm
    }

    /**
     * Extract front-matter values from `[//]: #(key: value)` lines.
     *
     * Only `markmark-author-name`, `markmark-author-email` and
     * `markmark-author-href` are copied to the result, and only when their
     * value is non-empty. If a key appears more than once, the last one wins.
     *
     * @param text the raw front-matter text
     * @returns the front-matter, with `syntax` always set to `'v1'`
     */
    protected parseFrontmatter(text: string): FrontMatter {
        const fm: FrontMatter = {
            syntax: 'v1',
        }

        const matcher = /\[\/\/]:\s+#\(([a-zA-Z0-9_\-]+):\s+(.*)\)/g
        const rawFrontmatter = new Map<string, string>()
        for ( const match of text.matchAll(matcher) ) {
            rawFrontmatter.set(match[1], match[2])
        }

        const authorName = rawFrontmatter.get('markmark-author-name')
        const authorEmail = rawFrontmatter.get('markmark-author-email')
        const authorHref = rawFrontmatter.get('markmark-author-href')

        if ( authorName ) fm.authorName = authorName
        if ( authorEmail ) fm.authorEmail = authorEmail
        if ( authorHref ) fm.authorHref = authorHref

        return fm
    }

    /**
     * Collect every `#tag` found in `text`.
     *
     * @param text the text of a link list item
     * @returns the tag names in order of appearance, without the leading `#`
     */
    protected parseTags(text: string): string[] {
        const matcher = /#([a-zA-Z0-9_\-]+)/g
        return [...text.matchAll(matcher)].map(x => x[1])
    }
}