www/src/markmark/parser.ts

import * as marked from 'marked'
import {FrontMatter, isNamedSection, Link, MarkMark, Section} from './types'

/**
 * Turns MarkMark source text into a structured `MarkMark` value
 * (front-matter plus sections of links) by walking the token tree
 * produced by `marked`.
 */
export class Parser {
    /**
     * Parse a MarkMark document.
     *
     * The token tree is walked once, in document order, by a small state machine:
     *  - the first paragraph starting with `[//]:` is read as front-matter;
     *  - a heading starts a new section titled with the heading text;
     *  - a paragraph inside a titled section becomes that section's description;
     *  - each item of a section's list names a link (with optional `#tags`), and
     *    the links found in that item's nested list become the link's URLs.
     *
     * The walk is done on a private token list, so nothing is registered on the
     * shared `marked` instance and repeated calls do not affect each other.
     *
     * @param content Raw MarkMark source text.
     * @returns The parsed document; sections that contain no links are omitted.
     */
    public parse(content: string): MarkMark {
        const mm: MarkMark = {
            frontmatter: {
                syntax: 'v1',
            },
            sections: [],
        }

        let foundFrontmatter = false
        let currentSection: Section = { links: [] }
        let currentLink: Link|undefined

        // Both counters hold "items left + 1" while a list is being parsed, so that
        // 0 can mean "not inside such a list" and 1 means "the list is exhausted".
        let sectionListItemsRemaining = 0
        let linkListItemsRemaining = 0

        // Direct children of the section list's items. `marked` nests a text token
        // inside the text token of an <li>, so only these top-level ones may start
        // a link; tracking them here avoids double-counting without mutating tokens.
        const sectionLevelTokens = new WeakSet<object>()

        const walkTokens = (token: marked.marked.Token) => {
            // Parse out the front-matter
            if ( token.type === 'paragraph' && !foundFrontmatter && token.raw.trim().startsWith('[//]:') ) {
                mm.frontmatter = this.parseFrontmatter(token.raw.trim())
                foundFrontmatter = true
                return
            }

            // When we encounter a heading, start a new section
            if ( token.type === 'heading' ) {
                if ( currentSection.links.length ) mm.sections.push(currentSection)
                currentSection = {
                    title: token.text,
                    links: []
                }
                return
            }

            // When we encounter a non-frontmatter paragraph and we're in a section,
            // assume it's the description for the section
            if ( token.type === 'paragraph' && isNamedSection(currentSection) && !token.raw.trim().startsWith('[//]:') ) {
                currentSection.description = token.raw
                return
            }

            // If we're not currently parsing a section and we encounter a list,
            // start parsing that list (grab the # of items in the list)
            if ( !sectionListItemsRemaining && token.type === 'list' ) {
                for ( const listItem of token.items ) {
                    for ( const child of listItem.tokens ) {
                        sectionLevelTokens.add(child)
                    }
                }

                sectionListItemsRemaining = token.items.length + 1
                return  // to avoid conflict with linkListItemsRemaining
            }

            // If we're parsing a section list and we're NOT parsing a link's URL list
            // and we encounter some section-level text, assume it's the name of a link
            // and start parsing it
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'text' && sectionLevelTokens.has(token) ) {
                currentLink = {
                    title: token.text.split(' #')[0].trim(),
                    tags: this.parseTags(token.text),
                    urls: [],
                }

                sectionListItemsRemaining -= 1
                return
            }

            // If we're parsing a section list but not a link URL list and we encounter a list,
            // assume it's the inner list of link URLs and start parsing it
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'list' ) {
                linkListItemsRemaining = token.items.length + 1
            }

            // If we're parsing the URL list for a link and we encounter a link,
            // add its URL to the URLs for currentLink
            if ( currentLink && sectionListItemsRemaining && linkListItemsRemaining && token.type === 'link' ) {
                currentLink.urls.push(token.href)
                linkListItemsRemaining -= 1
            }

            // If we were parsing a link and we ran out of URLs for the link,
            // stop parsing that link and push it into the section
            if ( currentLink && linkListItemsRemaining === 1 ) {
                linkListItemsRemaining = 0
                currentSection.links.push(currentLink)
                currentLink = undefined

                // If that was the last link in the section, end the section
                if ( sectionListItemsRemaining === 1 ) {
                    mm.sections.push(currentSection)
                    sectionListItemsRemaining = 0
                    currentSection = { links: [] }
                }
            }
        }

        // Lex and walk locally instead of `marked.use({ walkTokens })` + `marked.parse()`:
        // `use()` would register this closure globally on every call (it would then run
        // for every later parse, here or elsewhere), and `parse()` would also render
        // HTML that is never used.
        marked.marked.walkTokens(marked.marked.lexer(content), walkTokens)

        // Flush whatever section was still open, then drop sections without links
        mm.sections.push(currentSection)
        mm.sections = mm.sections.filter(s => s.links.length)

        return mm
    }

    /**
     * Extract front-matter values from lines of the form `[//]: #(key: value)`.
     *
     * Only the keys `markmark-author-name`, `markmark-author-email` and
     * `markmark-author-href` are used; when a key appears more than once the
     * last occurrence wins. `syntax` is always `'v1'`.
     *
     * @param text The raw (trimmed) front-matter paragraph.
     * @returns The front-matter with any recognised, non-empty author fields set.
     */
    protected parseFrontmatter(text: string): FrontMatter {
        const fm: FrontMatter = {
            syntax: 'v1',
        }

        const matcher = /\[\/\/]:\s+#\(([a-zA-Z0-9_\-]+):\s+(.*)\)/g
        const rawFrontmatter: Record<string, string> = {}
        for ( const match of text.matchAll(matcher) ) {
            rawFrontmatter[match[1]] = match[2]
        }

        if ( rawFrontmatter['markmark-author-name'] ) fm.authorName = rawFrontmatter['markmark-author-name']
        if ( rawFrontmatter['markmark-author-email'] ) fm.authorEmail = rawFrontmatter['markmark-author-email']
        if ( rawFrontmatter['markmark-author-href'] ) fm.authorHref = rawFrontmatter['markmark-author-href']

        return fm
    }

    /**
     * Collect every `#tag` in the text (letters, digits, `_` and `-`).
     *
     * @param text The text of a link's list item.
     * @returns The tag names without the leading `#`, in order of appearance.
     */
    protected parseTags(text: string): string[] {
        const matcher = /#([a-zA-Z0-9_\-]+)/g
        return [...text.matchAll(matcher)].map(x => x[1])
    }
}
Add initial MarkMark spec and integrate my own links.mark.md file 2023-11-21 04:53:59 +00:00			`import * as marked from 'marked'`
			`import {FrontMatter, isNamedSection, Link, MarkMark, Section} from './types'`

			`export class Parser {`
			`public parse(content: string): MarkMark {`
			`const mm: MarkMark = {`
			`frontmatter: {`
			`syntax: 'v1',`
			`},`
			`sections: [],`
			`}`

			`let foundFrontmatter: boolean = false`
			`let currentSection: Section = { links: [] }`
			`let currentLink: Link\|undefined`
			`let sectionListItemsRemaining: number = 0`
			`let linkListItemsRemaining: number = 0`
			`const walkTokens = (token: marked.marked.Token) => {`
			`// Parse out the front-matter`
			`if ( token.type === 'paragraph' && !foundFrontmatter && token.raw.trim().startsWith('[//]:') ) {`
			`mm.frontmatter = this.parseFrontmatter(token.raw.trim())`
			`foundFrontmatter = true`
			`return`
			`}`


			`// When we encounter a heading, start a new section`
			`if ( token.type === 'heading' ) {`
			`if ( currentSection.links.length ) mm.sections.push(currentSection)`
			`currentSection = {`
			`title: token.text,`
			`links: []`
			`}`
			`return`
			`}`

			`// When we encounter a non-frontmatter paragraph and we're in a section,`
			`// assume it's the description for the section`
			`if ( token.type === 'paragraph' && isNamedSection(currentSection) && !token.raw.trim().startsWith('[//]:') ) {`
			`currentSection.description = token.raw`
			`return`
			`}`


			`// If we're not currently parsing a section and we encounter a list,`
			`// start parsing that list (grab the # of items in the list)`
			`if ( !sectionListItemsRemaining && token.type === 'list' ) {`
			`token.items.map(listItem => {`
			`listItem.tokens.map(token => {`
			`// Explicitly mark the top-level text/list tokens as "section" items`
			// to prevent double-counting. This is because `marked` parses text
			`// <li>'s as a text-w/in-a-text.`
			`(token as any).mmIsSectionLevel = true`
			`})`
			`})`

			`sectionListItemsRemaining = token.items.length + 1`
			`return // to avoid conflict with linkListItemsRemaining`
			`}`

			`// If we're parsing a section list and we're NOT parsing a link's URL list`
			`// and we encounter some text, assume it's the name of a link and start parsing it`
			`if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'text' && (token as any).mmIsSectionLevel ) {`
			`currentLink = {`
			`title: token.text.split(' #')[0].trim(),`
			`tags: this.parseTags(token.text),`
			`urls: [],`
			`}`

			`sectionListItemsRemaining -= 1`
			`return`
			`}`


			`// If we're parsing a section list but not a link URL list and we encounter a list,`
			`// assume it's the inner list of link URLs and start parsing it`
			`if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'list' ) {`
			`linkListItemsRemaining = token.items.length + 1`
			`}`

			`// If we're parsing the URL list for a link and we encounter a link,`
			`// add its URL to the URLs for currentLink`
			`if ( currentLink && sectionListItemsRemaining && linkListItemsRemaining && token.type === 'link' ) {`
			`currentLink.urls.push(token.href)`
			`linkListItemsRemaining -= 1`
			`}`

			`// If we were parsing a link and we ran out of URLs for the link,`
			`// stop parsing that link and push it into the section`
			`if ( currentLink && linkListItemsRemaining === 1 ) {`
			`linkListItemsRemaining = 0`
			`currentSection.links.push(currentLink)`
			`currentLink = undefined`

			`// If that was the last link in the section, end the section`
			`if ( sectionListItemsRemaining === 1 ) {`
			`mm.sections.push(currentSection)`
			`sectionListItemsRemaining = 0`
			`currentSection = { links: [] }`
			`}`
			`}`
			`}`


			`marked.marked.use({ walkTokens })`
			`marked.marked.parse(content)`

			`mm.sections.push(currentSection)`
			`mm.sections = mm.sections.filter(s => s.links.length)`

			`return mm`
			`}`

			`protected parseFrontmatter(text: string): FrontMatter {`
			`const fm: FrontMatter = {`
			`syntax: 'v1',`
			`}`

			`const matcher = /\[\/\/]:\s+#\(([a-zA-Z0-9_\-]+):\s+(.*)\)/g`
			`const rawFrontmatter: Record<string, string> =`
			`[...text.matchAll(matcher)]`
			`.map(match => ({[match[1]]: match[2]}))`
			`.reduce((carry, current) => ({...carry, ...current}), {})`

			`if ( rawFrontmatter['markmark-author-name'] ) fm.authorName = rawFrontmatter['markmark-author-name']`
			`if ( rawFrontmatter['markmark-author-email'] ) fm.authorEmail = rawFrontmatter['markmark-author-email']`
			`if ( rawFrontmatter['markmark-author-href'] ) fm.authorHref = rawFrontmatter['markmark-author-href']`

			`return fm`
			`}`

			`protected parseTags(text: string): string[] {`
			`const matcher = /#([a-zA-Z0-9_\-]+)/g`
			`return [...text.matchAll(matcher)].map(x => x[1])`
			`}`
			`}`