137 lines
5.5 KiB
TypeScript
137 lines
5.5 KiB
TypeScript
|
import * as marked from 'marked'
|
||
|
import {FrontMatter, isNamedSection, Link, MarkMark, Section} from './types'
|
||
|
|
||
|
/**
 * Parses MarkMark-flavoured Markdown into a `MarkMark` structure made of
 * front-matter plus a list of sections, each holding links with tags and URLs.
 */
export class Parser {
    /**
     * Parse a Markdown document into its MarkMark representation.
     *
     * The document is tokenized with `marked` and the token tree is walked once
     * by a small state machine (see the inline comments). All parser state is
     * local to this call: nothing is registered on the global `marked` instance,
     * so repeated calls cannot interfere with each other or with results that
     * were already returned.
     *
     * @param content Raw Markdown source.
     * @returns The parsed front-matter and every section that has at least one link.
     */
    public parse(content: string): MarkMark {
        const mm: MarkMark = {
            frontmatter: {
                syntax: 'v1',
            },
            sections: [],
        }

        let foundFrontmatter = false
        let currentSection: Section = { links: [] }
        let currentLink: Link|undefined

        // Both counters are initialised to `items.length + 1` when a list starts:
        // 0 means "not inside such a list" and 1 means "list exhausted".
        let sectionListItemsRemaining = 0
        let linkListItemsRemaining = 0

        // Direct children of the section list's items. Tracked here instead of
        // tagging the token objects themselves with an ad-hoc property.
        const sectionLevelTokens = new WeakSet<object>()

        const walkTokens = (token: marked.marked.Token) => {
            // Parse out the front-matter (only the first matching paragraph is used)
            if ( token.type === 'paragraph' && !foundFrontmatter && token.raw.trim().startsWith('[//]:') ) {
                mm.frontmatter = this.parseFrontmatter(token.raw.trim())
                foundFrontmatter = true
                return
            }

            // When we encounter a heading, start a new section
            if ( token.type === 'heading' ) {
                if ( currentSection.links.length ) mm.sections.push(currentSection)
                currentSection = {
                    title: token.text,
                    links: [],
                }
                return
            }

            // When we encounter a non-frontmatter paragraph and we're in a named section,
            // assume it's the description for the section
            if ( token.type === 'paragraph' && isNamedSection(currentSection) && !token.raw.trim().startsWith('[//]:') ) {
                currentSection.description = token.raw
                return
            }

            // If we're not currently parsing a section list and we encounter a list,
            // start parsing that list (grab the # of items in the list)
            if ( !sectionListItemsRemaining && token.type === 'list' ) {
                // Explicitly mark the top-level text/list tokens as "section" items
                // to prevent double-counting. This is because `marked` parses text
                // <li>'s as a text-w/in-a-text.
                for ( const listItem of token.items ) {
                    for ( const child of listItem.tokens ) {
                        sectionLevelTokens.add(child)
                    }
                }

                sectionListItemsRemaining = token.items.length + 1
                return  // to avoid conflict with linkListItemsRemaining
            }

            // If we're parsing a section list and we're NOT parsing a link's URL list
            // and we encounter some section-level text, assume it's the name of a link
            // and start parsing it
            if (
                sectionListItemsRemaining
                && !linkListItemsRemaining
                && token.type === 'text'
                && sectionLevelTokens.has(token)
            ) {
                currentLink = {
                    // Everything before the first " #" is the title; tags follow it
                    title: token.text.split(' #')[0].trim(),
                    tags: this.parseTags(token.text),
                    urls: [],
                }

                sectionListItemsRemaining -= 1
                return
            }

            // If we're parsing a section list but not a link URL list and we encounter a list,
            // assume it's the inner list of link URLs and start parsing it.
            // Deliberately no `return`: the checks below must still run for this token.
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'list' ) {
                linkListItemsRemaining = token.items.length + 1
            }

            // If we're parsing the URL list for a link and we encounter a link,
            // add its URL to the URLs for currentLink
            if ( currentLink && sectionListItemsRemaining && linkListItemsRemaining && token.type === 'link' ) {
                currentLink.urls.push(token.href)
                linkListItemsRemaining -= 1
            }

            // If we were parsing a link and we ran out of URLs for the link,
            // stop parsing that link and push it into the section
            if ( currentLink && linkListItemsRemaining === 1 ) {
                linkListItemsRemaining = 0
                currentSection.links.push(currentLink)
                currentLink = undefined

                // If that was the last link in the section, end the section
                if ( sectionListItemsRemaining === 1 ) {
                    mm.sections.push(currentSection)
                    sectionListItemsRemaining = 0
                    currentSection = { links: [] }
                }
            }
        }

        // Tokenize and walk explicitly rather than `marked.use({ walkTokens })`:
        // `use()` installs the hook on the shared marked instance permanently, so
        // every earlier call's closure would also run against this document and
        // mutate results that were already handed back to callers.
        const tokens = marked.marked.lexer(content)
        marked.marked.walkTokens(tokens, walkTokens)

        // Flush whatever section was still open, then drop sections without links
        mm.sections.push(currentSection)
        mm.sections = mm.sections.filter(s => s.links.length)

        return mm
    }

    /**
     * Extract front-matter values from `[//]: #(key: value)` comment lines.
     *
     * Only the `markmark-author-name`, `markmark-author-email` and
     * `markmark-author-href` keys are read; other keys are ignored. If a key
     * appears more than once, the last occurrence wins.
     *
     * @param text Raw text of the front-matter paragraph.
     * @returns Front-matter with `syntax: 'v1'` plus any author fields found.
     */
    protected parseFrontmatter(text: string): FrontMatter {
        const fm: FrontMatter = {
            syntax: 'v1',
        }

        const matcher = /\[\/\/]:\s+#\(([a-zA-Z0-9_\-]+):\s+(.*)\)/g
        const rawFrontmatter: Record<string, string> = {}
        for ( const match of text.matchAll(matcher) ) {
            rawFrontmatter[match[1]] = match[2]
        }

        if ( rawFrontmatter['markmark-author-name'] ) fm.authorName = rawFrontmatter['markmark-author-name']
        if ( rawFrontmatter['markmark-author-email'] ) fm.authorEmail = rawFrontmatter['markmark-author-email']
        if ( rawFrontmatter['markmark-author-href'] ) fm.authorHref = rawFrontmatter['markmark-author-href']

        return fm
    }

    /**
     * Collect every `#tag` in the given text.
     *
     * @param text Link text that may contain `#tag` markers.
     * @returns Tag names without the leading `#`, in order of appearance.
     */
    protected parseTags(text: string): string[] {
        const matcher = /#([a-zA-Z0-9_\-]+)/g
        return [...text.matchAll(matcher)].map(x => x[1])
    }
}
|