www/src/markmark/parser.ts

158 lines
6.2 KiB
TypeScript

import * as marked from 'marked'
import {FrontMatter, isNamedSection, Link, MarkMark, Section} from './types'
import * as crypto from 'crypto'
/**
 * Parses a MarkMark document (Markdown laid out as headed sections containing
 * lists of links) into a structured `MarkMark` object.
 *
 * The parser tokenizes the Markdown with `marked` and walks the resulting
 * token stream with a small state machine; see `parse()` for the recognised
 * layout.
 */
export class Parser {
    /**
     * Parse MarkMark-formatted Markdown.
     *
     * Recognised structure, in token order:
     *  - a paragraph starting with `[//]:` (first occurrence only) is the front-matter;
     *  - a heading starts a new named section;
     *  - a paragraph inside a named section becomes that section's description;
     *  - a list outside any section list is the section's list of links, where each
     *    item's top-level text is the link title (optionally followed by a
     *    parenthesized date and `#tags`) and a nested list holds the link's URLs.
     *
     * @param content the raw Markdown text
     * @returns the parsed document; sections that ended up with no links are omitted
     */
    public parse(content: string): MarkMark {
        const mm: MarkMark = {
            frontmatter: {
                syntax: 'v1',
            },
            sections: [],
        }

        let foundFrontmatter = false
        let currentSection: Section = { links: [] }
        let currentLink: Link|undefined

        // Both counters are initialised to `items.length + 1` when a list starts, so
        // that 0 means "not inside such a list" and 1 means "every item was consumed".
        let sectionListItemsRemaining = 0
        let linkListItemsRemaining = 0

        // Tokens that are direct children of a section-list item. Tracked in a local
        // WeakSet rather than by tagging the token objects themselves.
        const sectionLevelTokens = new WeakSet<object>()

        const walkTokens = (token: marked.marked.Token) => {
            // Parse out the front-matter
            if ( token.type === 'paragraph' && !foundFrontmatter && token.raw.trim().startsWith('[//]:') ) {
                mm.frontmatter = this.parseFrontmatter(token.raw.trim())
                foundFrontmatter = true
                return
            }

            // When we encounter a heading, start a new section
            // (flushing the previous one if it collected any links)
            if ( token.type === 'heading' ) {
                if ( currentSection.links.length ) mm.sections.push(currentSection)
                currentSection = {
                    title: token.text,
                    links: []
                }
                return
            }

            // When we encounter a non-frontmatter paragraph and we're in a section,
            // assume it's the description for the section
            if ( token.type === 'paragraph' && isNamedSection(currentSection) && !token.raw.trim().startsWith('[//]:') ) {
                currentSection.description = token.raw
                return
            }

            // If we're not currently parsing a section and we encounter a list,
            // start parsing that list (grab the # of items in the list)
            if ( !sectionListItemsRemaining && token.type === 'list' ) {
                for ( const listItem of token.items ) {
                    for ( const child of listItem.tokens ) {
                        // Explicitly mark the top-level text/list tokens as "section" items
                        // to prevent double-counting. This is because `marked` parses text
                        // <li>'s as a text-w/in-a-text.
                        sectionLevelTokens.add(child)
                    }
                }

                sectionListItemsRemaining = token.items.length + 1
                return  // to avoid conflict with linkListItemsRemaining
            }

            // If we're parsing a section list and we're NOT parsing a link's URL list
            // and we encounter some text, assume it's the name of a link and start parsing it
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'text' && sectionLevelTokens.has(token) ) {
                // Everything before the first " #" is the title (plus optional date)
                const [title, date] = this.parseTitleAndDate(token.text.split(' #')[0].trim())
                currentLink = {
                    title,
                    date,
                    // The hash covers the full item text, including date and tags
                    hash: crypto.createHash('sha256').update(token.text).digest('hex'),
                    tags: this.parseTags(token.text),
                    urls: [],
                }
                sectionListItemsRemaining -= 1
                return
            }

            // If we're parsing a section list but not a link URL list and we encounter a list,
            // assume it's the inner list of link URLs and start parsing it
            if ( sectionListItemsRemaining && !linkListItemsRemaining && token.type === 'list' ) {
                linkListItemsRemaining = token.items.length + 1
            }

            // If we're parsing the URL list for a link and we encounter a link,
            // add its URL to the URLs for currentLink
            if ( currentLink && sectionListItemsRemaining && linkListItemsRemaining && token.type === 'link' ) {
                currentLink.urls.push(token.href)
                linkListItemsRemaining -= 1
            }

            // If we were parsing a link and we ran out of URLs for the link,
            // stop parsing that link and push it into the section
            if ( currentLink && linkListItemsRemaining === 1 ) {
                linkListItemsRemaining = 0
                currentSection.links.push(currentLink)
                currentLink = undefined

                // If that was the last link in the section, end the section
                if ( sectionListItemsRemaining === 1 ) {
                    mm.sections.push(currentSection)
                    sectionListItemsRemaining = 0
                    currentSection = { links: [] }
                }
            }
        }

        // Pass the hook as a per-call option. Registering it with `marked.use()` would
        // mutate the global marked defaults and stack one more hook on every call to
        // `parse()`, leaking closures and re-running stale ones on later parses.
        // The rendered HTML is not needed; only the token walk matters.
        marked.marked.parse(content, { walkTokens })

        // Flush whatever section was still open, then drop sections without links
        mm.sections.push(currentSection)
        mm.sections = mm.sections.filter(s => s.links.length)
        return mm
    }

    /**
     * Extract the front-matter from text made of `[//]: #(key: value)` entries.
     *
     * Only the `markmark-author-name`, `markmark-author-email` and
     * `markmark-author-href` keys are read; when a key repeats, the last
     * occurrence wins, and empty values are ignored.
     *
     * @param text the trimmed raw text of the front-matter paragraph
     * @returns the front-matter, always with `syntax: 'v1'`
     */
    protected parseFrontmatter(text: string): FrontMatter {
        const fm: FrontMatter = {
            syntax: 'v1',
        }

        const matcher = /\[\/\/]:\s+#\(([a-zA-Z0-9_\-]+):\s+(.*)\)/g
        const rawFrontmatter = new Map<string, string>()
        for ( const [, key, value] of text.matchAll(matcher) ) {
            rawFrontmatter.set(key, value)
        }

        const authorName = rawFrontmatter.get('markmark-author-name')
        const authorEmail = rawFrontmatter.get('markmark-author-email')
        const authorHref = rawFrontmatter.get('markmark-author-href')

        if ( authorName ) fm.authorName = authorName
        if ( authorEmail ) fm.authorEmail = authorEmail
        if ( authorHref ) fm.authorHref = authorHref
        return fm
    }

    /**
     * Split a link title of the form `Some title (2023-01-02)` into title and date.
     *
     * The date must be the final parenthesized group and consist only of the
     * characters `0-9 - + : T Z`. If there is no such group, or `Date` cannot
     * parse it, the whole (trimmed) text is returned as the title.
     *
     * @param text the link title text, without tags
     * @returns a `[title, date]` tuple; `date` is `undefined` when absent or invalid
     */
    protected parseTitleAndDate(text: string): [string, Date|undefined] {
        text = text.trim()

        const dateMatcher = /(.*)\(([0-9\-+:TZ]+)\)$/
        const result = dateMatcher.exec(text)
        if ( !result ) {
            return [text, undefined]
        }

        const [, title, dateString] = result
        const date = new Date(dateString)
        if ( isNaN(date.getTime()) ) {
            return [text, undefined]
        }

        return [title.trim(), date]
    }

    /**
     * Collect every `#tag` in the text (letters, digits, `_` and `-`).
     *
     * @param text the full text of a link list item
     * @returns the tag names without the leading `#`, in order of appearance
     */
    protected parseTags(text: string): string[] {
        const matcher = /#([a-zA-Z0-9_\-]+)/g
        return [...text.matchAll(matcher)].map(x => x[1])
    }
}