Files
str/src/vm/lexer.ts

121 lines
3.6 KiB
TypeScript

import {BehaviorSubject} from '../util/subject.js'
import {Input} from './input.js'
import {log} from '../log.js'
import {StreamLogger} from '../util/log.js'
/** Token marking the end of a statement. Carries no payload. */
export type LexTerminator = {
    type: 'terminator'
}

/**
 * Token carrying a piece of lexed text.
 *
 * `value` is the accumulated text of the token. `literal`, when present, is
 * always `true` and flags the token as having been emitted as a literal
 * (see `tokenIsLVal`, which never treats a literal token as an l-value).
 */
export type LexInput = {
    type: 'input'
    value: string
    literal?: true
}

/** Any token the lexer can emit, discriminated by its `type` tag. */
export type LexToken = LexTerminator | LexInput
// Module-level stream logger, obtained from the shared `log` facility under the name 'lexer'.
const logger = log.getStreamLogger('lexer')
/**
 * Translation table for the character that follows a backslash escape.
 * Keys are the escaped letter; values are the character it stands for.
 * Characters without an entry here have no special translation.
 */
const LITERAL_MAP: Record<string, string> = {
    n: '\n',  // newline
    r: '\r',  // carriage return
    t: '\t',  // tab
    s: ' ',   // space
}
/**
 * Decide whether an input token names an l-value.
 *
 * A token qualifies only when it is not flagged `literal` and its whole value
 * is a `$` followed by one or more ASCII letters, digits, or underscores.
 *
 * @param input - The token to inspect.
 * @returns `true` when the token has the `$name` shape and is not a literal.
 */
export const tokenIsLVal = (input: LexInput): boolean => {
    // Literal tokens are never l-values, whatever their text looks like.
    if ( input.literal ) {
        return false
    }

    return /^\$[a-zA-Z0-9_]+$/.test(input.value)
}
/**
 * Character-at-a-time lexer.
 *
 * Subscribes to an `Input`, walks every chunk of text it delivers, and
 * publishes the resulting `LexToken`s through `this.next(...)`. All lexer
 * state lives on the instance and is not reset between chunks, so a token,
 * quoted string, escape sequence, or comment may span several chunks.
 *
 * Rules applied to each character, in priority order:
 *  1. Inside a comment, everything except a newline is discarded.
 *  2. The character after a backslash is appended to the current token,
 *     translated through `LITERAL_MAP` when it has an entry there.
 *  3. A backslash starts an escape.
 *  4. An unquoted `;` or newline flushes the pending token (if any), ends any
 *     comment, and emits a terminator.
 *  5. An unquoted space, tab, or carriage return flushes the pending token.
 *  6. An unquoted `-` arriving when the pending token is exactly `-` starts a
 *     comment and discards that pending `-`.
 *  7. A single quote opens a quoted section or closes the current one; closing
 *     emits the pending text as a literal token (even when it is empty).
 *  8. Anything else is appended to the pending token.
 *
 * Text still pending when a chunk ends is NOT flushed; it is only emitted
 * once a later separator, terminator, or closing quote arrives.
 */
export class Lexer extends BehaviorSubject<LexToken> {
    /** True when the previous character was a backslash awaiting its operand. */
    private isEscape = false

    /** True while skipping the remainder of a `--` comment line. */
    private inComment = false

    /** The quote character of the currently open quoted section, if any. */
    private inQuote?: '"'|"'"

    /** Text collected so far for the token being built. */
    private tokenAccumulator = ''

    /**
     * Per-instance logger used by `logState`; obtained under the same 'lexer'
     * name as the module-level `logger`.
     */
    private readonly logger: StreamLogger

    /**
     * @param input - Source of text chunks; each chunk is passed to `lexInput`.
     */
    constructor(input: Input) {
        super()
        this.logger = log.getStreamLogger('lexer')

        // NOTE(review): lexInput is async and mutates shared instance state
        // across awaits. This is only safe if `Input.subscribe` does not start
        // a second chunk before the first one's promise settles -- confirm.
        input.subscribe(chunk => this.lexInput(chunk))
    }

    /**
     * Log a verbose snapshot of the lexer state just before `c` is processed.
     *
     * @param c - The character about to be lexed.
     */
    private logState(c: string): void {
        this.logger.verbose({
            c,
            isEscape: this.isEscape,
            inComment: this.inComment,
            inQuote: this.inQuote,
            tokenAccumulator: this.tokenAccumulator,
        })
    }

    /**
     * Publish the pending text as an input token, then clear it.
     *
     * @param reason - Why the token is being emitted; only used for logging.
     * @param literal - Pass `true` to flag the emitted token as a literal.
     */
    private async emitToken(reason: string, literal?: true): Promise<void> {
        logger.verbose({ emitToken: reason })
        await this.next({ type: 'input', value: this.tokenAccumulator, literal })
        this.tokenAccumulator = ''
    }

    /**
     * Lex one chunk of text, emitting tokens as they complete.
     *
     * @param input - The chunk to process. May start or end mid-token.
     */
    private async lexInput(input: string): Promise<void> {
        logger.debug({ input })

        // Iterate the string directly instead of split('') + shift(): no
        // temporary array and no repeated front-removal. Every character the
        // lexer reacts to is a single UTF-16 unit, and all other characters
        // are appended in order, so the emitted tokens are the same.
        for ( const c of input ) {
            this.logState(c)

            // We're in a comment. Ignore everything except newlines.
            if ( this.inComment && c !== '\n' ) {
                continue
            }

            // We got the 2nd character after an escape: take it verbatim
            // unless LITERAL_MAP defines a translation for it.
            if ( this.isEscape ) {
                this.tokenAccumulator += LITERAL_MAP[c] ?? c
                this.isEscape = false
                continue
            }

            // We are about to get an escape character
            if ( c === '\\' ) {
                this.isEscape = true
                continue
            }

            // We got a statement terminator
            if ( (c === ';' || c === '\n') && !this.inQuote ) {
                if ( this.tokenAccumulator ) {
                    await this.emitToken('terminator')
                }

                // A newline is the only character that reaches this point
                // while in a comment, so this is where the comment ends.
                this.inComment = false
                await this.next({ type: 'terminator' })
                continue
            }

            // Whitespace separates tokens
            if ( (c === ' ' || c === '\t' || c === '\r') && !this.inQuote ) {
                if ( this.tokenAccumulator ) {
                    await this.emitToken('whitespace')
                }
                continue
            }

            // Comments start with -- (only when the pending token is exactly
            // the first dash, so `a--b` is ordinary text).
            if ( this.tokenAccumulator === '-' && c === '-' && !this.inQuote ) {
                this.tokenAccumulator = ''
                this.inComment = true
                continue
            }

            // We are either starting or ending an unescaped matching quote.
            // For now, only parse single quotes. Makes it nicer to type " in commands.
            if ( c === `'` ) {
                if ( c === this.inQuote ) {
                    // Closing quote: emit unconditionally so that '' yields
                    // an empty literal token.
                    this.inQuote = undefined
                    await this.emitToken('quote', true)
                    continue
                } else if ( !this.inQuote ) {
                    this.inQuote = c
                    continue
                }
            }

            this.tokenAccumulator += c
        }
    }
}