96 lines
2.9 KiB
TypeScript
96 lines
2.9 KiB
TypeScript
|
|
import {BehaviorSubject} from '../util/subject.js'
|
||
|
|
import {Input} from './input.js'
|
||
|
|
import {log} from '../log.js'
|
||
|
|
import {StreamLogger} from '../util/log.js'
|
||
|
|
|
||
|
|
/** Token marking the end of a statement; it carries no payload. */
export type LexTerminator = {
    type: 'terminator'
}
|
||
|
|
/**
 * Token carrying a piece of input text.
 *
 * `literal` is either absent or exactly `true`; the lexer sets it on tokens
 * emitted when a quoted section is closed.
 */
export type LexInput = {
    type: 'input'
    value: string
    literal?: true
}
|
||
|
|
|
||
|
|
/** Any token the lexer can emit, discriminated by its `type` field. */
export type LexToken =
    | LexTerminator
    | LexInput
|
||
|
|
|
||
|
|
// Module-level stream logger registered under the name 'lexer'.
// Lexer.emitToken and Lexer.lexInput write their log entries through it.
const logger = log.getStreamLogger('lexer')
|
||
|
|
|
||
|
|
/**
 * Streaming lexer: subscribes to an `Input`, consumes the text chunks it
 * delivers, and publishes `LexToken`s to its own subscribers via `next()`.
 *
 * Lexing state (pending escape, open quote, partially-built token) is kept on
 * the instance, so a token, a quoted section, or an escape sequence may span
 * several chunks. Text that has been accumulated but not yet delimited stays
 * buffered until a later chunk delimits it.
 *
 * For each character, the first matching rule wins:
 *  1. If the previous character armed an escape, append this one verbatim.
 *  2. A backslash arms the escape and is itself dropped (inside quotes too).
 *  3. Outside quotes, `;` or a newline flushes any pending token and emits a
 *     terminator token.
 *  4. Outside quotes, space, tab or carriage return flushes any pending token.
 *  5. A quote character closes the section opened by the same character
 *     (emitting the accumulated text as a literal token, even when empty),
 *     or opens a section when none is open. Opening a quote does not flush,
 *     so text immediately before it becomes part of the literal token.
 *  6. Everything else (including the "other" quote character inside a quoted
 *     section) is appended to the pending token.
 */
export class Lexer extends BehaviorSubject<LexToken> {
    /** True when the previous character was a backslash that armed an escape. */
    private isEscape = false

    /** The quote character that opened the current quoted section, if any. */
    private inQuote?: '"' | "'"

    /** Text of the token currently being built. */
    private tokenAccumulator = ''

    /** Stream logger used by `logState`. */
    private readonly logger: StreamLogger

    /**
     * @param input Source of text chunks; every chunk it delivers is lexed.
     */
    constructor(input: Input) {
        super()
        this.logger = log.getStreamLogger('lexer')
        input.subscribe(chunk => this.lexInput(chunk))
    }

    /**
     * Log the lexer state as it is just before `c` is processed.
     *
     * @param c The character about to be processed.
     */
    private logState(c: string): void {
        this.logger.verbose({
            c,
            isEscape: this.isEscape,
            inQuote: this.inQuote,
            tokenAccumulator: this.tokenAccumulator,
        })
    }

    /**
     * Publish the accumulated text as an input token and reset the accumulator.
     *
     * @param reason What triggered the emit; only used for logging.
     * @param literal Pass `true` to flag the token as literal.
     */
    private async emitToken(reason: string, literal?: true): Promise<void> {
        logger.verbose({ emitToken: reason })

        // Snapshot and clear *before* awaiting: anything appended to the
        // accumulator while `next` is pending must not be wiped afterwards,
        // and a rejected `next` must not leave already-emitted text behind.
        const value = this.tokenAccumulator
        this.tokenAccumulator = ''

        await this.next({ type: 'input', value, literal })
    }

    /**
     * Lex one chunk of text, emitting tokens as they are completed.
     *
     * @param input The chunk to lex.
     */
    private async lexInput(input: string): Promise<void> {
        logger.debug({ input })

        // Iterate the string directly rather than split('') + shift(), which
        // re-indexes the array on every step. Every character the lexer gives
        // meaning to is a single code unit, so iterating by code point yields
        // the same tokens.
        for ( const c of input ) {
            this.logState(c)

            // We got the 2nd character after an escape
            if ( this.isEscape ) {
                this.tokenAccumulator += c
                this.isEscape = false
                continue
            }

            // We are about to get an escaped character
            if ( c === '\\' ) {
                this.isEscape = true
                continue
            }

            // We got a statement terminator
            if ( (c === ';' || c === '\n') && !this.inQuote ) {
                if ( this.tokenAccumulator ) {
                    await this.emitToken('terminator')
                }
                await this.next({ type: 'terminator' })
                continue
            }

            // Whitespace separates tokens
            if ( (c === ' ' || c === '\t' || c === '\r') && !this.inQuote ) {
                if ( this.tokenAccumulator ) {
                    await this.emitToken('whitespace')
                }
                continue
            }

            // We are either starting or ending an unescaped matching quote
            if ( c === `'` || c === `"` ) {
                if ( c === this.inQuote ) {
                    this.inQuote = undefined
                    await this.emitToken('quote', true)
                    continue
                } else if ( !this.inQuote ) {
                    this.inQuote = c
                    continue
                }
                // A different quote character inside an open quoted section
                // is ordinary text: fall through and append it.
            }

            this.tokenAccumulator += c
        }
    }
}
|