Start reimplementation in typescript

This commit is contained in:
2025-11-10 23:54:20 -06:00
parent 569bff2d3e
commit 144d90e871
30 changed files with 1347 additions and 0 deletions

View File

@@ -0,0 +1,73 @@
import {LexInput} from '../lexer.js'
import {ExpectedEndOfInputError, InvalidVariableNameError, UnexpectedEndOfInputError} from "../parse.js";
/**
 * A reference to a variable. `name` is the token text exactly as written,
 * including its leading `$` (see `ParseContext.popTerm` / `popLVal`).
 */
export type StrLVal = { term: 'variable', name: string }
/**
 * A single command argument: either a plain string value or a variable reference.
 */
export type StrTerm =
{ term: 'string', value: string }
| StrLVal
/**
 * Consumable queue of the argument tokens collected for one command.
 *
 * Each `pop*` method removes the token at the front of the queue and converts it
 * into a term; `assertEmpty` verifies that nothing was left behind.
 * The array handed to the constructor is consumed in place.
 */
export class ParseContext {
    constructor(
        private inputs: LexInput[],
    ) {}

    /**
     * Verify that every token has been consumed.
     * @throws ExpectedEndOfInputError if at least one token remains.
     */
    assertEmpty() {
        if ( this.inputs.length === 0 ) {
            return
        }

        throw new ExpectedEndOfInputError(`Expected end of input. Found: ${this.inputs[0].value}`)
    }

    /**
     * Like `popTerm`, but yields `undefined` instead of throwing when no tokens remain.
     */
    popOptionalTerm(): StrTerm|undefined {
        return this.inputs.length ? this.popTerm() : undefined
    }

    /**
     * Consume the next token as a term.
     *
     * A non-literal token beginning with `$` is a variable reference and must be a
     * well-formed variable name; any other token becomes a string term.
     *
     * @throws UnexpectedEndOfInputError if no tokens remain.
     * @throws InvalidVariableNameError if a `$`-prefixed, non-literal token is malformed.
     */
    popTerm(): StrTerm {
        const token = this.takeNext('Unexpected end of input. Expected term.')

        const looksLikeVariable = !token.literal && token.value.startsWith('$')
        if ( !looksLikeVariable ) {
            return { term: 'string', value: token.value }
        }

        if ( !this.isVariableName(token.value) ) {
            throw new InvalidVariableNameError(`Invalid variable name: ${token.value}`)
        }
        return { term: 'variable', name: token.value }
    }

    /**
     * Consume the next token as a variable reference (an assignable target).
     *
     * @throws UnexpectedEndOfInputError if no tokens remain.
     * @throws InvalidVariableNameError if the token is a literal or not a well-formed variable name.
     */
    popLVal(): StrLVal {
        const token = this.takeNext('Unexpected end of input. Expected lval.')

        if ( !token.literal && this.isVariableName(token.value) ) {
            return { term: 'variable', name: token.value }
        }
        throw new InvalidVariableNameError(`Expected variable name. Found: ${token.value}`)
    }

    /** Remove and return the front token, failing with `message` when the queue is empty. */
    private takeNext(message: string): LexInput {
        if ( !this.inputs.length ) {
            throw new UnexpectedEndOfInputError(message);
        }
        return this.inputs.shift()!
    }

    /** True when `value` is `$` followed by one or more ASCII letters, digits or underscores. */
    private isVariableName(value: string): boolean {
        return /^\$[a-zA-Z0-9_]+$/.test(value)
    }
}
/** Shape of the data a command produces when it parses its arguments. */
export type CommandData = Record<string, unknown>

/**
 * Base class for a command definition: it recognises the token that starts one of
 * its statements and turns that statement's argument tokens into `TData`.
 */
export abstract class Command<TData extends CommandData> {
    /** Whether `token` (the first token of a statement) introduces this command. */
    abstract isParseCandidate(token: LexInput): boolean

    /** Build this command's data from the argument tokens held by `context`. */
    abstract attemptParse(context: ParseContext): TData

    /** Human-readable name of the command. */
    abstract getDisplayName(): string

    /**
     * True when `token` is the bare word `keyword`.
     * Tokens flagged as `literal` never count as keywords.
     */
    protected isKeyword(token: LexInput, keyword: string): boolean {
        if ( token.literal ) {
            return false
        }
        return token.value === keyword
    }
}

16
src/vm/commands/copy.ts Normal file
View File

@@ -0,0 +1,16 @@
import {Command, ParseContext} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the argument-less `copy` command. */
export class Copy extends Command<{}> {
    /** Matches a statement whose first token is the non-literal word `copy`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'copy')
        return matchesKeyword
    }

    /** `copy` reads nothing from the context; its parsed data is an empty object. */
    attemptParse(context: ParseContext): {} {
        return {}
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'copy'
    }
}

16
src/vm/commands/edit.ts Normal file
View File

@@ -0,0 +1,16 @@
import {Command, ParseContext} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the argument-less `edit` command. */
export class Edit extends Command<{}> {
    /** Matches a statement whose first token is the non-literal word `edit`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'edit')
        return matchesKeyword
    }

    /** `edit` reads nothing from the context; its parsed data is an empty object. */
    attemptParse(context: ParseContext): {} {
        return {}
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'edit'
    }
}

16
src/vm/commands/exit.ts Normal file
View File

@@ -0,0 +1,16 @@
import {Command, ParseContext} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the argument-less `exit` command. */
export class Exit extends Command<{}> {
    /** Matches a statement whose first token is the non-literal word `exit`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'exit')
        return matchesKeyword
    }

    /** `exit` reads nothing from the context; its parsed data is an empty object. */
    attemptParse(context: ParseContext): {} {
        return {}
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'exit'
    }
}

16
src/vm/commands/from.ts Normal file
View File

@@ -0,0 +1,16 @@
import {Command, ParseContext, StrLVal} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the `from` command, which names a variable. */
export class From extends Command<{ var: StrLVal }> {
    /** Matches a statement whose first token is the non-literal word `from`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'from')
        return matchesKeyword
    }

    /** Reads one variable reference from the context and returns it as `var`. */
    attemptParse(context: ParseContext): { var: StrLVal } {
        const variable = context.popLVal()
        return { var: variable }
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'from'
    }
}

View File

@@ -0,0 +1,16 @@
import {Command, ParseContext} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the argument-less `history` command. */
export class History extends Command<{}> {
    /** Matches a statement whose first token is the non-literal word `history`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'history')
        return matchesKeyword
    }

    /** `history` reads nothing from the context; its parsed data is an empty object. */
    attemptParse(context: ParseContext): {} {
        return {}
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'history'
    }
}

29
src/vm/commands/index.ts Normal file
View File

@@ -0,0 +1,29 @@
import {Command, CommandData} from './command.js'
import {Exit} from "./exit.js";
import {InFile} from "./infile.js";
import {Copy} from "./copy.js";
import {Edit} from "./edit.js";
import {From} from "./from.js";
import {History} from "./history.js";
import {Load} from "./load.js";
import {OutFile} from "./outfile.js";
import {Paste} from "./paste.js";
import {RunFile} from "./runfile.js";
import {Save} from "./save.js";
import {To} from "./to.js";
/** A list of command definitions, each parsing to some `CommandData`. */
export type Commands = Command<CommandData>[]

/** One instance of every command definition imported by this module. */
export const commands: Commands = [
    new Copy(),
    new Edit(),
    new Exit(),
    new From(),
    new History(),
    new InFile(),
    new Load(),
    new OutFile(),
    new Paste(),
    new RunFile(),
    new Save(),
    new To(),
]

16
src/vm/commands/infile.ts Normal file
View File

@@ -0,0 +1,16 @@
import {Command, ParseContext, StrTerm} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the `infile` command, which takes one required term. */
export class InFile extends Command<{ path: StrTerm }> {
    /** Matches a statement whose first token is the non-literal word `infile`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'infile')
        return matchesKeyword
    }

    /** Reads one term from the context and returns it as `path`. */
    attemptParse(context: ParseContext): { path: StrTerm } {
        const path = context.popTerm()
        return { path }
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'infile'
    }
}

16
src/vm/commands/load.ts Normal file
View File

@@ -0,0 +1,16 @@
import {Command, ParseContext, StrTerm} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the `load` command, which takes one optional term. */
export class Load extends Command<{ path?: StrTerm }> {
    /** Matches a statement whose first token is the non-literal word `load`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'load')
        return matchesKeyword
    }

    /** Reads a term if one is present; `path` is `undefined` when no argument was given. */
    attemptParse(context: ParseContext): { path?: StrTerm } {
        const path = context.popOptionalTerm()
        return { path }
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'load'
    }
}

View File

@@ -0,0 +1,16 @@
import {Command, ParseContext, StrTerm} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the `outfile` command, which takes one required term. */
export class OutFile extends Command<{ path: StrTerm }> {
    /** Matches a statement whose first token is the non-literal word `outfile`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'outfile')
        return matchesKeyword
    }

    /** Reads one term from the context and returns it as `path`. */
    attemptParse(context: ParseContext): { path: StrTerm } {
        const path = context.popTerm()
        return { path }
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'outfile'
    }
}

16
src/vm/commands/paste.ts Normal file
View File

@@ -0,0 +1,16 @@
import {Command, ParseContext} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the argument-less `paste` command. */
export class Paste extends Command<{}> {
    /** Matches a statement whose first token is the non-literal word `paste`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'paste')
        return matchesKeyword
    }

    /** `paste` reads nothing from the context; its parsed data is an empty object. */
    attemptParse(context: ParseContext): {} {
        return {}
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'paste'
    }
}

View File

@@ -0,0 +1,16 @@
import {Command, ParseContext, StrTerm} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the `runfile` command, which takes one required term. */
export class RunFile extends Command<{ path: StrTerm }> {
    /** Matches a statement whose first token is the non-literal word `runfile`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'runfile')
        return matchesKeyword
    }

    /** Reads one term from the context and returns it as `path`. */
    attemptParse(context: ParseContext): { path: StrTerm } {
        const path = context.popTerm()
        return { path }
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'runfile'
    }
}

16
src/vm/commands/save.ts Normal file
View File

@@ -0,0 +1,16 @@
import {Command, ParseContext, StrTerm} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the `save` command, which takes one optional term. */
export class Save extends Command<{ path?: StrTerm }> {
    /** Matches a statement whose first token is the non-literal word `save`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'save')
        return matchesKeyword
    }

    /** Reads a term if one is present; `path` is `undefined` when no argument was given. */
    attemptParse(context: ParseContext): { path?: StrTerm } {
        const path = context.popOptionalTerm()
        return { path }
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'save'
    }
}

16
src/vm/commands/to.ts Normal file
View File

@@ -0,0 +1,16 @@
import {Command, ParseContext, StrLVal} from "./command.js";
import {LexInput} from "../lexer.js";
/** Parser definition for the `to` command, which names a variable. */
export class To extends Command<{ var: StrLVal }> {
    /** Matches a statement whose first token is the non-literal word `to`. */
    isParseCandidate(token: LexInput): boolean {
        const matchesKeyword = this.isKeyword(token, 'to')
        return matchesKeyword
    }

    /** Reads one variable reference from the context and returns it as `var`. */
    attemptParse(context: ParseContext): { var: StrLVal } {
        const variable = context.popLVal()
        return { var: variable }
    }

    /** Name of the command. */
    getDisplayName(): string {
        return 'to'
    }
}

7
src/vm/index.ts Normal file
View File

@@ -0,0 +1,7 @@
import {Input} from './input.js'
/**
 * The virtual machine. At this point it only holds on to the `Input` it was
 * constructed with.
 */
export class StrVM {
    private input: Input

    constructor(input: Input) {
        this.input = input
    }
}

35
src/vm/input.ts Normal file
View File

@@ -0,0 +1,35 @@
import * as readline from 'node:readline'
import {BehaviorSubject} from "../util/subject.js";
import {Lifecycle, LifecycleAware} from "../util/lifecycle.js";
/**
 * Interactive line source: reads lines from stdin through a readline prompt and
 * publishes each one (with its trailing newline restored) to subscribers.
 */
export class Input extends BehaviorSubject<string> implements LifecycleAware {
    /** The active readline interface, if a prompt is currently open. */
    private rl?: readline.Interface

    /**
     * Open the `str %> ` prompt on stdin/stdout, replacing any prompt that is already open.
     */
    public setupPrompt(): void {
        if ( this.rl ) {
            this.closePrompt()
        }

        const prompt = readline.createInterface({
            input: process.stdin,
            output: process.stdout,
            prompt: 'str %> ',
        })
        this.rl = prompt
        prompt.prompt()

        // readline strips the line ending, so it is added back before publishing.
        // NOTE(review): a rejection from `next()` is not caught in this handler —
        // confirm how subscriber failures are meant to surface.
        prompt.on('line', async (line) => {
            await this.next(`${line}\n`)
            this.rl?.prompt(true)
        })
    }

    /** Close the prompt if one is open and forget it. */
    public closePrompt(): void {
        if ( this.rl ) {
            this.rl.close()
        }
        this.rl = undefined
    }

    /** Arrange for the prompt to be closed when `lifecycle` closes. */
    adoptLifecycle(lifecycle: Lifecycle): void {
        lifecycle.onClose(() => {
            this.closePrompt()
        })
    }
}

95
src/vm/lexer.ts Normal file
View File

@@ -0,0 +1,95 @@
import {BehaviorSubject} from '../util/subject.js'
import {Input} from './input.js'
import {log} from '../log.js'
import {StreamLogger} from '../util/log.js'
/** Marks the end of a statement; the lexer emits one for each unquoted, unescaped `;` or newline. */
export type LexTerminator = { type: 'terminator' }
/**
 * A piece of statement text. `literal` is set only on tokens the lexer emits
 * when a quoted section closes.
 */
export type LexInput = { type: 'input', value: string, literal?: true }
/** Everything the lexer can publish. */
export type LexToken = LexTerminator | LexInput
// Module-level logger for the 'lexer' stream.
const logger = log.getStreamLogger('lexer')
/**
 * Character-level tokenizer. Every chunk published by the given `Input` is scanned
 * one character at a time and the resulting tokens are published through `next()`.
 *
 * Scanner state (pending escape, open quote, partially built token) lives on the
 * instance, so it carries over from one chunk to the next.
 *
 * Rules, in the order they are checked for each character:
 *  1. The character following a backslash is added to the current token verbatim.
 *  2. A backslash starts an escape and is itself dropped.
 *  3. Outside quotes, `;` or newline flushes the current token (if non-empty) and
 *     then emits a terminator.
 *  4. Outside quotes, space, tab or carriage return flushes the current token (if non-empty).
 *  5. A quote character opens a quoted section, or closes the section opened by the
 *     same character. Closing emits the accumulated text as a `literal` token, even
 *     when it is empty. The other quote character inside a section is ordinary text.
 *  6. Anything else is appended to the current token.
 */
export class Lexer extends BehaviorSubject<LexToken> {
    /** True when the previous character was an unescaped backslash. */
    private isEscape: boolean = false

    /** The quote character that opened the current quoted section, if any. */
    private inQuote?: '"'|"'"

    /** Text of the token currently being built. */
    private tokenAccumulator: string = ''

    private logger: StreamLogger

    constructor(input: Input) {
        super()
        this.logger = log.getStreamLogger('lexer')
        input.subscribe(input => this.lexInput(input))
    }

    /** Trace the scanner state as it stands before `c` is processed. */
    private logState(c: string): void {
        this.logger.verbose({
            c,
            isEscape: this.isEscape,
            inQuote: this.inQuote,
            tokenAccumulator: this.tokenAccumulator,
        })
    }

    /**
     * Publish the accumulated text as an input token.
     *
     * The accumulator is cleared *before* subscribers run. Previously it was cleared
     * only after `next()` resolved, so when `next()` rejected the text was left
     * behind and ended up prepended to the following token.
     *
     * @param reason  why the token ended (used for logging only)
     * @param literal set when the token came from a quoted section
     */
    private async emitToken(reason: string, literal?: true): Promise<void> {
        logger.verbose({ emitToken: reason })
        const value = this.tokenAccumulator
        this.tokenAccumulator = ''
        await this.next({ type: 'input', value, literal })
    }

    /** Scan one chunk of input, publishing tokens as they are completed. */
    private async lexInput(input: string): Promise<void> {
        logger.debug({ input })

        for ( const c of input ) {
            this.logState(c)

            // We got the 2nd character after an escape
            if ( this.isEscape ) {
                this.tokenAccumulator += c
                this.isEscape = false
                continue
            }

            // We are about to get an escape character
            if ( c === '\\' ) {
                this.isEscape = true
                continue
            }

            // We got a statement terminator
            if ( (c === ';' || c === '\n') && !this.inQuote ) {
                if ( this.tokenAccumulator ) {
                    await this.emitToken('terminator')
                }
                await this.next({ type: 'terminator' })
                continue
            }

            // Whitespace separates tokens
            if ( (c === ' ' || c === '\t' || c === '\r') && !this.inQuote ) {
                if ( this.tokenAccumulator ) {
                    await this.emitToken('whitespace')
                }
                continue
            }

            // We are either starting or ending an unescaped matching quote
            if ( c === `'` || c === `"` ) {
                if ( c === this.inQuote ) {
                    this.inQuote = undefined
                    await this.emitToken('quote', true)
                    continue
                } else if ( !this.inQuote ) {
                    this.inQuote = c
                    continue
                }
                // A non-matching quote inside a quoted section falls through as plain text.
            }

            this.tokenAccumulator += c
        }
    }
}

14
src/vm/parse.ts Normal file
View File

@@ -0,0 +1,14 @@
import {Command, CommandData} from './commands/command.js'
/**
 * A fully parsed statement: the command definition that recognised it together
 * with the data that command produced from its arguments.
 */
export type Executable<TData extends CommandData> = {
command: Command<TData>,
data: TData,
}
/**
 * Base class for every error raised while turning tokens into commands.
 *
 * `name` is set to the concrete class name, so logs and `String(err)` show e.g.
 * `InvalidVariableNameError: …` instead of the generic `Error: …`.
 */
export class ParseError extends Error {
    constructor(...args: ConstructorParameters<typeof Error>) {
        super(...args)
        // `new.target` is the class actually being constructed, so subclasses get
        // their own name without having to declare a constructor.
        this.name = new.target.name
    }
}

/** The parser reached a state that should be impossible (e.g. an unknown token type). */
export class InternalParseError extends ParseError {}

/** A statement started with something that cannot be a command keyword. */
export class IsNotKeywordError extends ParseError {}

/** The statement does not form a valid command (e.g. no command matches the keyword). */
export class InvalidCommandError extends ParseError {}

/** A command needed another argument but the statement had already ended. */
export class UnexpectedEndOfInputError extends ParseError {}

/** A command finished parsing but the statement still had unconsumed arguments. */
export class ExpectedEndOfInputError extends InvalidCommandError {}

/** A token was expected to be a variable name but is not a well-formed one. */
export class InvalidVariableNameError extends ParseError {}

94
src/vm/parser.ts Normal file
View File

@@ -0,0 +1,94 @@
import {BehaviorSubject} from '../util/subject.js'
import {Lexer, LexInput, LexToken} from './lexer.js'
import {StreamLogger} from '../util/log.js'
import {log} from '../log.js'
import {Commands} from './commands/index.js'
import {Command, CommandData, ParseContext} from './commands/command.js'
import {Executable, InternalParseError, InvalidCommandError, IsNotKeywordError} from './parse.js'
/**
 * Groups the lexer's token stream into statements and publishes one `Executable`
 * per successfully parsed statement.
 *
 * The first input token of a statement selects a command definition (the "parse
 * candidate"); the following input tokens are buffered; the terminator triggers the
 * candidate's own parse over the buffered tokens.
 */
export class Parser extends BehaviorSubject<Executable<CommandData>> {
    private logger: StreamLogger

    /** Command selected for the statement in progress, if one has been identified. */
    private parseCandidate?: Command<CommandData>

    /** Argument tokens buffered for the statement in progress. */
    private inputForCandidate: LexInput[] = []

    constructor(lexer: Lexer, private commands: Commands) {
        super()
        this.logger = log.getStreamLogger('parser')
        lexer.subscribe(token => this.handleToken(token))
    }

    /**
     * Feed one token into the parser.
     *
     * @throws IsNotKeywordError if a statement starts with a literal token.
     * @throws InvalidCommandError if no command claims the statement's first token.
     * @throws InternalParseError if the token is neither an input nor a terminator.
     * Errors thrown by the candidate's `attemptParse` or by `assertEmpty` also propagate.
     */
    async handleToken(token: LexToken) {
        const candidate = this.parseCandidate

        // Between statements: this token has to identify the next command.
        if ( !candidate ) {
            this.beginStatement(token)
            return
        }

        switch ( token.type ) {
            case 'input':
                // An argument for the current candidate; keep it until the terminator arrives.
                this.inputForCandidate.push(token)
                return
            case 'terminator':
                await this.finishStatement(candidate)
                return
            default:
                throw new InternalParseError('Encountered invalid token.')
        }
    }

    /**
     * Handle a token that arrives while no statement is in progress.
     *
     * NOTE(review): when this throws, no state is recorded here to skip the rest of
     * the offending statement; whether further tokens of that statement still
     * arrive depends on how the lexer's subject treats the rejection — confirm.
     */
    private beginStatement(token: LexToken): void {
        // Ignore duplicated terminators between commands
        if ( token.type === 'terminator' ) {
            return
        }

        this.logger.verbose({ identifyParseCandidate: token })
        if ( !this.isKeyword(token) ) {
            throw new IsNotKeywordError('Expected keyword, found: ' + this.displayToken(token))
        }
        this.parseCandidate = this.getParseCandidate(token)
    }

    /**
     * Let `candidate` parse the buffered tokens and publish the result.
     * The statement state is cleared afterwards whether or not parsing succeeded.
     */
    private async finishStatement(candidate: Command<CommandData>): Promise<void> {
        try {
            // Have the candidate attempt to parse itself from the collected data:
            const context = new ParseContext(this.inputForCandidate)
            this.logger.verbose({ parsing: candidate.getDisplayName(), context })
            const data = candidate.attemptParse(context)

            // The candidate must consume every token in the context:
            context.assertEmpty()

            // Emit the parsed command:
            this.logger.debug({ parsed: candidate.getDisplayName() })
            await this.next({ command: candidate, data })
        } finally {
            this.parseCandidate = undefined
            this.inputForCandidate = []
        }
    }

    /** A token can act as a keyword only if it is an input token that is not literal. */
    private isKeyword(token: LexToken): token is (LexInput & {literal: undefined}) {
        return token.type === 'input' && !token.literal
    }

    /**
     * Return the first registered command that claims `token`.
     * @throws InvalidCommandError if none does.
     */
    private getParseCandidate(token: LexInput): Command<CommandData> {
        const match = this.commands.find(command => command.isParseCandidate(token))
        if ( !match ) {
            throw new InvalidCommandError('Could not find parser for: ' + this.displayToken(token))
        }

        this.logger.debug({ foundParseCandidate: match.getDisplayName(), token })
        return match
    }

    /** Render a token for use in error messages. */
    private displayToken(token: LexToken) {
        if ( token.type === 'terminator' ) {
            return '(TERMINATOR)'
        }

        const kind = token.literal ? 'LITERAL' : 'INPUT'
        return `(${kind}) ${token.value}`
    }
}

51
src/vm/string.ts Normal file
View File

@@ -0,0 +1,51 @@
/** A piece of a line that is tagged as a word. */
export type Word = { type: 'word', value: string }

/** A piece of a line that is tagged as whitespace. */
export type Whitespace = { type: 'space', value: string }

/** Any piece of a line. */
export type Component = Word | Whitespace

/** Type guard: narrows `cmp` to `Word`. */
export const isWord = (cmp: Component): cmp is Word => {
    return cmp.type === 'word'
}

/** Type guard: narrows `cmp` to `Whitespace`. */
export const isWhitespace = (cmp: Component): cmp is Whitespace => {
    return cmp.type === 'space'
}

/** One line of text, stored as its components in order. */
export type Line = {
    components: Component[],
}

/** A structured string: an ordered list of lines. */
export type SString = {
    lines: Line[],
}
/**
 * Flatten a structured string back into plain text: the component values of each
 * line are concatenated, and the lines are joined with `\n`.
 */
export const toNativeString = (value: SString): string => {
    const renderedLines: string[] = []

    for ( const line of value.lines ) {
        let text = ''
        for ( const component of line.components ) {
            text += component.value
        }
        renderedLines.push(text)
    }

    return renderedLines.join('\n')
}
/**
 * Parse plain text into a structured string.
 *
 * The text is split into lines on `\n`; each line is cut into alternating runs of
 * whitespace (`space` components) and non-whitespace (`word` components), in the
 * order they appear.
 *
 * Every component has a non-empty value: a line with leading or trailing
 * whitespace begins or ends with a `space` component, and an empty line has no
 * components at all. (Splitting on whitespace used to leave empty-string `word`
 * components in those positions.) Concatenating the component values of a line
 * always reproduces that line exactly.
 */
export const fromNativeString = (value: string): SString => ({
    lines: value.split('\n')
        .map((rawLine): Line => {
            const line: Line = { components: [] }

            // Group 1 captures a whitespace run, group 2 a word; exactly one is set per match.
            for ( const match of rawLine.matchAll(/(\s+)|(\S+)/g) ) {
                line.components.push({
                    type: match[1] !== undefined ? 'space' : 'word',
                    value: match[0],
                })
            }

            return line
        }),
})