/** Parsed arguments of the `chunk` command. */
type ChunkData = {
    /** Term resolved to an integer at execution time: how many pieces go into each chunk. */
    every: StrTerm,
    /** Unit the input is cut into before the pieces are regrouped. */
    type: 'line'|'word'|'char',
}

/**
 * The `chunk` command: `chunk <every> <line|word|char>`.
 *
 * Cuts the string handed to `destructure` into lines, words, or characters and
 * regroups every `every` consecutive pieces into a single part. Each part after
 * the first carries, as its `prefix`, the separator text that stood between it
 * and the previous part.
 */
export class Chunk extends Command<ChunkData> {
    private logger = log.getStreamLogger('cmd-chunk')

    /**
     * Reads the command arguments: first a term (the chunk size), then one of
     * the keywords `line`, `word` or `char`.
     */
    async attemptParse(context: ParseContext): Promise<ChunkData> {
        return {
            every: await context.popTerm(),
            type: context.popKeywordInSet(['line', 'word', 'char']).value,
        }
    }

    getDisplayName(): string {
        return 'chunk'
    }

    /** This command is a parse candidate only for the keyword `chunk`. */
    isParseCandidate(token: LexInput): boolean {
        return this.isKeyword(token, 'chunk')
    }

    /**
     * Resolves the chunk size and the delimiter for the requested unit, then
     * returns the chunks as `{ prefix, value }` entries from the `destructure`
     * callback given to `vm.replaceContextMatchingTerm`.
     *
     * @throws TypeError if `data.type` is a value with no delimiter mapping.
     */
    async execute(vm: StrVM, data: ChunkData): Promise<StrVM> {
        return vm.replaceContextMatchingTerm(ctx => ({
            destructure: sub => {
                const every = ctx.resolveInt(data.every)

                let delimiter: RegExp|string|undefined = undefined
                if ( data.type === 'line' ) {
                    delimiter = '\n'
                } else if ( data.type === 'char' ) {
                    // The empty delimiter means "every character is its own piece".
                    delimiter = ''
                } else if ( data.type === 'word' ) {
                    delimiter = /\s+/sg
                }

                if ( typeof delimiter === 'undefined' ) {
                    // This would only happen if we add a case to `type` and don't handle it above.
                    throw new TypeError('Could not resolve delimiter.')
                }

                this.logger.debug({ every, delimiter })

                const chunks = this.chunkByDelimiter(sub, delimiter, every)
                this.logger.verbose({ chunks })

                return chunks.map(part => ({
                    prefix: part[0],
                    value: wrapString(part[1]),
                }))
            },
        }))
    }

    /**
     * Splits `sub` on `delimiter` and glues every `nth` consecutive pieces back
     * together, keeping the original separator text between pieces of a chunk.
     *
     * @param sub - The string to chunk.
     * @param delimiter - A literal string (matched verbatim), the empty string
     *   (one piece per character), or a RegExp. A RegExp must carry the `g`
     *   flag because it is passed to `String.prototype.matchAll`.
     * @param nth - Number of pieces per chunk.
     * @returns One `[separator, chunk]` tuple per chunk. `separator` is the text
     *   that preceded the chunk in `sub`; it is `undefined` for the first chunk.
     */
    private chunkByDelimiter(sub: string, delimiter: string|RegExp, nth: number): [string|undefined, string][] {
        let parts: string[]
        let separators: string[]

        if ( delimiter === '' ) {
            // Iterate by Unicode code point. `sub.split('')` cuts on UTF-16 code
            // units and would tear surrogate pairs (e.g. emoji) into two halves.
            parts = Array.from(sub)
            separators = []
        } else {
            // Literal delimiters are escaped so they match verbatim.
            const pattern = typeof delimiter === 'string'
                ? new RegExp(delimiter.replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&'), 'sg')
                : delimiter

            // separators[k] is the text found between parts[k] and parts[k + 1].
            separators = Array.from(sub.matchAll(pattern), match => match[0])
            parts = sub.split(pattern)
        }
        this.logger.verbose({ parts, separators })

        const chunkSeparators: string[] = []
        const chunks: string[] = []

        // NOTE(review): `nth` is not validated. With `nth === 0`, `i % nth` is NaN,
        // so every piece ends up in its own chunk -- confirm whether that is intended.
        let chunk = 0   // number of pieces already placed in the current chunk
        let acc = ''    // text of the current chunk
        for ( const [i, part] of parts.entries() ) {
            if ( !(i % nth) && chunk ) {
                // Reached a multiple of `nth`: close the current chunk.
                chunks.push(acc)
                chunk = 0
                acc = ''
            }

            const separator = separators[i - 1] ?? ''
            if ( chunk ) {
                // Inside a chunk: keep the separator as part of the chunk text.
                acc += separator
            } else {
                // First piece of a chunk: the separator becomes that chunk's prefix.
                const chunkSeparator = separator
                this.logger.verbose({ i, chunkSeparator })
                chunkSeparators.push(chunkSeparator)
            }

            acc += part
            chunk += 1
        }

        // Flush the last (possibly partial, possibly empty) chunk.
        chunks.push(acc)

        return chunks.map((chunk, idx) => [
            idx ? chunkSeparators[idx] : undefined,
            chunk,
        ])
    }
}
({ destructure: sub => { const prefix = ctx.resolveString(data.on) - return sub.split(prefix) + + let parts = sub.split(prefix) + if ( data.limit ) { + // If there was a limit, only split the specified number of times: + const limit = ctx.resolveInt(data.limit) + const head = parts.slice(0, limit) + const tail = parts.slice(limit) + parts = head + if ( tail.length ) { + parts.push(tail.join(prefix)) + } + } + + return parts .map((segment, idx) => ({ prefix: idx ? prefix : undefined, value: wrapString(segment),