diff --git a/HELP.md b/HELP.md index 18106c8..78ff6bc 100644 --- a/HELP.md +++ b/HELP.md @@ -301,6 +301,10 @@ Example: `[foo, bar, foo]` -> `unique` -> `[foo, bar]` Interleave the current destructured subject with the given other destructured subject. Example: Say `$a` is `[1, 2, 3]` and subject is `[a, b, c]` -> `zip $a` -> `[a, 1, b, 2, c, 3]` +#### `flatten []` +Collapse a nested destructured string down a single level, optionally limiting to a specific `index`. +Example: `[[a,b], c, [d,e]]` -> `flatten 2` -> `[[a,b], c, d, e]` + ### Working with Variables @@ -310,6 +314,8 @@ Assign a variable using the standard syntax: `$a = 'mystr'` You can then use them anywhere you may use a string: `split $a` +If your variable contains a `:: destructured`, you can access individual elements with their indices: `$a.0`, `$a.1`, &c. + There are also several commands for interactive with variables, defined below. #### `to ` diff --git a/src/vm/commands/command.ts b/src/vm/commands/command.ts index e56a8a5..5f2db0f 100644 --- a/src/vm/commands/command.ts +++ b/src/vm/commands/command.ts @@ -2,7 +2,7 @@ import {createHash} from 'node:crypto'; import {LexInput, LexToken, tokenIsLVal} from '../lexer.js' import { Executable, - ExpectedEndOfInputError, InvalidSubcontextError, + ExpectedEndOfInputError, InvalidRegularExpressionLiteralError, InvalidSubcontextError, InvalidVariableNameError, IsNotKeywordError, UnexpectedEndOfInputError, UnexpectedEndofStatementError @@ -10,10 +10,11 @@ import { import {Awaitable, ElementType, hasOwnProperty} from "../../util/types.js"; import {StrVM} from "../vm.js"; import os from "node:os"; +import {log} from "../../log.js"; export class TypeError extends Error {} -export type StrLVal = { term: 'variable', name: string } +export type StrLVal = { term: 'variable', name: string, index?: number } export const isStrLVal = (val: unknown): val is StrLVal => !!(typeof val === 'object' @@ -41,6 +42,8 @@ export type StrString = { term: 'string', value: string, 
literal?: true } export type StrInt = { term: 'int', value: number } +export type StrRex = { term: 'rex', value: string, flags: string } + export type StrLamba = { term: 'lambda', value: { @@ -49,7 +52,7 @@ export type StrLamba = { }, } -export type StrRVal = StrString | StrInt | StrDestructured | StrLamba +export type StrRVal = StrString | StrInt | StrDestructured | StrLamba | StrRex export type StrDestructuredTable = { term: 'destructured', @@ -91,6 +94,10 @@ export const hashStrRVal = (val: StrRVal): string => { return toHex(`s:int:${val.value}`) } + if ( val.term === 'rex' ) { + return toHex(`s:rex:${val.value}/${val.flags}`) + } + if ( val.term === 'lambda' ) { throw new Error('Cannot hash lambda') // todo } @@ -110,20 +117,28 @@ export const isStrTerm = (val: unknown): val is StrTerm => && hasOwnProperty(val, 'value'))) export const isStrRVal = (term: StrTerm): term is StrRVal => - term.term === 'string' || term.term === 'int' || term.term === 'destructured' || term.term === 'lambda' + ['string', 'int', 'destructured', 'lambda', 'rex'].includes(term.term) export const unwrapString = (term: StrRVal): string => { if ( term.term === 'int' ) { return String(term.value) } - if ( term.term === 'destructured' || term.term === 'lambda' ) { + if ( term.term === 'destructured' || term.term === 'lambda' || term.term === 'rex' ) { throw new TypeError(`Found unexpected ${term.term} (expected: string|int)`) } return term.value } +export const unwrapRex = (term: StrRVal): RegExp => { + if ( term.term !== 'rex' ) { + throw new TypeError(`Found unexpected ${term.term} (expected: rex)`) + } + + return new RegExp(term.value, term.flags) +} + export const coerceString = (term: StrRVal): string => { if ( term.term === 'destructured' ) { return joinDestructured(term.value) @@ -176,6 +191,8 @@ export interface ParseSubContext { } export class ParseContext { + private log = log.getStreamLogger('parseContext') + constructor( private inputs: LexToken[], private childParser: (tokens: 
LexToken[]) => Awaitable<[Executable, LexToken[]]>, @@ -246,11 +263,38 @@ export class ParseContext { private parseInputToTerm(input: LexInput): StrTerm { // Check if the token is a literal variable name: if ( !input.literal && input.value.startsWith('$') ) { - if ( !input.value.match(/^\$[a-zA-Z0-9_]+$/) ) { + if ( !input.value.match(/^\$[a-zA-Z0-9_]+(?:\.\d+)?$/) ) { throw new InvalidVariableNameError(`Invalid variable name: ${input.value}`) } - return { term: 'variable', name: input.value } + const parts = input.value.split('.') + let index: number|undefined = undefined + if ( parts.length > 1 ) { + index = parseInt(parts[1], 10) + } + + return { term: 'variable', name: parts[0], index } + } + + // Check if the token is a literal regular expression: + if ( !input.literal && input.value.startsWith('/') ) { + const parts = input.value + .substring(1) // trim the leading / + .split('') + .reverse() + + let flags = '' + while ( parts.length && parts[0] !== '/' ) { + flags += parts.shift() + } + + if ( !parts.length ) { + throw new InvalidRegularExpressionLiteralError(`Invalid regular expression literal: ${input.value}`) + } + + parts.shift() // trim the trailing / + const pattern = parts.reverse().join('') + return { term: 'rex', value: pattern, flags } } // Check if the token is a valid integer: @@ -379,6 +423,8 @@ export class ParseContext { })) } + this.log.debug({ lambdaBody: sc.inputs }) + // Now, the remainder of the subcontext inputs should be a series of executables // separated by `terminator` tokens -- e.g. 
(split _; join |), so parse executables // from the subcontext until it is empty: @@ -457,7 +503,10 @@ export class ParseContext { value: last.value.substring(0, last.value.length - 1), } } - sc.inputs.push(last) + if ( last.type !== 'input' || last.value || last.literal ) { + // Avoid pushing an empty input if the last input contained ONLY the empty right-paren + sc.inputs.push(last) + } return [sc, tokenIdx] } diff --git a/src/vm/commands/contains.ts b/src/vm/commands/contains.ts index 79628cf..69da62e 100644 --- a/src/vm/commands/contains.ts +++ b/src/vm/commands/contains.ts @@ -20,9 +20,16 @@ export class Contains extends Command<{ find: StrTerm }> { execute(vm: StrVM, data: { find: StrTerm }): Awaitable { return vm.replaceContextMatchingTerm(ctx => ({ - string: sub => sub.includes(ctx.resolveString(data.find)) ? sub : '', - destructuredOfStrings: parts => parts.filter(part => - part.includes(ctx.resolveString(data.find))), + string: sub => + ctx.applyStringOrRex(data.find, { + string: find => sub.includes(find) ? sub : '', + rex: find => find.test(sub) ? 
sub : '', + }), + destructuredOfStrings: parts => + ctx.applyStringOrRex(data.find, { + string: find => parts.filter(part => part.includes(find)), + rex: find => parts.filter(part => find.test(part)), + }), })) } } diff --git a/src/vm/commands/flatten.ts b/src/vm/commands/flatten.ts new file mode 100644 index 0000000..45c47e0 --- /dev/null +++ b/src/vm/commands/flatten.ts @@ -0,0 +1,60 @@ +import {Command, ParseContext, StrDestructured, StrTerm} from "./command.js"; +import {LexInput} from "../lexer.js"; +import {StrVM} from "../vm.js"; + +export type FlattenData = { + index?: StrTerm, +} + +/** + * [[a,b],c,[d,e]] -> flatten 0 -> [a,b,c,[d,e]] + */ +export class Flatten extends Command { + async attemptParse(context: ParseContext): Promise { + return { + index: await context.popOptionalTerm(), + } + } + + getDisplayName(): string { + return 'flatten' + } + + isParseCandidate(token: LexInput): boolean { + return this.isKeyword(token, 'flatten') + } + + async execute(vm: StrVM, data: FlattenData): Promise { + return vm.replaceContextMatchingTerm(ctx => ({ + destructured: async (parts: StrDestructured['value']) => { + let index: number|undefined + if ( data.index ) { + index = ctx.resolveInt(data.index) + } + + const newParts: StrDestructured['value'] = [] + for ( let i = 0; i < parts.length; i += 1 ) { + const part = parts[i]! + + if ( typeof index !== 'undefined' && index !== i ) { + // We're targeting a specific index, and we're not it -- so preserve the value. + newParts.push(part) + continue + } + + if ( part.value.term !== 'destructured' ) { + // This item is not a nested destructured, so nothing to flatten. 
+ newParts.push(part) + continue + } + + for ( const child of part.value.value ) { + newParts.push(child) + } + } + + return newParts + }, + })) + } +} diff --git a/src/vm/commands/index.ts b/src/vm/commands/index.ts index 325d40d..ca45ebb 100644 --- a/src/vm/commands/index.ts +++ b/src/vm/commands/index.ts @@ -53,6 +53,7 @@ import {Chunk} from "./chunk.js"; import {Script} from "./script.js"; import {Take} from "./take.js"; import {Group} from "./group.js"; +import {Flatten} from "./flatten.js"; export type Commands = Command[] export const commands: Commands = [ @@ -68,6 +69,7 @@ export const commands: Commands = [ new Edit, new Enclose, new Exit, + new Flatten, new From, new Group, new Help, diff --git a/src/vm/commands/missing.ts b/src/vm/commands/missing.ts index 698fd2b..97d4930 100644 --- a/src/vm/commands/missing.ts +++ b/src/vm/commands/missing.ts @@ -20,9 +20,16 @@ export class Missing extends Command<{ find: StrTerm }> { execute(vm: StrVM, data: { find: StrTerm }): Awaitable { return vm.replaceContextMatchingTerm(ctx => ({ - string: sub => sub.includes(ctx.resolveString(data.find)) ? '' : sub, - destructuredOfStrings: parts => parts.filter(part => - !part.includes(ctx.resolveString(data.find))), + string: sub => + ctx.applyStringOrRex(data.find, { + string: find => !sub.includes(find) ? sub : '', + rex: find => !find.test(sub) ? 
sub : '', + }), + destructuredOfStrings: parts => + ctx.applyStringOrRex(data.find, { + string: find => parts.filter(part => !part.includes(find)), + rex: find => parts.filter(part => !find.test(part)), + }), })) } } diff --git a/src/vm/lexer.ts b/src/vm/lexer.ts index 3b1c033..63e888c 100644 --- a/src/vm/lexer.ts +++ b/src/vm/lexer.ts @@ -23,6 +23,7 @@ export const tokenIsLVal = (input: LexInput): boolean => export class Lexer extends BehaviorSubject { private isEscape: boolean = false private inComment: boolean = false + private inRex: boolean = false private inQuote?: '"'|"'" private tokenAccumulator: string = '' @@ -77,7 +78,7 @@ export class Lexer extends BehaviorSubject { } // We got a statement terminator - if ( (c === ';' || c === '\n') && !this.inQuote ) { + if ( (c === ';' || c === '\n') && !this.inQuote && !this.inRex ) { if ( this.tokenAccumulator ) { await this.emitToken('terminator') } @@ -87,7 +88,7 @@ export class Lexer extends BehaviorSubject { } // Whitespace separates tokens - if ( (c === ' ' || c === '\t' || c === '\r') && !this.inQuote ) { + if ( (c === ' ' || c === '\t' || c === '\r') && !this.inQuote && !this.inRex ) { if ( this.tokenAccumulator ) { await this.emitToken('whitespace') } @@ -95,7 +96,7 @@ export class Lexer extends BehaviorSubject { } // Comments start with -- - if ( this.tokenAccumulator === '-' && c === '-' && !this.inQuote ) { + if ( this.tokenAccumulator === '-' && c === '-' && !this.inQuote && !this.inRex ) { this.tokenAccumulator = '' this.inComment = true continue @@ -103,17 +104,32 @@ export class Lexer extends BehaviorSubject { // We are either starting or ending an unescaped matching quote. // For now, only parse single quotes. Makes it nicer to type " in commands. 
- if ( c === `'` ) { + if ( c === `'` && !this.inRex ) { if ( c === this.inQuote ) { this.inQuote = undefined await this.emitToken('quote', true) continue - } else if ( !this.inQuote ) { + } else if ( !this.inQuote && !this.tokenAccumulator ) { this.inQuote = c continue } } + // We are either starting or ending an unescaped regular expression literal. + if ( c === '/' && !this.inQuote ) { + if ( !this.inRex && !this.tokenAccumulator ) { + this.inRex = true + this.tokenAccumulator += c // include the slashes so we can parse them later + continue + } else if ( this.inRex ) { + // TODO: currently, this doesn't support tail modifiers (e.g. /./gi) + this.inRex = false + this.tokenAccumulator += c // include the slashes so we can parse them later + await this.emitToken('rex') + continue + } + } + this.tokenAccumulator += c } } diff --git a/src/vm/output.ts b/src/vm/output.ts index 6c23af8..0d76970 100644 --- a/src/vm/output.ts +++ b/src/vm/output.ts @@ -83,6 +83,10 @@ export const getSubjectDisplay = (sub: StrRVal, prefix: string = '', firstLinePr annotated += prefix + `\n│ ${sub.value}` } + if ( sub.term === 'rex' ) { + annotated += prefix + `\n│ /${sub.value}/${sub.flags}` + } + if ( sub.term === 'destructured' ) { const padLength = `${sub.value.length}`.length annotated += '\n' + sub.value diff --git a/src/vm/parse.ts b/src/vm/parse.ts index 152d83e..949a666 100644 --- a/src/vm/parse.ts +++ b/src/vm/parse.ts @@ -13,4 +13,5 @@ export class UnexpectedEndOfInputError extends ParseError {} export class UnexpectedEndofStatementError extends ParseError {} export class ExpectedEndOfInputError extends InvalidCommandError {} export class InvalidVariableNameError extends ParseError {} +export class InvalidRegularExpressionLiteralError extends ParseError {} export class InvalidSubcontextError extends ParseError {} diff --git a/src/vm/parser.ts b/src/vm/parser.ts index 62045ec..26fcece 100644 --- a/src/vm/parser.ts +++ b/src/vm/parser.ts @@ -35,6 +35,7 @@ export class Parser 
extends BehaviorSubject> { async handleParseError(error: Error) { if ( error instanceof ParseError ) { this.logger.error(`(${error.constructor.name}) ${error.message}`) + this.logger.debug(error) return } diff --git a/src/vm/vm.ts b/src/vm/vm.ts index 3de4ff0..d2754fc 100644 --- a/src/vm/vm.ts +++ b/src/vm/vm.ts @@ -5,7 +5,7 @@ import { StrLVal, StrRVal, StrTerm, TypeError, unwrapDestructured, - unwrapInt, + unwrapInt, unwrapRex, unwrapString, wrapDestructured, wrapInt, wrapString } from "./commands/command.js"; @@ -27,7 +27,20 @@ export class Scope { ) {} resolve(lval: StrLVal): StrRVal|undefined { - return this.entries[lval.name] || this.parent?.resolve(lval) + const entry = this.entries[lval.name] + if ( entry ) { + if ( typeof lval.index !== 'undefined' ) { + if ( entry.term !== 'destructured' ) { + throw new TypeError('Cannot access index ' + lval.index + ' on lval ' + lval.name + ' (is not destructured)') + } + + return entry.value[lval.index]?.value + } + + return entry + } + + return this.parent?.resolve(lval) } setOrShadowValue(lval: StrLVal, val: StrRVal): void { @@ -132,6 +145,11 @@ export class ExecutionError extends Error {} export class TermOperationError extends ExecutionError {} export class UndefinedTermError extends ExecutionError {} +export type StringOrRexOperator = { + string: (s: string) => Awaitable, + rex: (s: RegExp) => Awaitable, +} + export class ExecutionContext { private history: [StrRVal, Scope][] = [] private forwardHistory: [StrRVal, Scope][] = [] @@ -334,6 +352,20 @@ export class ExecutionContext { return unwrapString(this.resolveRequired(term)) } + resolveRex(term: StrTerm): RegExp { + return unwrapRex(this.resolveRequired(term)) + } + + resolveStringOrRex(term: StrTerm): string|RegExp { + const rval = this.resolveRequired(term) + return rval.term === 'rex' ? 
unwrapRex(rval) : unwrapString(rval) + } + + async applyStringOrRex(term: StrTerm, op: StringOrRexOperator): Promise { + const rval = this.resolveStringOrRex(term) + return (typeof rval === 'string') ? op.string(rval) : op.rex(rval) + } + resolveDestructured(term: StrTerm) { return unwrapDestructured(this.resolveRequired(term)) }