Add flatten command, accessing destructured elements by index, WIP support for regular expressions

This commit is contained in:
2026-04-20 21:56:02 -05:00
parent fa851b680e
commit 37c27e5d0b
11 changed files with 206 additions and 21 deletions

View File

@@ -301,6 +301,10 @@ Example: `[foo, bar, foo]` -> `unique` -> `[foo, bar]`
Interleave the current destructured subject with the given other destructured subject.
Example: Say `$a` is `[1, 2, 3]` and subject is `[a, b, c]` -> `zip $a` -> `[a, 1, b, 2, c, 3]`
#### `flatten [<index>]`
Collapse a nested destructured string down a single level, optionally limiting the operation to the element at a specific (zero-based) `index`.
Example: `[[a,b], c, [d,e]]` -> `flatten 2` -> `[[a,b], c, d, e]`
### Working with Variables
@@ -310,6 +314,8 @@ Assign a variable using the standard syntax: `$a = 'mystr'`
You can then use them anywhere you may use a string: `split $a`
If your variable contains a `:: destructured`, you can access individual elements with their indices: `$a.0`, `$a.1`, &c.
There are also several commands for interacting with variables, defined below.
#### `to <var>`

View File

@@ -2,7 +2,7 @@ import {createHash} from 'node:crypto';
import {LexInput, LexToken, tokenIsLVal} from '../lexer.js'
import {
Executable,
ExpectedEndOfInputError, InvalidSubcontextError,
ExpectedEndOfInputError, InvalidRegularExpressionLiteralError, InvalidSubcontextError,
InvalidVariableNameError,
IsNotKeywordError,
UnexpectedEndOfInputError, UnexpectedEndofStatementError
@@ -10,10 +10,11 @@ import {
import {Awaitable, ElementType, hasOwnProperty} from "../../util/types.js";
import {StrVM} from "../vm.js";
import os from "node:os";
import {log} from "../../log.js";
export class TypeError extends Error {}
export type StrLVal = { term: 'variable', name: string }
export type StrLVal = { term: 'variable', name: string, index?: number }
export const isStrLVal = (val: unknown): val is StrLVal =>
!!(typeof val === 'object'
@@ -41,6 +42,8 @@ export type StrString = { term: 'string', value: string, literal?: true }
export type StrInt = { term: 'int', value: number }
export type StrRex = { term: 'rex', value: string, flags: string }
export type StrLamba = {
term: 'lambda',
value: {
@@ -49,7 +52,7 @@ export type StrLamba = {
},
}
export type StrRVal = StrString | StrInt | StrDestructured | StrLamba
export type StrRVal = StrString | StrInt | StrDestructured | StrLamba | StrRex
export type StrDestructuredTable = {
term: 'destructured',
@@ -91,6 +94,10 @@ export const hashStrRVal = (val: StrRVal): string => {
return toHex(`s:int:${val.value}`)
}
if ( val.term === 'rex' ) {
return toHex(`s:rex:${val.value}/${val.flags}`)
}
if ( val.term === 'lambda' ) {
throw new Error('Cannot hash lambda') // todo
}
@@ -110,20 +117,28 @@ export const isStrTerm = (val: unknown): val is StrTerm =>
&& hasOwnProperty(val, 'value')))
export const isStrRVal = (term: StrTerm): term is StrRVal =>
term.term === 'string' || term.term === 'int' || term.term === 'destructured' || term.term === 'lambda'
['string', 'int', 'destructured', 'lambda', 'rex'].includes(term.term)
export const unwrapString = (term: StrRVal): string => {
if ( term.term === 'int' ) {
return String(term.value)
}
if ( term.term === 'destructured' || term.term === 'lambda' ) {
if ( term.term === 'destructured' || term.term === 'lambda' || term.term === 'rex' ) {
throw new TypeError(`Found unexpected ${term.term} (expected: string|int)`)
}
return term.value
}
/**
 * Convert a `rex` term into a native `RegExp`.
 *
 * The term's `value` is used as the pattern source and its `flags` as the
 * flag string; a fresh `RegExp` instance is constructed on every call.
 *
 * @param term - The resolved value to unwrap.
 * @returns A new `RegExp` built from the term's pattern and flags.
 * @throws TypeError when `term` is any kind of value other than `rex`.
 */
export const unwrapRex = (term: StrRVal): RegExp => {
    if ( term.term === 'rex' ) {
        return new RegExp(term.value, term.flags)
    }

    throw new TypeError(`Found unexpected ${term.term} (expected: rex)`)
}
export const coerceString = (term: StrRVal): string => {
if ( term.term === 'destructured' ) {
return joinDestructured(term.value)
@@ -176,6 +191,8 @@ export interface ParseSubContext {
}
export class ParseContext {
private log = log.getStreamLogger('parseContext')
constructor(
private inputs: LexToken[],
private childParser: (tokens: LexToken[]) => Awaitable<[Executable<CommandData>, LexToken[]]>,
@@ -246,11 +263,38 @@ export class ParseContext {
private parseInputToTerm(input: LexInput): StrTerm {
// Check if the token is a literal variable name:
if ( !input.literal && input.value.startsWith('$') ) {
if ( !input.value.match(/^\$[a-zA-Z0-9_]+$/) ) {
if ( !input.value.match(/^\$[a-zA-Z0-9_]+(?:\.\d+)?$/) ) {
throw new InvalidVariableNameError(`Invalid variable name: ${input.value}`)
}
return { term: 'variable', name: input.value }
const parts = input.value.split('.')
let index: number|undefined = undefined
if ( parts.length > 1 ) {
index = parseInt(parts[1], 10)
}
return { term: 'variable', name: parts[0], index }
}
// Check if the token is a literal regular expression:
if ( !input.literal && input.value.startsWith('/') ) {
const parts = input.value
.substring(1) // trim the leading /
.split('')
.reverse()
let flags = ''
while ( parts.length && parts[0] !== '/' ) {
flags += parts.shift()
}
if ( !parts.length ) {
throw new InvalidRegularExpressionLiteralError(`Invalid regular expression literal: ${input.value}`)
}
parts.shift() // trim the trailing /
const pattern = parts.reverse().join('')
return { term: 'rex', value: pattern, flags }
}
// Check if the token is a valid integer:
@@ -379,6 +423,8 @@ export class ParseContext {
}))
}
this.log.debug({ lambdaBody: sc.inputs })
// Now, the remainder of the subcontext inputs should be a series of executables
// separated by `terminator` tokens -- e.g. (split _; join |), so parse executables
// from the subcontext until it is empty:
@@ -457,7 +503,10 @@ export class ParseContext {
value: last.value.substring(0, last.value.length - 1),
}
}
sc.inputs.push(last)
if ( last.type !== 'input' || last.value || last.literal ) {
// Avoid pushing an empty input if the last input contained ONLY the empty right-paren
sc.inputs.push(last)
}
return [sc, tokenIdx]
}

View File

@@ -20,9 +20,16 @@ export class Contains extends Command<{ find: StrTerm }> {
execute(vm: StrVM, data: { find: StrTerm }): Awaitable<StrVM> {
return vm.replaceContextMatchingTerm(ctx => ({
string: sub => sub.includes(ctx.resolveString(data.find)) ? sub : '',
destructuredOfStrings: parts => parts.filter(part =>
part.includes(ctx.resolveString(data.find))),
string: sub =>
ctx.applyStringOrRex(data.find, {
string: find => sub.includes(find) ? sub : '',
rex: find => find.test(sub) ? sub : '',
}),
destructuredOfStrings: parts =>
ctx.applyStringOrRex(data.find, {
string: find => parts.filter(part => part.includes(find)),
rex: find => parts.filter(part => find.test(part)),
}),
}))
}
}

View File

@@ -0,0 +1,60 @@
import {Command, ParseContext, StrDestructured, StrTerm} from "./command.js";
import {LexInput} from "../lexer.js";
import {StrVM} from "../vm.js";
/**
 * Parsed arguments of the `flatten` command.
 */
export type FlattenData = {
// Optional term naming the single element position to flatten. It is resolved
// to an integer at execution time; when omitted, every nested destructured
// element of the subject is flattened.
index?: StrTerm,
}
/**
 * The `flatten [<index>]` command.
 *
 * Collapses nested destructured elements of a destructured subject by exactly
 * one level. With no index, every nested element is expanded in place; with an
 * index, only the element at that (zero-based) position is considered and all
 * other elements are kept untouched.
 *
 * Example: [[a,b],c,[d,e]] -> flatten 0 -> [a,b,c,[d,e]]
 */
export class Flatten extends Command<FlattenData> {
    /** Matches statements that begin with the `flatten` keyword. */
    isParseCandidate(token: LexInput): boolean {
        return this.isKeyword(token, 'flatten')
    }

    getDisplayName(): string {
        return 'flatten'
    }

    /** Reads the optional index term that may follow the keyword. */
    async attemptParse(context: ParseContext): Promise<FlattenData> {
        const index = await context.popOptionalTerm()
        return { index }
    }

    /**
     * Replaces a destructured subject with a copy in which the targeted nested
     * elements have been spliced into the parent list.
     */
    async execute(vm: StrVM, data: FlattenData): Promise<StrVM> {
        return vm.replaceContextMatchingTerm(ctx => ({
            destructured: async (parts: StrDestructured['value']) => {
                // When an index term was supplied, only that position may be expanded.
                const onlyIndex = data.index ? ctx.resolveInt(data.index) : undefined

                return parts.flatMap((part, position) => {
                    const isTargeted = onlyIndex === undefined || onlyIndex === position
                    if ( isTargeted && part.value.term === 'destructured' ) {
                        // Splice the children of the nested element into the parent list.
                        return part.value.value
                    }

                    // Either not targeted or not nested: keep the element as-is.
                    return [part]
                })
            },
        }))
    }
}

View File

@@ -53,6 +53,7 @@ import {Chunk} from "./chunk.js";
import {Script} from "./script.js";
import {Take} from "./take.js";
import {Group} from "./group.js";
import {Flatten} from "./flatten.js";
export type Commands = Command<CommandData>[]
export const commands: Commands = [
@@ -68,6 +69,7 @@ export const commands: Commands = [
new Edit,
new Enclose,
new Exit,
new Flatten,
new From,
new Group,
new Help,

View File

@@ -20,9 +20,16 @@ export class Missing extends Command<{ find: StrTerm }> {
execute(vm: StrVM, data: { find: StrTerm }): Awaitable<StrVM> {
return vm.replaceContextMatchingTerm(ctx => ({
string: sub => sub.includes(ctx.resolveString(data.find)) ? '' : sub,
destructuredOfStrings: parts => parts.filter(part =>
!part.includes(ctx.resolveString(data.find))),
string: sub =>
ctx.applyStringOrRex(data.find, {
string: find => !sub.includes(find) ? sub : '',
rex: find => !find.test(sub) ? sub : '',
}),
destructuredOfStrings: parts =>
ctx.applyStringOrRex(data.find, {
string: find => parts.filter(part => !part.includes(find)),
rex: find => parts.filter(part => !find.test(part)),
}),
}))
}
}

View File

@@ -23,6 +23,7 @@ export const tokenIsLVal = (input: LexInput): boolean =>
export class Lexer extends BehaviorSubject<LexToken> {
private isEscape: boolean = false
private inComment: boolean = false
private inRex: boolean = false
private inQuote?: '"'|"'"
private tokenAccumulator: string = ''
@@ -77,7 +78,7 @@ export class Lexer extends BehaviorSubject<LexToken> {
}
// We got a statement terminator
if ( (c === ';' || c === '\n') && !this.inQuote ) {
if ( (c === ';' || c === '\n') && !this.inQuote && !this.inRex ) {
if ( this.tokenAccumulator ) {
await this.emitToken('terminator')
}
@@ -87,7 +88,7 @@ export class Lexer extends BehaviorSubject<LexToken> {
}
// Whitespace separates tokens
if ( (c === ' ' || c === '\t' || c === '\r') && !this.inQuote ) {
if ( (c === ' ' || c === '\t' || c === '\r') && !this.inQuote && !this.inRex ) {
if ( this.tokenAccumulator ) {
await this.emitToken('whitespace')
}
@@ -95,7 +96,7 @@ export class Lexer extends BehaviorSubject<LexToken> {
}
// Comments start with --
if ( this.tokenAccumulator === '-' && c === '-' && !this.inQuote ) {
if ( this.tokenAccumulator === '-' && c === '-' && !this.inQuote && !this.inRex ) {
this.tokenAccumulator = ''
this.inComment = true
continue
@@ -103,17 +104,32 @@ export class Lexer extends BehaviorSubject<LexToken> {
// We are either starting or ending an unescaped matching quote.
// For now, only parse single quotes. Makes it nicer to type " in commands.
if ( c === `'` ) {
if ( c === `'` && !this.inRex ) {
if ( c === this.inQuote ) {
this.inQuote = undefined
await this.emitToken('quote', true)
continue
} else if ( !this.inQuote ) {
} else if ( !this.inQuote && !this.tokenAccumulator ) {
this.inQuote = c
continue
}
}
// We are either starting or ending an unescaped regular expression literal.
if ( c === '/' && !this.inQuote ) {
if ( !this.inRex && !this.tokenAccumulator ) {
this.inRex = true
this.tokenAccumulator += c // include the slashes so we can parse them later
continue
} else if ( this.inRex ) {
// TODO: currently, this doesn't support tail modifiers (e.g. /./gi)
this.inRex = false
this.tokenAccumulator += c // include the slashes so we can parse them later
await this.emitToken('rex')
continue
}
}
this.tokenAccumulator += c
}
}

View File

@@ -83,6 +83,10 @@ export const getSubjectDisplay = (sub: StrRVal, prefix: string = '', firstLinePr
annotated += prefix + `\n│ ${sub.value}`
}
if ( sub.term === 'rex' ) {
annotated += prefix + `\n│ /${sub.value}/${sub.flags}`
}
if ( sub.term === 'destructured' ) {
const padLength = `${sub.value.length}`.length
annotated += '\n' + sub.value

View File

@@ -13,4 +13,5 @@ export class UnexpectedEndOfInputError extends ParseError {}
export class UnexpectedEndofStatementError extends ParseError {}
export class ExpectedEndOfInputError extends InvalidCommandError {}
export class InvalidVariableNameError extends ParseError {}
export class InvalidRegularExpressionLiteralError extends ParseError {}
export class InvalidSubcontextError extends ParseError {}

View File

@@ -35,6 +35,7 @@ export class Parser extends BehaviorSubject<Executable<CommandData>> {
async handleParseError(error: Error) {
if ( error instanceof ParseError ) {
this.logger.error(`(${error.constructor.name}) ${error.message}`)
this.logger.debug(error)
return
}

View File

@@ -5,7 +5,7 @@ import {
StrLVal,
StrRVal,
StrTerm, TypeError, unwrapDestructured,
unwrapInt,
unwrapInt, unwrapRex,
unwrapString, wrapDestructured, wrapInt,
wrapString
} from "./commands/command.js";
@@ -27,7 +27,20 @@ export class Scope {
) {}
resolve(lval: StrLVal): StrRVal|undefined {
return this.entries[lval.name] || this.parent?.resolve(lval)
const entry = this.entries[lval.name]
if ( entry ) {
if ( typeof lval.index !== 'undefined' ) {
if ( entry.term !== 'destructured' ) {
throw new TypeError('Cannot access index ' + lval.index + ' on lval ' + lval.name + ' (is not destructured)')
}
return entry.value[lval.index]?.value
}
return entry
}
return this.parent?.resolve(lval)
}
setOrShadowValue(lval: StrLVal, val: StrRVal): void {
@@ -132,6 +145,11 @@ export class ExecutionError extends Error {}
export class TermOperationError extends ExecutionError {}
export class UndefinedTermError extends ExecutionError {}
/**
 * A pair of handlers for an operation that accepts either a plain string or a
 * regular expression as its argument. `ExecutionContext.applyStringOrRex`
 * invokes exactly one of them, depending on what the term resolves to.
 * Each handler's return type is `Awaitable<TReturn>`.
 */
export type StringOrRexOperator<TReturn> = {
// Invoked when the term resolves to a string.
string: (s: string) => Awaitable<TReturn>,
// Invoked when the term resolves to a regular expression.
rex: (s: RegExp) => Awaitable<TReturn>,
}
export class ExecutionContext {
private history: [StrRVal, Scope][] = []
private forwardHistory: [StrRVal, Scope][] = []
@@ -334,6 +352,20 @@ export class ExecutionContext {
return unwrapString(this.resolveRequired(term))
}
/**
 * Resolve `term` and unwrap the result as a regular expression.
 *
 * @param term - The term to resolve via `resolveRequired`.
 * @returns A `RegExp` built from the resolved `rex` value.
 * @throws TypeError (from `unwrapRex`) when the resolved value is not a `rex`.
 */
resolveRex(term: StrTerm): RegExp {
    const resolved = this.resolveRequired(term)
    return unwrapRex(resolved)
}
/**
 * Resolve `term` to either a regular expression or a plain string.
 *
 * A resolved `rex` value is unwrapped with `unwrapRex`; every other kind of
 * value is handed to `unwrapString`.
 *
 * @param term - The term to resolve via `resolveRequired`.
 * @returns A `RegExp` for `rex` values, otherwise the unwrapped string.
 */
resolveStringOrRex(term: StrTerm): string|RegExp {
    const resolved = this.resolveRequired(term)
    if ( resolved.term === 'rex' ) {
        return unwrapRex(resolved)
    }

    return unwrapString(resolved)
}
/**
 * Resolve `term` and dispatch to the matching handler in `op`.
 *
 * `op.string` is called when the term resolves to a string, `op.rex` when it
 * resolves to a `RegExp`. Because this method is `async`, callers always
 * receive a promise.
 *
 * @param term - The term to resolve via `resolveStringOrRex`.
 * @param op - Handlers for the string and regular-expression cases.
 * @returns A promise of whatever the selected handler produces.
 */
async applyStringOrRex<TReturn>(term: StrTerm, op: StringOrRexOperator<TReturn>): Promise<TReturn> {
    const needle = this.resolveStringOrRex(term)
    if ( typeof needle === 'string' ) {
        return op.string(needle)
    }

    return op.rex(needle)
}
resolveDestructured(term: StrTerm) {
return unwrapDestructured(this.resolveRequired(term))
}