Add flatten command, accessing destructured elements by index, WIP support for regular expressions

This commit is contained in:
2026-04-20 21:56:02 -05:00
parent fa851b680e
commit 37c27e5d0b
11 changed files with 206 additions and 21 deletions

View File

@@ -301,6 +301,10 @@ Example: `[foo, bar, foo]` -> `unique` -> `[foo, bar]`
Interleave the current destructured subject with the given other destructured subject. Interleave the current destructured subject with the given other destructured subject.
Example: Say `$a` is `[1, 2, 3]` and subject is `[a, b, c]` -> `zip $a` -> `[a, 1, b, 2, c, 3]` Example: Say `$a` is `[1, 2, 3]` and subject is `[a, b, c]` -> `zip $a` -> `[a, 1, b, 2, c, 3]`
#### `flatten [<index>]`
Collapse a nested destructured string down a single level, optionally limiting the flattening to the element at a specific zero-based `index`.
Example: `[[a,b], c, [d,e]]` -> `flatten 2` -> `[[a,b], c, d, e]`
### Working with Variables ### Working with Variables
@@ -310,6 +314,8 @@ Assign a variable using the standard syntax: `$a = 'mystr'`
You can then use them anywhere you may use a string: `split $a` You can then use them anywhere you may use a string: `split $a`
If your variable contains a `:: destructured`, you can access individual elements by their indices: `$a.0`, `$a.1`, &c.
There are also several commands for interactive with variables, defined below. There are also several commands for interactive with variables, defined below.
#### `to <var>` #### `to <var>`

View File

@@ -2,7 +2,7 @@ import {createHash} from 'node:crypto';
import {LexInput, LexToken, tokenIsLVal} from '../lexer.js' import {LexInput, LexToken, tokenIsLVal} from '../lexer.js'
import { import {
Executable, Executable,
ExpectedEndOfInputError, InvalidSubcontextError, ExpectedEndOfInputError, InvalidRegularExpressionLiteralError, InvalidSubcontextError,
InvalidVariableNameError, InvalidVariableNameError,
IsNotKeywordError, IsNotKeywordError,
UnexpectedEndOfInputError, UnexpectedEndofStatementError UnexpectedEndOfInputError, UnexpectedEndofStatementError
@@ -10,10 +10,11 @@ import {
import {Awaitable, ElementType, hasOwnProperty} from "../../util/types.js"; import {Awaitable, ElementType, hasOwnProperty} from "../../util/types.js";
import {StrVM} from "../vm.js"; import {StrVM} from "../vm.js";
import os from "node:os"; import os from "node:os";
import {log} from "../../log.js";
export class TypeError extends Error {} export class TypeError extends Error {}
export type StrLVal = { term: 'variable', name: string } export type StrLVal = { term: 'variable', name: string, index?: number }
export const isStrLVal = (val: unknown): val is StrLVal => export const isStrLVal = (val: unknown): val is StrLVal =>
!!(typeof val === 'object' !!(typeof val === 'object'
@@ -41,6 +42,8 @@ export type StrString = { term: 'string', value: string, literal?: true }
export type StrInt = { term: 'int', value: number } export type StrInt = { term: 'int', value: number }
export type StrRex = { term: 'rex', value: string, flags: string }
export type StrLamba = { export type StrLamba = {
term: 'lambda', term: 'lambda',
value: { value: {
@@ -49,7 +52,7 @@ export type StrLamba = {
}, },
} }
export type StrRVal = StrString | StrInt | StrDestructured | StrLamba export type StrRVal = StrString | StrInt | StrDestructured | StrLamba | StrRex
export type StrDestructuredTable = { export type StrDestructuredTable = {
term: 'destructured', term: 'destructured',
@@ -91,6 +94,10 @@ export const hashStrRVal = (val: StrRVal): string => {
return toHex(`s:int:${val.value}`) return toHex(`s:int:${val.value}`)
} }
if ( val.term === 'rex' ) {
return toHex(`s:rex:${val.value}/${val.flags}`)
}
if ( val.term === 'lambda' ) { if ( val.term === 'lambda' ) {
throw new Error('Cannot hash lambda') // todo throw new Error('Cannot hash lambda') // todo
} }
@@ -110,20 +117,28 @@ export const isStrTerm = (val: unknown): val is StrTerm =>
&& hasOwnProperty(val, 'value'))) && hasOwnProperty(val, 'value')))
export const isStrRVal = (term: StrTerm): term is StrRVal => export const isStrRVal = (term: StrTerm): term is StrRVal =>
term.term === 'string' || term.term === 'int' || term.term === 'destructured' || term.term === 'lambda' ['string', 'int', 'destructured', 'lambda', 'rex'].includes(term.term)
export const unwrapString = (term: StrRVal): string => { export const unwrapString = (term: StrRVal): string => {
if ( term.term === 'int' ) { if ( term.term === 'int' ) {
return String(term.value) return String(term.value)
} }
if ( term.term === 'destructured' || term.term === 'lambda' ) { if ( term.term === 'destructured' || term.term === 'lambda' || term.term === 'rex' ) {
throw new TypeError(`Found unexpected ${term.term} (expected: string|int)`) throw new TypeError(`Found unexpected ${term.term} (expected: string|int)`)
} }
return term.value return term.value
} }
/**
 * Convert a `rex` term into a native `RegExp`.
 *
 * A fresh `RegExp` is built on every call from the term's pattern (`value`)
 * and `flags`.
 *
 * @param term the resolved r-value to unwrap
 * @returns a new `RegExp` built from the term's pattern and flags
 * @throws TypeError when `term` is anything other than a `rex` term
 */
export const unwrapRex = (term: StrRVal): RegExp => {
    if ( term.term === 'rex' ) {
        const { value: pattern, flags } = term
        return new RegExp(pattern, flags)
    }

    throw new TypeError(`Found unexpected ${term.term} (expected: rex)`)
}
export const coerceString = (term: StrRVal): string => { export const coerceString = (term: StrRVal): string => {
if ( term.term === 'destructured' ) { if ( term.term === 'destructured' ) {
return joinDestructured(term.value) return joinDestructured(term.value)
@@ -176,6 +191,8 @@ export interface ParseSubContext {
} }
export class ParseContext { export class ParseContext {
private log = log.getStreamLogger('parseContext')
constructor( constructor(
private inputs: LexToken[], private inputs: LexToken[],
private childParser: (tokens: LexToken[]) => Awaitable<[Executable<CommandData>, LexToken[]]>, private childParser: (tokens: LexToken[]) => Awaitable<[Executable<CommandData>, LexToken[]]>,
@@ -246,11 +263,38 @@ export class ParseContext {
private parseInputToTerm(input: LexInput): StrTerm { private parseInputToTerm(input: LexInput): StrTerm {
// Check if the token is a literal variable name: // Check if the token is a literal variable name:
if ( !input.literal && input.value.startsWith('$') ) { if ( !input.literal && input.value.startsWith('$') ) {
if ( !input.value.match(/^\$[a-zA-Z0-9_]+$/) ) { if ( !input.value.match(/^\$[a-zA-Z0-9_]+(?:\.\d+)?$/) ) {
throw new InvalidVariableNameError(`Invalid variable name: ${input.value}`) throw new InvalidVariableNameError(`Invalid variable name: ${input.value}`)
} }
return { term: 'variable', name: input.value } const parts = input.value.split('.')
let index: number|undefined = undefined
if ( parts.length > 1 ) {
index = parseInt(parts[1], 10)
}
return { term: 'variable', name: parts[0], index }
}
// Check if the token is a literal regular expression:
if ( !input.literal && input.value.startsWith('/') ) {
const parts = input.value
.substring(1) // trim the leading /
.split('')
.reverse()
let flags = ''
while ( parts.length && parts[0] !== '/' ) {
flags += parts.shift()
}
if ( !parts.length ) {
throw new InvalidRegularExpressionLiteralError(`Invalid regular expression literal: ${input.value}`)
}
parts.shift() // trim the trailing /
const pattern = parts.reverse().join('')
return { term: 'rex', value: pattern, flags }
} }
// Check if the token is a valid integer: // Check if the token is a valid integer:
@@ -379,6 +423,8 @@ export class ParseContext {
})) }))
} }
this.log.debug({ lambdaBody: sc.inputs })
// Now, the remainder of the subcontext inputs should be a series of executables // Now, the remainder of the subcontext inputs should be a series of executables
// separated by `terminator` tokens -- e.g. (split _; join |), so parse executables // separated by `terminator` tokens -- e.g. (split _; join |), so parse executables
// from the subcontext until it is empty: // from the subcontext until it is empty:
@@ -457,7 +503,10 @@ export class ParseContext {
value: last.value.substring(0, last.value.length - 1), value: last.value.substring(0, last.value.length - 1),
} }
} }
sc.inputs.push(last) if ( last.type !== 'input' || last.value || last.literal ) {
// Avoid pushing an empty input if the last input contained ONLY the empty right-paren
sc.inputs.push(last)
}
return [sc, tokenIdx] return [sc, tokenIdx]
} }

View File

@@ -20,9 +20,16 @@ export class Contains extends Command<{ find: StrTerm }> {
execute(vm: StrVM, data: { find: StrTerm }): Awaitable<StrVM> { execute(vm: StrVM, data: { find: StrTerm }): Awaitable<StrVM> {
return vm.replaceContextMatchingTerm(ctx => ({ return vm.replaceContextMatchingTerm(ctx => ({
string: sub => sub.includes(ctx.resolveString(data.find)) ? sub : '', string: sub =>
destructuredOfStrings: parts => parts.filter(part => ctx.applyStringOrRex(data.find, {
part.includes(ctx.resolveString(data.find))), string: find => sub.includes(find) ? sub : '',
rex: find => find.test(sub) ? sub : '',
}),
destructuredOfStrings: parts =>
ctx.applyStringOrRex(data.find, {
string: find => parts.filter(part => part.includes(find)),
rex: find => parts.filter(part => find.test(part)),
}),
})) }))
} }
} }

View File

@@ -0,0 +1,60 @@
import {Command, ParseContext, StrDestructured, StrTerm} from "./command.js";
import {LexInput} from "../lexer.js";
import {StrVM} from "../vm.js";
/** Parsed arguments of the `flatten` command. */
export type FlattenData = {
    /** Optional term naming the single element position to flatten. */
    index?: StrTerm,
}

/**
 * `flatten [<index>]` -- expand nested destructured elements of the subject
 * by one level.
 *
 * Without an index, every element that is itself a destructured is replaced
 * by its children. With an index, only the element at that zero-based
 * position is expanded; all other elements are kept as-is.
 *
 * e.g. [[a,b],c,[d,e]] -> flatten 0 -> [a,b,c,[d,e]]
 */
export class Flatten extends Command<FlattenData> {
    async attemptParse(context: ParseContext): Promise<FlattenData> {
        const index = await context.popOptionalTerm()
        return { index }
    }

    getDisplayName(): string {
        return 'flatten'
    }

    isParseCandidate(token: LexInput): boolean {
        return this.isKeyword(token, 'flatten')
    }

    async execute(vm: StrVM, data: FlattenData): Promise<StrVM> {
        return vm.replaceContextMatchingTerm(ctx => ({
            destructured: async (parts: StrDestructured['value']) => {
                // Resolve the optional target position once, before walking the elements.
                const target: number|undefined = data.index
                    ? ctx.resolveInt(data.index)
                    : undefined

                const flattened: StrDestructured['value'] = []
                for ( const [position, part] of parts.entries() ) {
                    // An element is eligible when no target was given, or when it is the target.
                    const selected = typeof target === 'undefined' || target === position

                    if ( selected && part.value.term === 'destructured' ) {
                        // Hoist the nested children up into the parent level.
                        for ( const child of part.value.value ) {
                            flattened.push(child)
                        }
                    } else {
                        // Not targeted, or nothing nested to expand: keep the element untouched.
                        flattened.push(part)
                    }
                }

                return flattened
            },
        }))
    }
}

View File

@@ -53,6 +53,7 @@ import {Chunk} from "./chunk.js";
import {Script} from "./script.js"; import {Script} from "./script.js";
import {Take} from "./take.js"; import {Take} from "./take.js";
import {Group} from "./group.js"; import {Group} from "./group.js";
import {Flatten} from "./flatten.js";
export type Commands = Command<CommandData>[] export type Commands = Command<CommandData>[]
export const commands: Commands = [ export const commands: Commands = [
@@ -68,6 +69,7 @@ export const commands: Commands = [
new Edit, new Edit,
new Enclose, new Enclose,
new Exit, new Exit,
new Flatten,
new From, new From,
new Group, new Group,
new Help, new Help,

View File

@@ -20,9 +20,16 @@ export class Missing extends Command<{ find: StrTerm }> {
execute(vm: StrVM, data: { find: StrTerm }): Awaitable<StrVM> { execute(vm: StrVM, data: { find: StrTerm }): Awaitable<StrVM> {
return vm.replaceContextMatchingTerm(ctx => ({ return vm.replaceContextMatchingTerm(ctx => ({
string: sub => sub.includes(ctx.resolveString(data.find)) ? '' : sub, string: sub =>
destructuredOfStrings: parts => parts.filter(part => ctx.applyStringOrRex(data.find, {
!part.includes(ctx.resolveString(data.find))), string: find => !sub.includes(find) ? sub : '',
rex: find => !find.test(sub) ? sub : '',
}),
destructuredOfStrings: parts =>
ctx.applyStringOrRex(data.find, {
string: find => parts.filter(part => !part.includes(find)),
rex: find => parts.filter(part => !find.test(part)),
}),
})) }))
} }
} }

View File

@@ -23,6 +23,7 @@ export const tokenIsLVal = (input: LexInput): boolean =>
export class Lexer extends BehaviorSubject<LexToken> { export class Lexer extends BehaviorSubject<LexToken> {
private isEscape: boolean = false private isEscape: boolean = false
private inComment: boolean = false private inComment: boolean = false
private inRex: boolean = false
private inQuote?: '"'|"'" private inQuote?: '"'|"'"
private tokenAccumulator: string = '' private tokenAccumulator: string = ''
@@ -77,7 +78,7 @@ export class Lexer extends BehaviorSubject<LexToken> {
} }
// We got a statement terminator // We got a statement terminator
if ( (c === ';' || c === '\n') && !this.inQuote ) { if ( (c === ';' || c === '\n') && !this.inQuote && !this.inRex ) {
if ( this.tokenAccumulator ) { if ( this.tokenAccumulator ) {
await this.emitToken('terminator') await this.emitToken('terminator')
} }
@@ -87,7 +88,7 @@ export class Lexer extends BehaviorSubject<LexToken> {
} }
// Whitespace separates tokens // Whitespace separates tokens
if ( (c === ' ' || c === '\t' || c === '\r') && !this.inQuote ) { if ( (c === ' ' || c === '\t' || c === '\r') && !this.inQuote && !this.inRex ) {
if ( this.tokenAccumulator ) { if ( this.tokenAccumulator ) {
await this.emitToken('whitespace') await this.emitToken('whitespace')
} }
@@ -95,7 +96,7 @@ export class Lexer extends BehaviorSubject<LexToken> {
} }
// Comments start with -- // Comments start with --
if ( this.tokenAccumulator === '-' && c === '-' && !this.inQuote ) { if ( this.tokenAccumulator === '-' && c === '-' && !this.inQuote && !this.inRex ) {
this.tokenAccumulator = '' this.tokenAccumulator = ''
this.inComment = true this.inComment = true
continue continue
@@ -103,17 +104,32 @@ export class Lexer extends BehaviorSubject<LexToken> {
// We are either starting or ending an unescaped matching quote. // We are either starting or ending an unescaped matching quote.
// For now, only parse single quotes. Makes it nicer to type " in commands. // For now, only parse single quotes. Makes it nicer to type " in commands.
if ( c === `'` ) { if ( c === `'` && !this.inRex ) {
if ( c === this.inQuote ) { if ( c === this.inQuote ) {
this.inQuote = undefined this.inQuote = undefined
await this.emitToken('quote', true) await this.emitToken('quote', true)
continue continue
} else if ( !this.inQuote ) { } else if ( !this.inQuote && !this.tokenAccumulator ) {
this.inQuote = c this.inQuote = c
continue continue
} }
} }
// We are either starting or ending an unescaped regular expression literal.
if ( c === '/' && !this.inQuote ) {
if ( !this.inRex && !this.tokenAccumulator ) {
this.inRex = true
this.tokenAccumulator += c // include the slashes so we can parse them later
continue
} else if ( this.inRex ) {
// TODO: currently, this doesn't support tail modifiers (e.g. /./gi)
this.inRex = false
this.tokenAccumulator += c // include the slashes so we can parse them later
await this.emitToken('rex')
continue
}
}
this.tokenAccumulator += c this.tokenAccumulator += c
} }
} }

View File

@@ -83,6 +83,10 @@ export const getSubjectDisplay = (sub: StrRVal, prefix: string = '', firstLinePr
annotated += prefix + `\n│ ${sub.value}` annotated += prefix + `\n│ ${sub.value}`
} }
if ( sub.term === 'rex' ) {
annotated += prefix + `\n│ /${sub.value}/${sub.flags}`
}
if ( sub.term === 'destructured' ) { if ( sub.term === 'destructured' ) {
const padLength = `${sub.value.length}`.length const padLength = `${sub.value.length}`.length
annotated += '\n' + sub.value annotated += '\n' + sub.value

View File

@@ -13,4 +13,5 @@ export class UnexpectedEndOfInputError extends ParseError {}
export class UnexpectedEndofStatementError extends ParseError {} export class UnexpectedEndofStatementError extends ParseError {}
export class ExpectedEndOfInputError extends InvalidCommandError {} export class ExpectedEndOfInputError extends InvalidCommandError {}
export class InvalidVariableNameError extends ParseError {} export class InvalidVariableNameError extends ParseError {}
export class InvalidRegularExpressionLiteralError extends ParseError {}
export class InvalidSubcontextError extends ParseError {} export class InvalidSubcontextError extends ParseError {}

View File

@@ -35,6 +35,7 @@ export class Parser extends BehaviorSubject<Executable<CommandData>> {
async handleParseError(error: Error) { async handleParseError(error: Error) {
if ( error instanceof ParseError ) { if ( error instanceof ParseError ) {
this.logger.error(`(${error.constructor.name}) ${error.message}`) this.logger.error(`(${error.constructor.name}) ${error.message}`)
this.logger.debug(error)
return return
} }

View File

@@ -5,7 +5,7 @@ import {
StrLVal, StrLVal,
StrRVal, StrRVal,
StrTerm, TypeError, unwrapDestructured, StrTerm, TypeError, unwrapDestructured,
unwrapInt, unwrapInt, unwrapRex,
unwrapString, wrapDestructured, wrapInt, unwrapString, wrapDestructured, wrapInt,
wrapString wrapString
} from "./commands/command.js"; } from "./commands/command.js";
@@ -27,7 +27,20 @@ export class Scope {
) {} ) {}
resolve(lval: StrLVal): StrRVal|undefined { resolve(lval: StrLVal): StrRVal|undefined {
return this.entries[lval.name] || this.parent?.resolve(lval) const entry = this.entries[lval.name]
if ( entry ) {
if ( typeof lval.index !== 'undefined' ) {
if ( entry.term !== 'destructured' ) {
throw new TypeError('Cannot access index ' + lval.index + ' on lval ' + lval.name + ' (is not destructured)')
}
return entry.value[lval.index]?.value
}
return entry
}
return this.parent?.resolve(lval)
} }
setOrShadowValue(lval: StrLVal, val: StrRVal): void { setOrShadowValue(lval: StrLVal, val: StrRVal): void {
@@ -132,6 +145,11 @@ export class ExecutionError extends Error {}
export class TermOperationError extends ExecutionError {} export class TermOperationError extends ExecutionError {}
export class UndefinedTermError extends ExecutionError {} export class UndefinedTermError extends ExecutionError {}
/**
 * Pair of handlers for an operation whose operand may be either a plain
 * string or a regular expression. Exactly one of the two handlers is invoked,
 * and both must produce the same (possibly awaitable) result type.
 */
export type StringOrRexOperator<TReturn> = {
/** Invoked when the operand is a plain string. */
string: (s: string) => Awaitable<TReturn>,
/** Invoked when the operand is a regular expression. */
rex: (s: RegExp) => Awaitable<TReturn>,
}
export class ExecutionContext { export class ExecutionContext {
private history: [StrRVal, Scope][] = [] private history: [StrRVal, Scope][] = []
private forwardHistory: [StrRVal, Scope][] = [] private forwardHistory: [StrRVal, Scope][] = []
@@ -334,6 +352,20 @@ export class ExecutionContext {
return unwrapString(this.resolveRequired(term)) return unwrapString(this.resolveRequired(term))
} }
/**
 * Resolve `term` to a value and unwrap it as a regular expression.
 *
 * @param term the term to resolve
 * @returns the `RegExp` produced by `unwrapRex` for the resolved value
 */
resolveRex(term: StrTerm): RegExp {
    const resolved = this.resolveRequired(term)
    return unwrapRex(resolved)
}
/**
 * Resolve `term` and unwrap it as either a regular expression or a string.
 *
 * A resolved `rex` value is unwrapped with `unwrapRex`; every other value is
 * handed to `unwrapString`.
 *
 * @param term the term to resolve
 * @returns a `RegExp` for `rex` values, otherwise the unwrapped string
 */
resolveStringOrRex(term: StrTerm): string|RegExp {
    const resolved = this.resolveRequired(term)

    if ( resolved.term === 'rex' ) {
        return unwrapRex(resolved)
    }

    return unwrapString(resolved)
}
/**
 * Resolve `term` to a string or regular expression and dispatch to the
 * matching handler in `op`.
 *
 * @param term the term to resolve
 * @param op handlers for the string case and the regular-expression case
 * @returns whatever the selected handler returns
 */
async applyStringOrRex<TReturn>(term: StrTerm, op: StringOrRexOperator<TReturn>): Promise<TReturn> {
    const operand = this.resolveStringOrRex(term)

    if ( typeof operand === 'string' ) {
        return op.string(operand)
    }

    return op.rex(operand)
}
resolveDestructured(term: StrTerm) { resolveDestructured(term: StrTerm) {
return unwrapDestructured(this.resolveRequired(term)) return unwrapDestructured(this.resolveRequired(term))
} }