Add drop, sort commands, start tail command; implement support for destructuredOrLines match type; misc fixes

2026-03-02 22:20:18 -06:00
parent e5acc2e8b1
commit d5d69e58a4
10 changed files with 224 additions and 8 deletions
--- a/HELP.txt
+++ b/HELP.txt
@@ -109,6 +109,9 @@ on <line|word> <index> <command...>
    Run the given command on the nth line or word.
    Example: on line 2 prefix +
 drop [line|word|index] <index>
    Drops the nth line, word, or element from the subject (indices start at 0).
    If the type is omitted, `index` is assumed.
    Example: drop line 2
 map <line|word> <start index> [to <end index>] [by <nth lines>] <command...>
    Map the subject line-wise or word-wise for the given range.
    Default is to map until the end of the string if `to` is not provided,
--- a/src/vm/commands/command.ts
+++ b/src/vm/commands/command.ts
@@ -170,7 +170,7 @@ export class ParseContext {
        }
        // Check if the token is a valid integer:
-        if ( /^-?[1-9][0-9]*$/.test(input.value) ) {
+        if ( /^-?[0-9]+$/.test(input.value) ) {
            return { term: 'int', value: parseInt(input.value, 10) }
        }
--- a/src/vm/commands/drop.ts
+++ b/src/vm/commands/drop.ts
@@ -0,0 +1,93 @@
 import {Command, ParseContext, StrTerm} from "./command.js";
 import {LexInput} from "../lexer.js";
 import {StrVM} from "../vm.js";
 import {Lines} from "./lines.js";
 import {Words} from "./words.js";
 import {Join} from "./join.js";
 /** Parsed arguments of the `drop` command. */
 export type DropData = {
    /**
     * What the index refers to: a line or a word of a string subject
     * (the subject is split, edited, and rejoined), or an element of an
     * already-destructured subject (`index`).
     */
    type: 'line'|'word'|'index',
    /** Term that is resolved to the integer index of the item to drop. */
    specific: StrTerm,
 }
 /**
 * Removes a single item from the subject. Like `on`, this command has a few forms:
 *
 * drop word 3 | drop line 3
 *  Assume the subject is a string. Split into words|lines respectively, remove the
 *  element at index 3, and rejoin.
 *
 * drop 3 | drop index 3
 *  Assume the subject is a destructured. Drop the element at index 3.
 *
 * Indices are zero-based. A negative index counts from the end of the list
 * (same rules as `Array.prototype.splice`); an index past the end drops nothing.
 */
 export class Drop extends Command<DropData> {
    /**
     * Parse the arguments following the `drop` keyword.
     *
     * A bare int or variable term is shorthand for `index <term>`; otherwise one of
     * the keywords `line`, `word`, or `index` must precede the index term.
     */
    async attemptParse(context: ParseContext): Promise<DropData> {
        const next = context.peekTerm()
        if ( next?.term === 'int' || next?.term === 'variable' ) {
            return {
                type: 'index',
                specific: context.popTerm(),
            }
        }
        return {
            type: context.popKeywordInSet(['line', 'word', 'index']).value,
            specific: context.popTerm(),
        }
    }
    getDisplayName(): string {
        return 'drop'
    }
    isParseCandidate(token: LexInput): boolean {
        return this.isKeyword(token, 'drop')
    }
    /**
     * Drop the requested item from the subject and return the resulting VM.
     *
     * For `line`/`word` the subject is first split with `Lines`/`Words`, and joined
     * again with `Join` after the element has been removed.
     */
    async execute(vm: StrVM, data: DropData): Promise<StrVM> {
        // If the type is line|word, first destructure the subject accordingly:
        let rejoin = false
        if ( data.type === 'line' ) {
            vm = await (new Lines).execute(vm)
            rejoin = true
        } else if ( data.type === 'word' ) {
            vm = await (new Words).execute(vm)
            rejoin = true
        }
        // Then, drop the item at the given index in the destructured subject:
        vm = await vm.replaceContextMatchingTerm(ctx => ({
            destructured: async sub => {
                const requested = ctx.resolveInt(data.specific)
                // Copy every element, not just the array: the prefix edits below must
                // never reach the objects that belong to the previous subject.
                const parts = sub.map(part => ({ ...part }))
                // Normalise a negative index the same way `splice` does (count from the
                // end, clamp at 0) so the prefix handling below sees the index that is
                // actually removed.
                const idx = requested < 0 ? Math.max(parts.length + requested, 0) : requested
                const removed = parts[idx]
                const following = parts[idx + 1]
                // In word mode, all whitespace is preserved. So, we need to take the prefix
                // of the element that's about to be deleted and add it to the prefix of the
                // word right after it (example: "a\nb c" `drop word 1` should become "a\n c", not "a c")
                if ( data.type === 'word' && removed?.prefix && following ) {
                    following.prefix = `${removed.prefix}${following.prefix || ''}`
                }
                // Remove the nth element (a no-op when idx is past the end).
                parts.splice(idx, 1)
                // In line mode, if we delete a line, we *don't* want the newline
                // prefixes to be preserved, as that would clear the contents of the line,
                // but leave an empty line in-place. The only edge case we need to account
                // for is if we removed the first line, we should remove the newline prefix
                // from the *new* first line:
                if ( data.type === 'line' && idx === 0 && parts.length > 0 ) {
                    parts[0].prefix = undefined
                }
                return parts
            },
        }))
        // If we previously split the value (i.e. for type = line|word), rejoin it:
        if ( rejoin ) {
            vm = await (new Join).execute(vm, {})
        }
        return vm
    }
 }
--- a/src/vm/commands/index.ts
+++ b/src/vm/commands/index.ts
@@ -41,12 +41,16 @@ import {Word} from "./word.js";
 import {On} from "./on.js";
 import {Each} from "./each.js";
 import {Words} from "./words.js";
 import {Drop} from "./drop.js";
 import {Sort} from "./sort.js";
 import {Tail} from "./tail.js";
 export type Commands = Command<CommandData>[]
 export const commands: Commands = [
    new Clear,
    new Contains,
    new Copy,
    new Drop,
    new Each,
    new Edit,
    new Enclose,
@@ -76,8 +80,10 @@ export const commands: Commands = [
    new RunFile,
    new Save,
    new Show,
    new Sort,
    new Split,
    new Suffix,
    new Tail,
    new To,
    new Trim,
    new Undo,
--- a/src/vm/commands/sort.ts
+++ b/src/vm/commands/sort.ts
@@ -0,0 +1,45 @@
 import {Command, ParseContext} from "./command.js";
 import {Awaitable} from "../../util/types.js";
 import {LexInput} from "../lexer.js";
 import {StrVM} from "../vm.js";
 /** Parsed arguments of the `sort` command. */
 export type SortData = {
    /** Requested order; the result is only reversed for `desc`, so omitting it sorts ascending. */
    direction?: 'asc'|'desc'
 }
 /**
 * `sort [asc|desc]`
 *  Orders the parts of the subject by comparing their values with `<` / `>`.
 *  Runs through the `destructuredOrLines` operator of the VM.
 */
 export class Sort extends Command<SortData> {
    isParseCandidate(token: LexInput): boolean {
        return this.isKeyword(token, 'sort')
    }
    getDisplayName(): string {
        return 'sort'
    }
    /** Consume the optional `asc`/`desc` keyword that may follow `sort`. */
    attemptParse(context: ParseContext): Awaitable<SortData> {
        const keyword = context.popOptionalKeywordInSet(['asc', 'desc'])
        return { direction: keyword?.value }
    }
    /** Sort a copy of the parts ascending, then flip the result when `desc` was requested. */
    execute(vm: StrVM, data: SortData): Awaitable<StrVM> {
        return vm.replaceContextMatchingTerm({
            destructuredOrLines: sub => {
                // Work on a copy so the incoming array is left untouched.
                const ordered = [...sub].sort((left, right) =>
                    left.value > right.value
                        ? 1
                        : (left.value < right.value ? -1 : 0)
                )
                return data.direction === 'desc' ? ordered.reverse() : ordered
            }
        })
    }
 }
--- a/src/vm/commands/tail.ts
+++ b/src/vm/commands/tail.ts
@@ -0,0 +1,31 @@
 import {Command, ParseContext, StrTerm} from "./command.js";
 import {LexInput} from "../lexer.js";
 import {StrVM} from "../vm.js";
 import {Lines} from "./lines.js";
 import {Words} from "./words.js";
 import {Join} from "./join.js";
 import {Awaitable} from "../../util/types.js";
 /** Parsed arguments of the `tail` command. */
 export type TailData = {
    /** Optional term following `tail`; populated by the parser via `popOptionalTerm()`. */
    length?: StrTerm,
 }
 /**
 * `tail [<length>]`
 *  Work in progress: the command parses its optional argument, but `execute`
 *  does not transform the subject yet.
 */
 export class Tail extends Command<TailData> {
    isParseCandidate(token: LexInput): boolean {
        return this.isKeyword(token, 'tail')
    }
    getDisplayName(): string {
        return 'tail'
    }
    /** Consume the optional term that may follow `tail`. */
    attemptParse(context: ParseContext): Awaitable<TailData> {
        const length = context.popOptionalTerm()
        return { length }
    }
    /** Placeholder: hands the VM back unchanged; `data.length` is not used yet. */
    async execute(vm: StrVM, data: TailData): Promise<StrVM> {
        return vm
    }
 }
--- a/src/vm/commands/unique.ts
+++ b/src/vm/commands/unique.ts
@@ -18,7 +18,7 @@ export class Unique extends Command<{}> {
    execute(vm: StrVM): Awaitable<StrVM> {
        return vm.replaceContextMatchingTerm({
-            destructured: sub => {
+            destructuredOrLines: sub => {
                const seen: Record<string, boolean> = {}
                return sub.filter(part => {
                    const hash = hashStrRVal(wrapString(part.value))
--- a/src/vm/lexer.ts
+++ b/src/vm/lexer.ts
@@ -10,6 +10,13 @@ export type LexToken = LexTerminator | LexInput
 const logger = log.getStreamLogger('lexer')
 /**
 * Escape-sequence table: maps the character that follows an escape to the
 * literal text it stands for (newline, carriage return, tab, space).
 * The lexer falls back to the character itself when it has no entry here.
 */
 const LITERAL_MAP: Record<string, string> = {
    'n': '\n',
    'r': '\r',
    't': '\t',
    's': ' ',
 }
 export class Lexer extends BehaviorSubject<LexToken> {
    private isEscape: boolean = false
    private inQuote?: '"'|"'"
@@ -49,7 +56,7 @@ export class Lexer extends BehaviorSubject<LexToken> {
            // We got the 2nd character after an escape
            if ( this.isEscape ) {
-                this.tokenAccumulator += c
+                this.tokenAccumulator += LITERAL_MAP[c] || c
                this.isEscape = false
                continue
            }
--- a/src/vm/output.ts
+++ b/src/vm/output.ts
@@ -2,12 +2,15 @@ import {StrRVal} from "./commands/command.js";
 import {Awaitable} from "../util/types.js";
 import childProcess from "node:child_process";
 import fs from "node:fs";
 import crypto from "node:crypto";
 import {tempFile} from "../util/fs.js";
 export const getSubjectDisplay = (sub: StrRVal): string => {
    if ( sub.term === 'string' ) {
-        return sub.value
+        const lines = sub.value.split('\n')
        const padLength = `${lines.length}`.length  // heh
        return lines
            .map((line, idx) => idx.toString().padStart(padLength, ' ') + ' ⎸' + line)
            .join('\n')
    }
    if ( sub.term === 'int' ) {
--- a/src/vm/vm.ts
+++ b/src/vm/vm.ts
@@ -1,7 +1,7 @@
 import {Awaitable, JSONData} from "../util/types.js";
 import {
    CommandData,
-    isStrRVal, StrDestructured,
+    isStrRVal, joinDestructured, StrDestructured,
    StrLVal,
    StrRVal,
    StrTerm, unwrapDestructured,
@@ -79,6 +79,11 @@ export type TermOperator = {
    restructure?: (sub: StrDestructured['value']) => Awaitable<string>,
    /** Map `destructured` to `destructured`. */
    destructured?: (sub: StrDestructured['value']) => Awaitable<StrDestructured['value']>,
    /**
     * If `string` (or `int`), split into lines, map the resulting parts
     * `destructured` to `destructured`, then join the values back together with newlines.
     * If `destructured`, map `destructured` to `destructured`, keeping it as a `destructured`.
     */
    destructuredOrLines?: (sub: StrDestructured['value']) => Awaitable<StrDestructured['value']>,
    /**
     * If `string`, map to `string`.
     * If `destructured`, map each part individual element, but keep it as a `destructured`.
@@ -180,6 +185,24 @@ export class ExecutionContext {
            return
        }
        if ( (sub.term === 'int' || sub.term === 'string') && operator.destructuredOrLines ) {
            const fake: StrDestructured['value'] = unwrapString(sub)
                .split('\n')
                .map((line, idx) => {
                    if ( idx ) {
                        return { prefix: '\n', value: line }
                    }
                    return { value: line }
                })
            const rejoined = (await operator.destructuredOrLines(fake))
                .map(x => x.value)
                .join('\n')
            this.subject = wrapString(rejoined)
            return
        }
        if ( sub.term === 'destructured' && operator.restructure ) {
            this.subject = wrapString(await operator.restructure(unwrapDestructured(sub)))
            return
@@ -190,6 +213,11 @@ export class ExecutionContext {
            return
        }
        if ( sub.term === 'destructured' && operator.destructuredOrLines ) {
            this.subject = wrapDestructured(await operator.destructuredOrLines(unwrapDestructured(sub)))
            return
        }
        if ( sub.term === 'destructured' && operator.stringOrDestructuredPart ) {
            this.subject = wrapDestructured(await Promise.all(
                unwrapDestructured(sub)
@@ -330,12 +358,12 @@ export class StrVM implements LifecycleAware {
    }
    public async replaceContextMatchingTerm(operator: TermOperator|((ctx: ExecutionContext) => TermOperator)): Promise<this> {
-        return this.tapInPlace(ctx => {
+        return this.tapInPlace(async ctx => {
            if ( typeof operator === 'function' ) {
                operator = operator(ctx)
            }
-            ctx.replaceSubjectMatchingTerm(operator)
+            await ctx.replaceSubjectMatchingTerm(operator)
        })
    }