Add drop, sort commands, start tail command; implement support for destructuredOrLines match type; misc fixes

2026-03-02 22:20:18 -06:00
parent e5acc2e8b1
commit d5d69e58a4
10 changed files with 224 additions and 8 deletions
--- a/src/vm/commands/command.ts
+++ b/src/vm/commands/command.ts
@@ -170,7 +170,7 @@ export class ParseContext {
        }

        // Check if the token is a valid integer:
-        if ( /^-?[1-9][0-9]*$/.test(input.value) ) {
+        if ( /^-?[0-9]+$/.test(input.value) ) {
            return { term: 'int', value: parseInt(input.value, 10) }
        }

--- a/src/vm/commands/drop.ts
+++ b/src/vm/commands/drop.ts
@@ -0,0 +1,93 @@
+import {Command, ParseContext, StrTerm} from "./command.js";
+import {LexInput} from "../lexer.js";
+import {StrVM} from "../vm.js";
+import {Lines} from "./lines.js";
+import {Words} from "./words.js";
+import {Join} from "./join.js";
+
+export type DropData = {
+    type: 'line'|'word'|'index',  // 'line'/'word': split a string subject first; 'index': operate on a destructured subject as-is
+    specific: StrTerm,  // term naming the position to drop; resolved to an int when the command executes
+}
+
+/**
+ * Like `on`, this command has a few forms:
+ *
+ * drop word 3 | drop line 3
+ *  Assume the subject is a string. Split into words|lines respectively, remove the element at index 3, and rejoin.
+ *
+ * drop 3 | drop index 3
+ *  Assume the subject is a destructured. Remove the element at index 3; the parts passed in are never mutated.
+ */
+export class Drop extends Command<DropData> {
+    async attemptParse(context: ParseContext): Promise<DropData> {
+        const next = context.peekTerm()
+        if ( next?.term === 'int' || next?.term === 'variable' ) {  // bare form: `drop 3` / `drop $var`
+            return {
+                type: 'index',
+                specific: context.popTerm(),
+            }
+        }
+
+        return {
+            type: context.popKeywordInSet(['line', 'word', 'index']).value,
+            specific: context.popTerm(),
+        }
+    }
+
+    getDisplayName(): string {
+        return 'drop'
+    }
+
+    isParseCandidate(token: LexInput): boolean {
+        return this.isKeyword(token, 'drop')
+    }
+
+    async execute(vm: StrVM, data: DropData): Promise<StrVM> {
+        // If the type is line|word, first destructure the subject accordingly:
+        let rejoin = false
+        if ( data.type === 'line' ) {
+            vm = await (new Lines).execute(vm)
+            rejoin = true
+        } else if ( data.type === 'word' ) {
+            vm = await (new Words).execute(vm)
+            rejoin = true
+        }
+
+        // Then, drop the item at the given index in the destructured subject:
+        vm = await vm.replaceContextMatchingTerm(ctx => ({
+            destructured: async sub => {
+                const idx = ctx.resolveInt(data.specific)
+                sub = [...sub]  // shallow copy: new array, but the elements are still shared with the input
+
+                // In word mode, all whitespace is preserved. So, we need to take the prefix
+                // of the element that's about to be deleted and add it to the prefix of the
+                // word right after it (example: "a\nb c" `drop word 1` should become "a\n c", not "a c").
+                if ( data.type === 'word' && sub[idx]?.prefix && sub[idx + 1] ) {
+                    sub[idx + 1] = { ...sub[idx + 1], prefix: `${sub[idx].prefix}${sub[idx + 1].prefix || ''}` }
+                }
+
+                // Remove the nth element.
+                sub.splice(idx, 1)
+
+                // In line mode, if we delete a line, we *don't* want the newline
+                // prefixes to be preserved, as that would clear the contents of the line,
+                // but leave an empty line in-place. The only edge case we need to account
+                // for is if we removed the first line, we should remove the newline prefix
+                // from the *new* first line (replaced with a copy so the shared element is left intact):
+                if ( data.type === 'line' && idx === 0 && sub.length > 0 ) {
+                    sub[0] = { ...sub[0], prefix: undefined }
+                }
+
+                return sub
+            },
+        }))
+
+        // If we previously split the value (i.e. for type = line|word), rejoin it:
+        if ( rejoin ) {
+            vm = await (new Join).execute(vm, {})
+        }
+
+        return vm
+    }
+}
--- a/src/vm/commands/index.ts
+++ b/src/vm/commands/index.ts
@@ -41,12 +41,16 @@ import {Word} from "./word.js";
 import {On} from "./on.js";
 import {Each} from "./each.js";
 import {Words} from "./words.js";
+import {Drop} from "./drop.js";
+import {Sort} from "./sort.js";
+import {Tail} from "./tail.js";

 export type Commands = Command<CommandData>[]
 export const commands: Commands = [
    new Clear,
    new Contains,
    new Copy,
+    new Drop,
    new Each,
    new Edit,
    new Enclose,
@@ -76,8 +80,10 @@ export const commands: Commands = [
    new RunFile,
    new Save,
    new Show,
+    new Sort,
    new Split,
    new Suffix,
+    new Tail,
    new To,
    new Trim,
    new Undo,
--- a/src/vm/commands/sort.ts
+++ b/src/vm/commands/sort.ts
@@ -0,0 +1,45 @@
+import {Command, ParseContext} from "./command.js";
+import {Awaitable} from "../../util/types.js";
+import {LexInput} from "../lexer.js";
+import {StrVM} from "../vm.js";
+
+export type SortData = {
+    direction?: 'asc'|'desc'  // optional keyword after `sort`; only 'desc' alters the default ascending order
+}
+
+export class Sort extends Command<SortData> {
+    /** Consumes an optional `asc`/`desc` keyword; `direction` stays undefined when none is given. */
+    attemptParse(context: ParseContext): Awaitable<SortData> {
+        return {
+            direction: context.popOptionalKeywordInSet(['asc', 'desc'])?.value,
+        }
+    }
+
+    getDisplayName(): string {
+        return 'sort'
+    }
+
+    isParseCandidate(token: LexInput): boolean {
+        return this.isKeyword(token, 'sort')
+    }
+
+    execute(vm: StrVM, data: SortData): Awaitable<StrVM> {
+        const descending = data.direction === 'desc'
+        return vm.replaceContextMatchingTerm({
+            destructuredOrLines: parts => {
+                // Work on a copy so the array handed to us keeps its order.
+                const ordered = [...parts]
+                ordered.sort((left, right) => {
+                    // Plain `<` / `>` comparison of the parts' values.
+                    if ( left.value < right.value ) {
+                        return -1
+                    }
+                    return left.value > right.value ? 1 : 0
+                })
+
+                // Descending is the ascending result flipped end-to-end.
+                return descending ? ordered.reverse() : ordered
+            }
+        })
+    }
+}
--- a/src/vm/commands/tail.ts
+++ b/src/vm/commands/tail.ts
@@ -0,0 +1,31 @@
+import {Command, ParseContext, StrTerm} from "./command.js";
+import {LexInput} from "../lexer.js";
+import {StrVM} from "../vm.js";
+import {Lines} from "./lines.js";
+import {Words} from "./words.js";
+import {Join} from "./join.js";
+import {Awaitable} from "../../util/types.js";
+
+export type TailData = {
+    length?: StrTerm,  // optional term parsed after `tail`; Tail.execute does not read it yet
+}
+
+export class Tail extends Command<TailData> {
+    attemptParse(context: ParseContext): Awaitable<TailData> {
+        return {
+            length: context.popOptionalTerm(),  // presumably undefined when no term follows `tail` — confirm against ParseContext
+        }
+    }
+
+    getDisplayName(): string {
+        return 'tail'
+    }
+
+    isParseCandidate(token: LexInput): boolean {
+        return this.isKeyword(token, 'tail')
+    }
+
+    async execute(vm: StrVM, data: TailData): Promise<StrVM> {
+        return vm  // NOTE(review): placeholder — returns the VM untouched and never reads `data.length`
+    }
+}
--- a/src/vm/commands/unique.ts
+++ b/src/vm/commands/unique.ts
@@ -18,7 +18,7 @@ export class Unique extends Command<{}> {

    execute(vm: StrVM): Awaitable<StrVM> {
        return vm.replaceContextMatchingTerm({
-            destructured: sub => {
+            destructuredOrLines: sub => {
                const seen: Record<string, boolean> = {}
                return sub.filter(part => {
                    const hash = hashStrRVal(wrapString(part.value))
--- a/src/vm/lexer.ts
+++ b/src/vm/lexer.ts
@@ -10,6 +10,13 @@ export type LexToken = LexTerminator | LexInput

 const logger = log.getStreamLogger('lexer')

+const LITERAL_MAP: Record<string, string> = {  // character following an escape → the literal the lexer appends instead
+    'n': '\n',
+    'r': '\r',
+    't': '\t',
+    's': ' ',  // a single space
+}
+
 export class Lexer extends BehaviorSubject<LexToken> {
    private isEscape: boolean = false
    private inQuote?: '"'|"'"
@@ -49,7 +56,7 @@ export class Lexer extends BehaviorSubject<LexToken> {

            // We got the 2nd character after an escape
            if ( this.isEscape ) {
-                this.tokenAccumulator += c
+                this.tokenAccumulator += LITERAL_MAP[c] || c
                this.isEscape = false
                continue
            }
--- a/src/vm/output.ts
+++ b/src/vm/output.ts
@@ -2,12 +2,15 @@ import {StrRVal} from "./commands/command.js";
 import {Awaitable} from "../util/types.js";
 import childProcess from "node:child_process";
 import fs from "node:fs";
-import crypto from "node:crypto";
 import {tempFile} from "../util/fs.js";

 export const getSubjectDisplay = (sub: StrRVal): string => {
    if ( sub.term === 'string' ) {
-        return sub.value
+        const lines = sub.value.split('\n')
+        const padLength = `${lines.length}`.length  // heh
+        return lines
+            .map((line, idx) => idx.toString().padStart(padLength, ' ') + ' ⎸' + line)
+            .join('\n')
    }

    if ( sub.term === 'int' ) {
--- a/src/vm/vm.ts
+++ b/src/vm/vm.ts
@@ -1,7 +1,7 @@
 import {Awaitable, JSONData} from "../util/types.js";
 import {
    CommandData,
-    isStrRVal, StrDestructured,
+    isStrRVal, joinDestructured, StrDestructured,
    StrLVal,
    StrRVal,
    StrTerm, unwrapDestructured,
@@ -79,6 +79,11 @@ export type TermOperator = {
    restructure?: (sub: StrDestructured['value']) => Awaitable<string>,
    /** Map `destructured` to `destructured`. */
    destructured?: (sub: StrDestructured['value']) => Awaitable<StrDestructured['value']>,
+    /**
+     * If `string` (or `int`), split into lines, map the whole array of line parts, then rejoin with newlines.
+     * If `destructured`, map the whole array of parts, but keep it as a `destructured`.
+     */
+    destructuredOrLines?: (sub: StrDestructured['value']) => Awaitable<StrDestructured['value']>,
    /**
     * If `string`, map to `string`.
     * If `destructured`, map each part individual element, but keep it as a `destructured`.
@@ -180,6 +185,24 @@ export class ExecutionContext {
            return
        }

+        if ( (sub.term === 'int' || sub.term === 'string') && operator.destructuredOrLines ) {
+            const fake: StrDestructured['value'] = unwrapString(sub)
+                .split('\n')
+                .map((line, idx) => {
+                    if ( idx ) {
+                        return { prefix: '\n', value: line }
+                    }
+                    return { value: line }
+                })
+
+            const rejoined = (await operator.destructuredOrLines(fake))
+                .map(x => x.value)
+                .join('\n')
+
+            this.subject = wrapString(rejoined)
+            return
+        }
+
        if ( sub.term === 'destructured' && operator.restructure ) {
            this.subject = wrapString(await operator.restructure(unwrapDestructured(sub)))
            return
@@ -190,6 +213,11 @@ export class ExecutionContext {
            return
        }

+        if ( sub.term === 'destructured' && operator.destructuredOrLines ) {
+            this.subject = wrapDestructured(await operator.destructuredOrLines(unwrapDestructured(sub)))
+            return
+        }
+
        if ( sub.term === 'destructured' && operator.stringOrDestructuredPart ) {
            this.subject = wrapDestructured(await Promise.all(
                unwrapDestructured(sub)
@@ -330,12 +358,12 @@ export class StrVM implements LifecycleAware {
    }

    public async replaceContextMatchingTerm(operator: TermOperator|((ctx: ExecutionContext) => TermOperator)): Promise<this> {
-        return this.tapInPlace(ctx => {
+        return this.tapInPlace(async ctx => {
            if ( typeof operator === 'function' ) {
                operator = operator(ctx)
            }

-            ctx.replaceSubjectMatchingTerm(operator)
+            await ctx.replaceSubjectMatchingTerm(operator)
        })
    }