Implement range, lipsum, unique

2025-11-10 19:41:57 -06:00
parent 298041ecf7
commit 569bff2d3e
3 changed files with 670 additions and 4 deletions
--- a/str.mjs
+++ b/str.mjs
@@ -21,6 +21,34 @@ const PRESERVE_SUBJECT_NO_PRINT = Symbol('preserve-subject-no-print')
 const EXIT = Symbol('should-exit')
 const TERM = Symbol('terminator')
 const tempFile = () => `/tmp/str-${crypto.randomBytes(4).readUInt32LE(0)}.txt`
+// Absolute path of the `lipsum.txt` file that sits in the same directory as this module.
+const lipsumFile = () => {
+    const moduleDir = dirname(fileURLToPath(import.meta.url))
+    return `${moduleDir}/lipsum.txt`
+}
+// Upper-case the first character of `s` and leave the rest untouched.
+// An empty string is returned unchanged, since `s[0]` is undefined there and would throw.
+const capFirst = s => (s.length ? `${s[0].toUpperCase()}${s.slice(1)}` : s)
+
+/**
+ * Pick a random integer from the inclusive range [min, max].
+ * Fractional bounds are tightened inward: `min` is rounded up and `max` rounded down.
+ */
+const randomInt = (min=0, max=100) => {
+    const low = Math.ceil(min)
+    const high = Math.floor(max)
+    const span = high - low + 1
+    return low + Math.floor(Math.random() * span)
+}
+
+// Returns true with probability `chance` (an even 50/50 flip by default).
+const coinFlip = (chance=0.5) => {
+    const roll = Math.random()
+    return roll < chance
+}
+
+// Cache of lipsum dictionary entries; stays empty until the first lookup.
+let lipsumDict = []
+
+/**
+ * Return the lipsum dictionary, reading it from `lipsumFile()` on first use.
+ *
+ * The file is split on newlines and each entry is trimmed. Blank entries (such as
+ * the one produced by a trailing newline) are dropped so that a random pick can
+ * never yield an empty string.
+ */
+const getLipsumDict = () => {
+    if ( !lipsumDict.length ) {
+        lipsumDict = fs.readFileSync(lipsumFile())
+            .toString('utf-8')
+            .split('\n')
+            .map(x => x.trim())
+            .filter(x => x.length > 0)
+    }
+    return lipsumDict
+}
+
+/**
+ * Produce a single lipsum word.
+ * Positions 0 and 1 are pinned to "lorem" and "ipsum"; any other `i` (or none at all)
+ * yields a random entry from the dictionary.
+ */
+const getRandomLipsum = (i=undefined) => {
+    switch ( i ) {
+        case 0: return 'lorem'
+        case 1: return 'ipsum'
+    }
+
+    const words = getLipsumDict()
+    const pick = Math.floor(Math.random() * words.length)
+    return words[pick]
+}

 const makeState = () => ({
    quote: "'",
@@ -158,6 +186,7 @@ class Parser {
            to: () => ({ command: 'to', lval: this.popLValToken() }),
            from: () => ({ command: 'from', lval: this.popLValToken() }),

+            lipsum: () => ({ command: 'lipsum', len: this.popToken(), type: this.popTokenInSet(['word', 'line', 'para']) }),
            indent: () => ({
                command: 'indent',
                with: this.popTokenInSet(['spaces', 'tabs']),
@@ -176,7 +205,12 @@ class Parser {
            split: () => ({ command: 'split', on: this.popToken(), with: this.popOptionalToken('\n') }),
            lines: () => ({ command: 'lines', on: this.popOptionalToken(), with: this.popOptionalToken('\n') }),
            join: () => ({ command: 'join', with: this.popOptionalToken(',') }),
-            replace: () => ({ command: 'replace', find: this.popToken(), with: this.popToken() }),
+            replace: () => ({
+                command: 'replace',
+                find: this.popToken(),
+                with: this.popToken(),
+                range: this.popOptionalRange(),
+            }),
            lsub: () => ({ command: 'lsub', offset: this.popToken(), len: this.popOptionalToken() }),
            rsub: () => ({ command: 'rsub', offset: this.popToken(), len: this.popOptionalToken() }),
            reparse: () => ({
@@ -188,6 +222,7 @@ class Parser {
            missing: () => ({ command: 'missing', find: this.popToken() }),
            upper: () => ({ command: 'upper' }),
            lower: () => ({ command: 'lower' }),
+            unique: () => ({ command: 'unique' }),

            help: () => ({ command: 'help' }),
            show: () => ({ command: 'show' }),
@@ -281,6 +316,49 @@ class Parser {
        return parser()
    }

+    /**
+     * Try to read an optional range argument (e.g. `[1, 3]`) from the front of the token stream.
+     *
+     * - An unquoted token starting with `$` is popped and returned unchanged, on the
+     *   assumption that the variable may hold a range.
+     * - A token starting with `[` opens a bracketed range; tokens are consumed up to and
+     *   including the one that ends with `]`.
+     * - Otherwise nothing is consumed and `undefined` is returned.
+     *
+     * @returns {object|undefined} the variable token, or a token of the form
+     *   `{ token: number[], asLiteral: false, asRange: true }`
+     * @throws {Error} if the range is never closed, or contains anything other than
+     *   digits, whitespace and commas
+     */
+    popOptionalRange() {
+        const token = this.peekToken()
+
+        // Non-string tokens (e.g. the TERM symbol) cannot start a range
+        if ( !token || typeof token.token !== 'string' ) {
+            return undefined
+        }
+
+        const tokenValue = token.token.trim()
+
+        if ( tokenValue.startsWith('$') && !token.asLiteral ) {
+            // If the token is a variable, assume it may be a range and return it
+            return this.popToken()
+        }
+
+        if ( !tokenValue.startsWith('[') ) {
+            return undefined
+        }
+
+        // Consume tokens until we find either a `]` or an invalid token:
+        const rangeParts = []
+
+        while ( true ) {
+            const next = this.popToken()
+            if ( !next || typeof next.token !== 'string' ) {
+                throw new Error('Unterminated range: expected a closing `]`')
+            }
+
+            // Trim once so the bracket checks and the slicing below all see the same string.
+            // (Slicing a trimmed copy using the untrimmed length would leave the `]` in place
+            // whenever the token carries surrounding whitespace.)
+            let nextValue = next.token.trim()
+
+            // If we are the first token, strip off the opening [
+            if ( !rangeParts.length && nextValue.startsWith('[') ) nextValue = nextValue.substring(1)
+
+            // Strip off the closing ] if present
+            const hasClose = nextValue.endsWith(']')
+            if ( hasClose ) nextValue = nextValue.slice(0, -1)
+
+            // Within a range, we may only have numbers, whitespace, and commas
+            if ( !/^[0-9\s,]*$/.test(nextValue) ) {
+                throw new Error(`Found invalid characters in range context: ${nextValue}`)
+            }
+
+            rangeParts.push(nextValue)
+            if ( hasClose ) break
+        }
+
+        // Tokens are concatenated before splitting, so `[1,` + `3]` parses as [1, 3]
+        const range = rangeParts.join('')
+            .split(',')
+            .map(x => parseInt(x.trim(), 10))
+
+        return { token: range, asLiteral: false, asRange: true }
+    }
+
    popTerm() {
        if ( this.tokens.length && this.tokens[0].token !== TERM ) throw new Error('Expected TERM; instead found token: ' + this.tokens[0].token)
        if ( this.tokens.length ) this.tokens.shift()
@@ -431,7 +509,12 @@ class VM {
    }

    resolveImmediate(token) {
-        if ( !token.token || token.asLiteral || !token.token.startsWith('$') ) {
+        if (
+            !token.token
+            || token.asLiteral
+            || token.asRange
+            || !token.token.startsWith('$')
+        ) {
            return token.token
        }

@@ -547,6 +630,37 @@ class VM {
            },
            from: () => this.resolveImmediate(cmd.lval),

+            // Generate placeholder text: `len` words (space-separated), sentences (one per
+            // line), or paragraphs (separated by a blank line), depending on `type`.
+            // Any other `type` leaves the current subject untouched.
+            lipsum: () => {
+                const rawLen = this.resolveImmediate(cmd.len)
+                const len = parseInt(rawLen, 10)
+                if ( Number.isNaN(len) || len < 0 ) {
+                    // Array(NaN) / Array(-1) would throw an opaque "Invalid array length"
+                    throw new RangeError(`Invalid lipsum length: ${String(rawLen)}`)
+                }
+
+                const type = this.resolveImmediate(cmd.type)
+                const base = Array(len).fill(undefined)
+
+                // Build one sentence of 7-18 words with occasional commas.
+                // `i` is the sentence's position in the output: only sentence 0 opens with
+                // the fixed "lorem ipsum" words, every other sentence is fully random.
+                const genLipsumSentence = (i=0) => {
+                    const words = Array(randomInt(7, 18))
+                        .fill(undefined)
+                        .map((_, j) => getRandomLipsum(i === 0 ? j : undefined) + (coinFlip(0.2) ? ',' : ''))
+
+                    let line = words.join(' ')
+                    // A sentence must not end in ",."
+                    if ( line.endsWith(',') ) line = line.slice(0, -1)
+                    return capFirst(line) + '.'
+                }
+
+                if ( type === 'word' ) {
+                    return base.map((_, i) => getRandomLipsum(i))
+                        .join(' ')
+                } else if ( type === 'line' ) {
+                    return base.map((_, i) => genLipsumSentence(i))
+                        .join('\n')
+                } else if ( type === 'para' ) {
+                    // Each paragraph is 2-6 sentences joined by spaces. Without the inner
+                    // join the sentence array would be stringified with commas.
+                    return base.map((_, i) =>
+                        Array(randomInt(2, 6))
+                            .fill(undefined)
+                            .map((_, j) => genLipsumSentence(i + j))
+                            .join(' '))
+                        .join('\n\n')
+                }
+                return PRESERVE_SUBJECT
+            },
            indent: () => {
                const dent = this.resolveImmediate(cmd.with) === 'spaces'
                    ? ''.padStart(parseInt(String(this.resolveImmediate(cmd.level) || '4')), ' ')
@@ -607,7 +721,48 @@ class VM {
            lines: () => subject.split(new RegExp(`${this.resolveImmediate(cmd.on) || '\\s'}+`, 's'))
                .join(this.resolveImmediate(cmd.with)),
            join: () => subject.split('\n').join(this.resolveImmediate(cmd.with)),
-            replace: () => subject.replaceAll(this.resolveImmediate(cmd.find), this.resolveImmediate(cmd.with)),
+            replace: () => { // Replace occurrences of `find` in the subject with `with`, optionally limited by a range
+                const find = this.resolveImmediate(cmd.find)
+                const replace = this.resolveImmediate(cmd.with)
+                const rangeToken = cmd.range
+                    ? this.resolveImmediate(cmd.range)
+                    : undefined
+
+                const rangeArr = rangeToken
+                    ? [...rangeToken] // copy: the loop below shifts entries off this array
+                    : undefined // NOTE(review): assumes a resolved range is an array of numbers -- confirm for `$variable` ranges
+
+                // Split the subject on `find`; every gap between adjacent parts is one occurrence of `find`:
+                const literalParts = subject.split(find)
+
+                // Now, stitch the string back together with the `replace` string, respecting
+                // the range if it has been given (no range, or an empty one, replaces every occurrence):
+                let replacedParts = []
+                let partsSinceLastReplace = 0 // occurrences seen since the last replacement, including the current one
+                for ( let partIdx = 0; partIdx < literalParts.length; partIdx += 1 ) {
+                    const part = literalParts[partIdx]
+
+                    replacedParts.push(part)
+                    partsSinceLastReplace += 1
+
+                    // If this is the last part of the string, there is no occurrence after it to handle
+                    if ( partIdx === literalParts.length - 1 ) break
+
+                    if ( !rangeArr?.length || partsSinceLastReplace === rangeArr[0] ) {
+                        // This is an occurrence we need to replace.
+                        // Do so, then reset the counter so the next range entry counts from here.
+                        replacedParts.push(replace)
+                        partsSinceLastReplace = 0
+                        if ( rangeArr?.length > 1 ) rangeArr.shift() // the final entry is kept, so it repeats for the rest of the subject
+                        continue
+                    }
+
+                    // This isn't an occurrence we need to replace, so stitch it back w/ the original string
+                    replacedParts.push(find)
+                }
+
+                return replacedParts.join('')
+            },
            lsub: () => subject.slice(
                this.resolveImmediate(cmd.offset),
                this.resolveImmediate(cmd.offset) + (this.resolveImmediate(cmd.len) || subject.length)),
@@ -620,6 +775,7 @@ class VM {
                .join(''),
            upper: () => subject.toUpperCase(),
            lower: () => subject.toLowerCase(),
+            unique: () => [...(new Set(subject.split('\n')))].join('\n'),
            // reparse,

            help: () => {