From 569bff2d3ec43deab9c04045d01c1adb29fc221d Mon Sep 17 00:00:00 2001 From: garrettmills Date: Mon, 10 Nov 2025 19:41:57 -0600 Subject: [PATCH] Implement range, lipsum, unique --- HELP.txt | 9 +- lipsum.txt | 503 +++++++++++++++++++++++++++++++++++++++++++++++++++++ str.mjs | 162 ++++++++++++++++- 3 files changed, 670 insertions(+), 4 deletions(-) create mode 100644 lipsum.txt diff --git a/HELP.txt b/HELP.txt index 98e1b8e..2eee9e1 100644 --- a/HELP.txt +++ b/HELP.txt @@ -72,8 +72,15 @@ lines [] [] join [] Join separate lines in the string using the given separator. -replace +replace [] Replace all instances of with . + Optionally, define a "range pattern" to replace. Range patterns define + the nth occurrences of that should be replaced. The last number + in a range will be repeated as necessary. + Example: + "First Name Last Name Salutation Age Height" + replace ' ' , [2, 2, 1] + "First Name,Last Name,Salutation,Age,Height" lsub [] Replace the current string with a substring from the left, starting at . 
diff --git a/lipsum.txt b/lipsum.txt new file mode 100644 index 0000000..3e5e051 --- /dev/null +++ b/lipsum.txt @@ -0,0 +1,503 @@ +a +ac +accommodare +accumsan +accusata +ad +adhuc +adipisci +adipiscing +adolescens +adversarium +aenean +aeque +affert +agam +alia +alienum +aliquam +aliquet +aliquid +aliquip +altera +alterum +amet +an +ancillae +animal +ante +antiopam +aperiri +appareat +appetere +aptent +arcu +assueverit +at +atomorum +atqui +auctor +audire +augue +autem +bibendum +blandit +brute +causae +cetero +ceteros +civibus +class +commodo +commune +comprehensam +conceptam +conclusionemque +condimentum +congue +consectetuer +consectetur +consequat +consetetur +constituam +constituto +consul +contentiones +conubia +convallis +convenire +corrumpit +cras +cu +cubilia +cum +curabitur +curae +cursus +dapibus +debet +decore +definiebas +definitionem +definitiones +delectus +delenit +delicata +deseruisse +deserunt +deterruisset +detracto +detraxit +diam +dicam +dicant +dicat +dicit +dico +dicta +dictas +dictum +dictumst +dicunt +dignissim +dis +discere +disputationi +dissentiunt +docendi +doctus +dolor +dolore +dolorem +dolores +dolorum +doming +donec +dui +duis +duo +ea +eam +efficiantur +efficitur +egestas +eget +ei +eirmod +eius +elaboraret +electram +eleifend +elementum +elit +elitr +eloquentiam +enim +eos +epicurei +epicuri +equidem +erat +eripuit +eros +errem +error +erroribus +eruditi +esse +est +et +etiam +eu +euismod +eum +euripidis +evertitur +ex +expetenda +expetendis +explicari +fabellas +fabulas +facilis +facilisi +facilisis +falli +fames +fastidii +faucibus +felis +fermentum +ferri +feugait +feugiat +finibus +fringilla +fugit +fuisset +fusce +gloriatur +graece +graeci +graecis +graeco +gravida +gubergren +habemus +habeo +habitant +habitasse +hac +harum +has +hendrerit +himenaeos +hinc +his +homero +honestatis +iaculis +id +idque +ignota +iisque +imperdiet +impetus +in +inani +inceptos +inciderint +indoctum +inimicus +instructior +integer +intellegat 
+intellegebat +interdum +interesset +interpretaris +invenire +invidunt +ipsum +iriure +iudicabit +ius +iusto +iuvaret +justo +labores +lacinia +lacus +laoreet +latine +laudem +lectus +legere +legimus +leo +liber +libero +libris +ligula +litora +lobortis +lorem +luctus +ludus +luptatum +maecenas +magna +magnis +maiestatis +maiorum +malesuada +malorum +maluisset +mandamus +massa +mattis +mauris +maximus +mazim +mea +mediocrem +mediocritatem +mei +mel +meliore +melius +menandri +mentitum +metus +mi +minim +mnesarchum +moderatius +molestiae +molestie +mollis +montes +morbi +movet +mucius +mus +mutat +nam +nascetur +natoque +natum +ne +nec +necessitatibus +neglegentur +neque +netus +nibh +nihil +nisi +nisl +no +nobis +noluisse +nominavi +non +nonumes +nonumy +noster +nostra +nostrum +novum +nulla +nullam +numquam +nunc +ocurreret +odio +offendit +omittam +omittantur +omnesque +oporteat +option +oratio +orci +ornare +ornatus +partiendo +parturient +patrioque +pellentesque +penatibus +per +percipit +pericula +periculis +perpetua +persecuti +persequeris +persius +pertinacia +pertinax +petentium +pharetra +phasellus +placerat +platea +platonem +ponderum +populo +porro +porta +porttitor +posidonium +posse +possim +possit +postea +postulant +posuere +potenti +praesent +pretium +pri +primis +principes +pro +prodesset +proin +prompta +propriae +pulvinar +purus +putent +quaeque +quaerendum +quaestio +qualisque +quam +quas +quem +qui +quidam +quis +quisque +quo +quod +quot +recteque +referrentur +reformidans +regione +reprehendunt +reprimique +repudiandae +repudiare +reque +rhoncus +ridens +ridiculus +risus +rutrum +sadipscing +saepe +sagittis +sale +salutatus +sanctus +saperet +sapien +sapientem +scelerisque +scripserit +scripta +sea +sed +sem +semper +senectus +senserit +sententiae +signiferumque +similique +simul +singulis +sit +sociis +sociosqu +sodales +solet +sollicitudin +solum +sonet +splendide +suas +suavitate +sumo +suscipiantur +suscipit +suspendisse +tacimates +taciti 
+tale
+tamquam
+tantas
+tation
+te
+tellus
+tempor
+tempus
+theophrastus
+tibique
+tincidunt
+torquent
+tortor
+tota
+tractatos
+tristique
+tritani
+turpis
+ubique
+ullamcorper
+ultrices
+ultricies
+unum
+urbanitas
+urna
+usu
+ut
+utamur
+utinam
+utroque
+varius
+vehicula
+vel
+velit
+venenatis
+veniam
+verear
+veri
+veritus
+vero
+verterem
+vestibulum
+viderer
+vidisse
+vim
+viris
+vis
+vitae
+vituperata
+vituperatoribus
+vivamus
+vivendo
+viverra
+vix
+vocent
+vocibus
+volumus
+voluptaria
+voluptatibus
+voluptatum
+volutpat
+vulputate
+wisi
diff --git a/str.mjs b/str.mjs
index 9e9b899..54952bc 100644
--- a/str.mjs
+++ b/str.mjs
@@ -21,6 +21,34 @@ const PRESERVE_SUBJECT_NO_PRINT = Symbol('preserve-subject-no-print')
 const EXIT = Symbol('should-exit')
 const TERM = Symbol('terminator')
 const tempFile = () => `/tmp/str-${crypto.randomBytes(4).readUInt32LE(0)}.txt`
+const lipsumFile = () => `${dirname(fileURLToPath(import.meta.url))}/lipsum.txt`  // word list shipped next to this module
+const capFirst = s => `${s[0].toUpperCase()}${s.slice(1)}`  // upper-case the first character of s
+
+const randomInt = (min=0, max=100) => {
+    min = Math.ceil(min)
+    max = Math.floor(max)
+    return Math.floor(Math.random() * (max - min + 1)) + min  // inclusive of both min and max
+}
+
+const coinFlip = (chance=0.5) => Math.random() < chance  // true with probability `chance`
+
+let lipsumDict = []  // lazily-populated cache of the words in lipsum.txt
+const getLipsumDict = () => {
+    if ( !lipsumDict.length ) {
+        lipsumDict = fs.readFileSync(lipsumFile())
+            .toString('utf-8')
+            .split('\n')
+            .map(x => x.trim()).filter(Boolean)  // drop blank entries: the file ends with a newline
+    }
+    return lipsumDict
+}
+
+const getRandomLipsum = (i=undefined) => {
+    if ( i === 0 ) return 'lorem'  // positions 0 and 1 are pinned so text can open with "lorem ipsum"
+    if ( i === 1 ) return 'ipsum'
+    const dict = getLipsumDict()
+    return dict[Math.floor(Math.random() * dict.length)]
+}
 
 const makeState = () => ({
     quote: "'",
@@ -158,6 +186,7 @@ class Parser {
 
             to: () => ({ command: 'to', lval: this.popLValToken() }),
             from: () => ({ command: 'from', lval: this.popLValToken() }),
+            lipsum: () => ({ command: 'lipsum', len: this.popToken(), type: this.popTokenInSet(['word', 'line', 'para']) }),
             indent: () => ({
                 command: 'indent',
                 with: this.popTokenInSet(['spaces', 'tabs']),
@@ -176,7 +205,12 @@ class Parser {
             split: () => ({ command: 'split', on: this.popToken(), with: this.popOptionalToken('\n') }),
             lines: () => ({ command: 'lines', on: this.popOptionalToken(), with: this.popOptionalToken('\n') }),
             join: () => ({ command: 'join', with: this.popOptionalToken(',') }),
-            replace: () => ({ command: 'replace', find: this.popToken(), with: this.popToken() }),
+            replace: () => ({
+                command: 'replace',
+                find: this.popToken(),
+                with: this.popToken(),
+                range: this.popOptionalRange(),
+            }),
             lsub: () => ({ command: 'lsub', offset: this.popToken(), len: this.popOptionalToken() }),
             rsub: () => ({ command: 'rsub', offset: this.popToken(), len: this.popOptionalToken() }),
             reparse: () => ({
@@ -188,6 +222,7 @@ class Parser {
             missing: () => ({ command: 'missing', find: this.popToken() }),
             upper: () => ({ command: 'upper' }),
             lower: () => ({ command: 'lower' }),
+            unique: () => ({ command: 'unique' }),
 
             help: () => ({ command: 'help' }),
             show: () => ({ command: 'show' }),
@@ -281,6 +316,49 @@ class Parser {
         return parser()
     }
 
+    popOptionalRange() {  // parse an optional "[n, n, ...]" range (or a $variable); undefined when absent
+        const token = this.peekToken()
+        const tokenValue = token?.token.trim()
+
+        if ( token && tokenValue.startsWith('$') && !token.asLiteral ) {
+            // If the token is a variable, assume it may be a range and return it unresolved
+            return this.popToken()
+        }
+
+        if ( !token || !tokenValue.startsWith('[') ) {
+            return undefined
+        }
+
+        // Consume tokens until we find either a `]` or an invalid token:
+        const rangeParts = []
+
+        while ( true ) {
+            const next = this.popToken()
+            let nextValue = next.token.trim()  // trim once so the bracket checks and the stripping below agree
+
+            // If we are the first token, strip off the opening [
+            if ( !rangeParts.length && nextValue.startsWith('[') ) nextValue = nextValue.substring(1)
+
+            // Strip off the closing ] if present
+            const hasClose = nextValue.endsWith(']')
+            if ( hasClose ) nextValue = nextValue.slice(0, -1)
+
+            // Within a range, we may only have numbers, whitespace, and commas
+            if ( !nextValue.match(/^[0-9\s,]*$/s) ) {
+                throw new Error(`Found invalid characters in range context: ${nextValue}`)
+            }
+
+            rangeParts.push(nextValue)
+            if ( hasClose ) break
+        }
+
+        const range = rangeParts.join('')
+            .split(',')
+            .map(x => parseInt(x.trim(), 10)).filter(n => !Number.isNaN(n))  // ignore empty entries (e.g. a trailing comma)
+
+        return { token: range, asLiteral: false, asRange: true }
+    }
+
     popTerm() {
         if ( this.tokens.length && this.tokens[0].token !== TERM ) throw new Error('Expected TERM; instead found token: ' + this.tokens[0].token)
         if ( this.tokens.length ) this.tokens.shift()
@@ -431,7 +509,12 @@ class VM {
     }
 
     resolveImmediate(token) {
-        if ( !token.token || token.asLiteral || !token.token.startsWith('$') ) {
+        if (
+            !token.token
+            || token.asLiteral
+            || token.asRange
+            || !token.token.startsWith('$')
+        ) {
             return token.token
         }
 
@@ -547,6 +630,37 @@ class VM {
             },
             from: () => this.resolveImmediate(cmd.lval),
 
+            lipsum: () => {  // generate `len` filler words, sentences ("line"), or paragraphs ("para")
+                const len = parseInt(this.resolveImmediate(cmd.len), 10)
+                const type = this.resolveImmediate(cmd.type)
+                const base = Array(len).fill(undefined)
+
+                const genLipsumSentence = (i=0) => {  // only sentence 0 opens with the pinned "lorem ipsum"
+                    const words = Array(randomInt(7, 18))
+                        .fill(undefined)
+                        .map((_, j) => getRandomLipsum(i === 0 ? j : undefined) + (coinFlip(0.2) ? ',' : ''))
+
+                    let line = words.join(' ')
+                    if ( line.endsWith(',') ) line = line.slice(0, -1)  // never end a sentence on a comma
+                    return capFirst(line) + '.'
+                }
+
+                if ( type === 'word' ) {
+                    return base.map((_, i) => getRandomLipsum(i))
+                        .join(' ')
+                } else if ( type === 'line' ) {
+                    return base.map((_, i) => genLipsumSentence(i))
+                        .join('\n')
+                } else if ( type === 'para' ) {
+                    return base.map((_, i) =>
+                        Array(randomInt(2, 6))
+                            .fill(undefined)
+                            .map((_, j) => genLipsumSentence(i + j))
+                            .join(' '))  // sentences within a paragraph are separated by a space
+                        .join('\n\n')
+                }
+                return PRESERVE_SUBJECT
+            },
             indent: () => {
                 const dent = this.resolveImmediate(cmd.with) === 'spaces'
                     ? ''.padStart(parseInt(String(this.resolveImmediate(cmd.level) || '4')), ' ')
@@ -607,7 +721,48 @@ class VM {
             lines: () => subject.split(new RegExp(`${this.resolveImmediate(cmd.on) || '\\s'}+`, 's'))
                 .join(this.resolveImmediate(cmd.with)),
             join: () => subject.split('\n').join(this.resolveImmediate(cmd.with)),
-            replace: () => subject.replaceAll(this.resolveImmediate(cmd.find), this.resolveImmediate(cmd.with)),
+            replace: () => {  // replace occurrences of `find`; an optional range selects every nth occurrence
+                const find = this.resolveImmediate(cmd.find)
+                const replace = this.resolveImmediate(cmd.with)
+                const rangeToken = cmd.range
+                    ? this.resolveImmediate(cmd.range)
+                    : undefined
+
+                const rangeArr = rangeToken
+                    ? [...rangeToken]  // copy: the loop below consumes entries with shift()
+                    : undefined
+
+                // Split the subject on `find`; each gap between parts is one occurrence:
+                const literalParts = subject.split(find)
+
+                // Now, stitch the string back together with the `replace` string, respecting
+                // the range if it has been given:
+                let replacedParts = []
+                let partsSinceLastReplace = 0
+                for ( let partIdx = 0; partIdx < literalParts.length; partIdx += 1 ) {
+                    const part = literalParts[partIdx]
+
+                    replacedParts.push(part)
+                    partsSinceLastReplace += 1
+
+                    // If this is the last part of the string, we don't need to "replace" at the end
+                    if ( partIdx === literalParts.length - 1 ) break
+
+                    if ( !rangeArr?.length || partsSinceLastReplace === rangeArr[0] ) {
+                        // This is an occurrence we need to replace.
+                        // Do so, then reset the counter. The last range entry is kept and reused.
+                        replacedParts.push(replace)
+                        partsSinceLastReplace = 0
+                        if ( rangeArr?.length > 1 ) rangeArr.shift()
+                        continue
+                    }
+
+                    // This isn't an occurrence we need to replace, so stitch it back w/ the original string
+                    replacedParts.push(find)
+                }
+
+                return replacedParts.join('')
+            },
             lsub: () => subject.slice(
                 this.resolveImmediate(cmd.offset),
                 this.resolveImmediate(cmd.offset) + (this.resolveImmediate(cmd.len) || subject.length)),
@@ -620,6 +775,7 @@ class VM {
                 .join(''),
             upper: () => subject.toUpperCase(),
             lower: () => subject.toLowerCase(),
+            unique: () => [...(new Set(subject.split('\n')))].join('\n'),
 
             // reparse,
             help: () => {