Implement range, lipsum, unique

This commit is contained in:
2025-11-10 19:41:57 -06:00
parent 298041ecf7
commit 569bff2d3e
3 changed files with 670 additions and 4 deletions

162
str.mjs
View File

@@ -21,6 +21,34 @@ const PRESERVE_SUBJECT_NO_PRINT = Symbol('preserve-subject-no-print')
const EXIT = Symbol('should-exit')
const TERM = Symbol('terminator')
/**
 * Build the path of a scratch file under /tmp. The name is varied by four
 * random bytes read as an unsigned 32-bit little-endian integer.
 */
const tempFile = () => {
    const suffix = crypto.randomBytes(4).readUInt32LE(0)
    return `/tmp/str-${suffix}.txt`
}
/** Resolve the path of `lipsum.txt` in the same directory as this module. */
const lipsumFile = () => {
    const moduleDir = dirname(fileURLToPath(import.meta.url))
    return `${moduleDir}/lipsum.txt`
}
/**
 * Upper-case the first character of a string, leaving the rest untouched.
 *
 * @param {string} s - Text to capitalize.
 * @returns {string} `s` with its first character upper-cased; an empty string
 *   when `s` is empty or missing (indexing `s[0]` would otherwise throw).
 */
const capFirst = s => (s ? `${s[0].toUpperCase()}${s.slice(1)}` : '')
/**
 * Pick a random integer between `min` and `max`, inclusive on both ends.
 * `min` is rounded up and `max` is rounded down before sampling.
 */
const randomInt = (min=0, max=100) => {
    const lo = Math.ceil(min)
    const hi = Math.floor(max)
    const span = hi - lo + 1
    return lo + Math.floor(Math.random() * span)
}
/** Return true with probability `chance` (an even coin by default). */
const coinFlip = (chance=0.5) => {
    const roll = Math.random()
    return roll < chance
}
// Cached word list; stays empty until getLipsumDict() first loads it.
let lipsumDict = []

/**
 * Return the lipsum word list, reading it from lipsumFile() on first use.
 *
 * The file is split on newlines and each entry is trimmed. Blank entries
 * (for example the one produced by a trailing newline) are dropped so that
 * an empty string can never be picked as a "word".
 *
 * @returns {string[]} The cached list of words.
 */
const getLipsumDict = () => {
    if ( !lipsumDict.length ) {
        lipsumDict = fs.readFileSync(lipsumFile(), 'utf-8')
            .split('\n')
            .map(x => x.trim())
            .filter(x => x.length > 0)
    }
    return lipsumDict
}
/**
 * Return one lipsum word. Positions 0 and 1 are pinned to 'lorem' and
 * 'ipsum'; any other `i` (including none) yields a random dictionary word.
 */
const getRandomLipsum = (i=undefined) => {
    switch ( i ) {
        case 0: return 'lorem'
        case 1: return 'ipsum'
    }
    const words = getLipsumDict()
    const pick = Math.floor(Math.random() * words.length)
    return words[pick]
}
const makeState = () => ({
quote: "'",
@@ -158,6 +186,7 @@ class Parser {
// `to`: records the l-value token returned by popLValToken() under `lval`.
to: () => ({ command: 'to', lval: this.popLValToken() }),
// `from`: same shape as `to`, with command 'from'.
from: () => ({ command: 'from', lval: this.popLValToken() }),
// `lipsum`: pops a length token, then a type token via popTokenInSet with the
// choices 'word', 'line' and 'para'.
// NOTE(review): presumably popTokenInSet rejects anything outside that list — confirm.
lipsum: () => ({ command: 'lipsum', len: this.popToken(), type: this.popTokenInSet(['word', 'line', 'para']) }),
indent: () => ({
command: 'indent',
with: this.popTokenInSet(['spaces', 'tabs']),
@@ -176,7 +205,12 @@ class Parser {
// NOTE(review): the argument to popOptionalToken is presumably the fallback used
// when no token was supplied — confirm against its definition.
// `split`: a required `on` token, then an optional `with` token ('\n' passed as the default).
split: () => ({ command: 'split', on: this.popToken(), with: this.popOptionalToken('\n') }),
// `lines`: both `on` and `with` are optional; only `with` is given a default ('\n').
lines: () => ({ command: 'lines', on: this.popOptionalToken(), with: this.popOptionalToken('\n') }),
// `join`: an optional `with` token (',' passed as the default).
join: () => ({ command: 'join', with: this.popOptionalToken(',') }),
replace: () => ({ command: 'replace', find: this.popToken(), with: this.popToken() }),
// `replace`: pops the `find` and `with` tokens, then an optional trailing range
// via popOptionalRange(); `range` is whatever that call returns.
replace: () => ({
command: 'replace',
find: this.popToken(),
with: this.popToken(),
range: this.popOptionalRange(),
}),
// `lsub` / `rsub`: a required `offset` token followed by an optional `len` token.
lsub: () => ({ command: 'lsub', offset: this.popToken(), len: this.popOptionalToken() }),
rsub: () => ({ command: 'rsub', offset: this.popToken(), len: this.popOptionalToken() }),
reparse: () => ({
@@ -188,6 +222,7 @@ class Parser {
// `missing`: takes a single required `find` token.
missing: () => ({ command: 'missing', find: this.popToken() }),
// The commands below take no arguments; each parser only emits the command name.
upper: () => ({ command: 'upper' }),
lower: () => ({ command: 'lower' }),
unique: () => ({ command: 'unique' }),
help: () => ({ command: 'help' }),
show: () => ({ command: 'show' }),
@@ -281,6 +316,49 @@ class Parser {
return parser()
}
popOptionalRange() {
const token = this.peekToken()
const tokenValue = token?.token.trim()
if ( token && tokenValue.startsWith('$') && !token.asLiteral ) {
// If the token is a variable, assume it may be a range and return it
return this.popToken()
}
if ( !token || !tokenValue.startsWith('[') ) {
return undefined
}
// Consume tokens until we find either a `]` or an invalid token:
const rangeParts = []
while ( true ) {
const next = this.popToken()
let nextValue = next.token
// If we are the first token, strip off the opening [
if ( !rangeParts.length && nextValue.startsWith('[') ) nextValue = nextValue.trim().substring(1)
// Strip off the closing ] if present
const hasClose = nextValue.trim().endsWith(']')
if ( hasClose ) nextValue = nextValue.trim().substring(0, nextValue.length - 1)
// Within a range, we may only have numbers, whitespace, and commas
if ( !nextValue.match(/^[0-9\s,]*$/s) ) {
throw new Error(`Found invalid characters in range context: ${nextValue}`)
}
rangeParts.push(nextValue)
if ( hasClose ) break
}
const range = rangeParts.join('')
.split(',')
.map(x => parseInt(x.trim(), 10))
return { token: range, asLiteral: false, asRange: true }
}
popTerm() {
if ( this.tokens.length && this.tokens[0].token !== TERM ) throw new Error('Expected TERM; instead found token: ' + this.tokens[0].token)
if ( this.tokens.length ) this.tokens.shift()
@@ -431,7 +509,12 @@ class VM {
}
resolveImmediate(token) {
if ( !token.token || token.asLiteral || !token.token.startsWith('$') ) {
if (
!token.token
|| token.asLiteral
|| token.asRange
|| !token.token.startsWith('$')
) {
return token.token
}
@@ -547,6 +630,37 @@ class VM {
},
// `from`: the result is whatever resolveImmediate() yields for the l-value token.
from: () => this.resolveImmediate(cmd.lval),
lipsum: () => {
const len = parseInt(this.resolveImmediate(cmd.len), 10)
const type = this.resolveImmediate(cmd.type)
const base = Array(len).fill(undefined)
const genLipsumSentence = (i=0) => {
const words = Array(randomInt(7, 18))
.fill(undefined)
.map((_, j) => getRandomLipsum(i + j) + (coinFlip(0.2) ? ',' : ''))
let line = words.join(' ')
if ( line.endsWith(',') ) line = line.slice(0, -1)
return capFirst(line) + '.'
}
if ( type === 'word' ) {
return base.map((_, i) => getRandomLipsum(i))
.join(' ')
} else if ( type === 'line' ) {
return base.map((_, i) => genLipsumSentence(i))
.join('\n')
} else if ( type === 'para' ) {
return base.map((_, i) =>
Array(randomInt(2, 6))
.fill(undefined)
.map((_, j) => genLipsumSentence(i + j)))
.join('\n\n')
return base.trim().split('\n\n').slice(0, len).join('\n\n')
}
return PRESERVE_SUBJECT
},
indent: () => {
const dent = this.resolveImmediate(cmd.with) === 'spaces'
? ''.padStart(parseInt(String(this.resolveImmediate(cmd.level) || '4')), ' ')
@@ -607,7 +721,48 @@ class VM {
// `lines`: split the subject on one or more occurrences of the resolved `on` value
// (falling back to `\s`) and re-join the pieces with the resolved `with` value.
// The `on` value is interpolated into a RegExp unescaped, so it acts as regex source.
lines: () => subject.split(new RegExp(`${this.resolveImmediate(cmd.on) || '\\s'}+`, 's'))
.join(this.resolveImmediate(cmd.with)),
// `join`: replace every newline in the subject with the resolved `with` value.
join: () => subject.split('\n').join(this.resolveImmediate(cmd.with)),
replace: () => subject.replaceAll(this.resolveImmediate(cmd.find), this.resolveImmediate(cmd.with)),
replace: () => {
const find = this.resolveImmediate(cmd.find)
const replace = this.resolveImmediate(cmd.with)
const rangeToken = cmd.range
? this.resolveImmediate(cmd.range)
: undefined
const rangeArr = rangeToken
? [...rangeToken]
: undefined
// Split the string apart based on the `find` string:
const literalParts = subject.split(find)
// Now, stitch the string back together with the `replace` string, respecting
// the range if it has been given:
let replacedParts = []
let partsSinceLastReplace = 0
for ( let partIdx = 0; partIdx < literalParts.length; partIdx += 1 ) {
const part = literalParts[partIdx]
replacedParts.push(part)
partsSinceLastReplace += 1
// If this is the last part of the string, we don't need to "replace" at the end
if ( partIdx === literalParts.length - 1 ) break
if ( !rangeArr?.length || partsSinceLastReplace === rangeArr[0] ) {
// This is an occurrence we need to replace.
// Do so, then reset the counter.
replacedParts.push(replace)
partsSinceLastReplace = 0
if ( rangeArr?.length > 1 ) rangeArr.shift()
continue
}
// This isn't an occurrence we need to replace, so stitch it back w/ the original string
replacedParts.push(find)
}
return replacedParts.join('')
},
lsub: () => subject.slice(
this.resolveImmediate(cmd.offset),
this.resolveImmediate(cmd.offset) + (this.resolveImmediate(cmd.len) || subject.length)),
@@ -620,6 +775,7 @@ class VM {
.join(''),
// `upper` / `lower`: change the case of the whole subject.
upper: () => subject.toUpperCase(),
lower: () => subject.toLowerCase(),
// `unique`: drop duplicate lines. A Set iterates in insertion order, so the
// first occurrence of each line is kept and the original order is preserved.
unique: () => [...(new Set(subject.split('\n')))].join('\n'),
// reparse,
help: () => {