From dd0fdbff3b3cb5261212cb38e81b0672ce63c342 Mon Sep 17 00:00:00 2001 From: garrettmills Date: Wed, 5 Nov 2025 09:25:10 -0600 Subject: [PATCH] Big bang --- .gitignore | 2 + README.md | 4 + str.mjs | 654 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 660 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 str.mjs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7a1537b --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea +node_modules diff --git a/README.md b/README.md new file mode 100644 index 0000000..ff85a1f --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +# `str`: An interactive string manipulation environment + +WIP + diff --git a/str.mjs b/str.mjs new file mode 100644 index 0000000..a2db05c --- /dev/null +++ b/str.mjs @@ -0,0 +1,654 @@ +/* + * Requires sudo dnf install wl-clipboard +TODOs: +- Replace - limit occurrences +- FIX - escape chars when building Regs +- Case coverters + */ + + +import readline from 'node:readline' +import { promisify } from 'node:util' +import { homedir } from 'node:os' +import fs from 'node:fs' +import crypto from 'node:crypto' + +const PRESERVE_SUBJECT = Symbol('preserve-subject') +const PRESERVE_SUBJECT_NO_PRINT = Symbol('preserve-subject-no-print') +const EXIT = Symbol('should-exit') +const TERM = Symbol('terminator') +const tempFile = () => `/tmp/str-${crypto.randomBytes(4).readUInt32LE(0)}.txt` + +const makeState = () => ({ + quote: "'", + escape: "\\", + terminator: '\n', + session: `${homedir()}/.str.json`, + commonQuotes: ["'", '"', '`'], + debug: 0, + encloses: { + '(': ')', + '[': ']', + '{': '}', + }, +}) + +let state = makeState() + +const logIfDebug = (...out) => { + if ( parseInt(state.debug) ) console.log(...out) +} + +class Lexer { + input = '' + token = '' + tokens = [] + isEscape = false + isQuote = false + isLiteralEmpty = false + + constructor(input) { + this.input = input + } + + advanceToken() { + if ( this.token || this.isLiteralEmpty ) { + this.tokens.push(this.token) + this.token = '' + this.isLiteralEmpty = false + } + } + + run() { + while ( this.input ) this.step() + this.advanceToken() + return this.tokens + } + + step() { + const c = this.input[0] + this.input = this.input.substring(1) + + if ( this.isEscape ) { + this.token += c + this.isEscape = false + return + } + + if ( c === state.escape ) { + this.isEscape = true + return + } + + if ( c === state.terminator ) { + this.advanceToken() + this.token = TERM + this.advanceToken() + return + } + + if ( (c === ' ' || c === '\n' || c === '\t') && !this.isQuote ) { + this.advanceToken() + return + } + + if ( c === state.quote ) { + if ( this.isQuote ) { + this.isQuote = false + if ( !this.token ) this.isLiteralEmpty = true + this.advanceToken() + return + } else { + this.isQuote = true + return + } + } + + this.token += c + } +} + + +class Parser { + tokens = [] + ast = [] + + constructor(tokens) { + this.tokens = tokens + } + + parse() { + while ( this.tokens.length ) this.parseOnce() + return this.ast + } + + parseOnce() { + const cmd = this.parseCommand() + this.popTerm() + this.ast.push(cmd) + } + + parseCommand() { + const token = this.popToken() + + const commandParsers = { + copy: () => ({ command: 'copy' }), + paste: () => ({ command: 'paste' }), + infile: () => ({ command: 'infile', file: this.popToken() }), + outfile: () => ({ command: 'outfile', file: this.popToken() }), + save: () => ({ command: 'save', file: this.popOptionalToken() }), + load: () => ({ command: 'load', file: this.popOptionalToken() }), + edit: () => ({ command: 'edit' }), + history: () => ({ command: 'history' }), + exit: () => ({ command: 'exit' }), + + indent: () => ({ + command: 'indent', + with: this.popTokenInSet(['spaces', 'tabs']), + level: this.popOptionalToken(), + }), + trim: () => ({ + command: 'trim', + type: this.popOptionalTokenInSet(['start', 'end', 'both', 'left', 'right'], 'both'), + char: this.popOptionalToken(), + }), + quote: () => ({ command: 'quote', with: this.popOptionalToken(state.quote) }), + unquote: () => ({ command: 'unquote', mark: this.popOptionalToken() }), + enclose: () => ({ command: 'enclose', with: this.popOptionalToken('(') }), + prefix: () => ({ command: 'prefix', with: this.popToken() }), + suffix: () => ({ command: 'suffix', with: this.popToken() }), + split: () => ({ command: 'split', on: this.popToken(), with: this.popOptionalToken('\n') }), + lines: () => ({ command: 'lines', on: this.popOptionalToken(), with: this.popOptionalToken('\n') }), + join: () => ({ command: 'join', with: this.popOptionalToken(',') }), + replace: () => ({ command: 'replace', find: this.popToken(), with: this.popToken() }), + lsub: () => ({ command: 'lsub', offset: this.popToken(), len: this.popOptionalToken() }), + rsub: () => ({ command: 'rsub', offset: this.popToken(), len: this.popOptionalToken() }), + reparse: () => ({ + command: 'reparse', + fromLang: this.popTokenInSet(['json', 'php']), + toLang: this.popTokenInSet(['json', 'php']), + }), + contains: () => ({ command: 'contains', find: this.popToken() }), + + help: () => ({ command: 'help' }), + show: () => ({ command: 'show' }), + clear: () => ({ command: 'clear' }), + undo: () => ({ command: 'undo', steps: this.popOptionalToken('1') }), + redo: () => ({ command: 'redo', steps: this.popOptionalToken('1') }), + set: () => ({ + command: 'set', + setting: this.popTokenInSet(['quote', 'escape', 'terminator', 'session', 'debug']), + to: this.popOptionalToken(), + }), + + line: () => ({ command: 'line', sub: this.parseCommand() }), + word: () => ({ command: 'word', sub: this.parseCommand() }), + } + + const parser = commandParsers[token] + if ( !parser ) { + throw new Error('Cannot find parser for command: ' + token) + } + + return parser() + } + + popTerm() { + if ( this.tokens.length && this.tokens[0] !== TERM ) throw new Error('Expected TERM; instead found token: ' + this.tokens[0]) + if ( this.tokens.length ) this.tokens.shift() + } + + popToken() { + if ( !this.tokens.length || this.tokens[0] === TERM ) throw new Error('Unexpected end of token stream!') + return this.tokens.shift() + } + + popTokenInSet(set) { + const tok = this.popToken() + if ( !set.includes(tok) ) throw new Error(`Invalid token "${tok}" (expected one of: ${set.join(',')})`) + return tok + } + + popOptionalToken(fallback=undefined) { + if ( !this.tokens.length || this.tokens[0] === TERM ) return fallback + return this.popToken() + } + + popOptionalTokenInSet(set, fallback=undefined) { + if ( !this.tokens.length || this.tokens[0] === TERM ) return fallback + const tok = this.popToken() + if ( !set.includes(tok) ) throw new Error(`Invalid token "${tok}" (expected one of: ${set.join(',')})`) + return tok + } +} + +class VM { + subject = '' + subjectBackHistory = [] + subjectForwardHistory = [] + + rl = undefined + question = undefined + shouldExit = false + + getState() { + return { + subject: this.subject, + subjectBackHistory: this.subjectBackHistory, + subjectForwardHistory: this.subjectForwardHistory, + globalState: state, + } + } + + loadState(saved) { + this.subject = saved.subject + this.subjectBackHistory = saved.subjectBackHistory + this.subjectForwardHistory = saved.subjectForwardHistory + state = saved.globalState + } + + welcome() { + console.log('str : An interactive string manipulation environment') + console.log(' (Type `help` for more info, or `exit` to close.)') + console.log('') + } + + help() { + console.log('str : An interactive string manipulation environment') + console.log(' Copyright (C) 2025 Garrett Mills ') + console.log('') + console.log('----------------------------------------') + console.log('Input / Output') + console.log('----------------------------------------') + console.log('copy') + console.log(' Copy the current string to the clipboard. (Requires wl-clipboard.)') + console.log('') + console.log('paste') + console.log(' Paste the contents of the clipboard to replace the current string. (Requires wl-clipboard.)') + console.log('') + console.log('infile ') + console.log(' Replace the current string with the contents of .') + console.log('') + console.log('outfile ') + console.log(' Write the current string as the contents of .') + console.log('') + console.log('edit') + console.log(' Open the current string in EDITOR.') + console.log('') + console.log('history') + console.log(' Print the undo/redo history') + console.log('') + console.log('----------------------------------------') + console.log('String Manipulation') + console.log('----------------------------------------') + console.log('indent []') + console.log(' Indent the string with the specified number of spaces or tabs.') + console.log(' Default is a single standard indentation level.') + console.log('') + console.log('trim [] []') + console.log(' Remove instances of the given character from either the start/end or both sides of the string.') + console.log(' Default is to trim whitespace from both ends.') + console.log('') + console.log('quote []') + console.log(' Surround the current string in the given quote character.') + console.log(' Default is to use the current "quote" setting, which defaults to a single-quote.') + console.log('') + console.log('unquote []') + console.log(' Try to strip surrounding quotes of the given character from the string.') + console.log(' Will only proceed if the string has the quote mark on both ends.') + console.log(' Default is to try common quote schemes (single/double quotes, backtick).') + console.log('') + console.log('enclose []') + console.log(' Wrap the string in the given character. Tries to match pairs when possible.') + console.log(' Example: Using `(` will wrap with a closing `)`') + console.log(' Default is to wrap with parentheses.') + console.log('') + console.log('prefix ') + console.log(' Prepend to the current string.') + console.log('') + console.log('suffix ') + console.log(' Append to the current string.') + console.log('') + console.log('split []') + console.log(' Split the current string using the given separator, and rejoin it using the given separator.') + console.log(' Default is to rejoin on newlines.') + console.log('') + console.log('lines [] []') + console.log(' Like `split`, but defaults to splitting on chunks of whitespace.') + console.log('') + console.log('join []') + console.log(' Join separate lines in the string using the given separator.') + console.log('') + console.log('replace ') + console.log(' Replace all instances of with .') + console.log('') + console.log('lsub []') + console.log(' Replace the current string with a substring from the left, starting at .') + console.log(' Optionally, limit to characters.') + console.log('') + console.log('rsub []') + console.log(' Like `lsub`, but works on the string from right-to-left.') + console.log('') + console.log('reparse ') + console.log(' Assuming the string is a valid expression, parse it and convert it to encoding.') + console.log(' Example: reparse json php') + console.log('') + console.log('----------------------------------------') + console.log('Advanced Manipulation') + console.log('----------------------------------------') + console.log('line ') + console.log(' Runs the given command for every line in the string (separated by \\n).') + console.log('') + console.log('word ') + console.log(' Runs the given command for every word in the string (separated by whitespace).') + console.log('') + console.log('contains ') + console.log(' Check if the current string contains . If not, replace it with an empty string.') + console.log('') + console.log('----------------------------------------') + console.log('State Management') + console.log('----------------------------------------') + console.log('show') + console.log(' Print out the current string.') + console.log('') + console.log('clear') + console.log(' Replace the current string with an empty string.') + console.log('') + console.log('undo []') + console.log(' Undo the past operations on the string. Defaults to a single operation.') + console.log('') + console.log('redo') + console.log(' Redo the past operations that were undone. Defaults to a single operation.') + console.log('') + console.log('save []') + console.log(' Store the current state of the interpreter to the given file path.') + console.log(' Defaults to ~/.str.json') + console.log('') + console.log('load []') + console.log(' Restore a saved state from the given file path to the interpreter.') + console.log(' Defaults to ~/.str.json') + console.log('exit') + console.log(' Exit the interpreter.') + console.log('') + console.log('set []') + console.log(' Change the given interpreter to the given .') + console.log(' If no value is given, will reset it back to the default.') + console.log(' Supported settings:') + console.log(' quote (default: \') - What character is used to parse quoted strings.') + console.log(' Also sets the default for the `quote` command.') + console.log(' escape (default: \\) - What character is used to parse escape sequences.') + console.log(' session (default: ~/.str.json) - Default file used by `save`/`load` commands.') + console.log(' debug (default: 0) - Set to 1 to enable debug mode') + console.log(' terminator (default: \\n) - What character is used to parse sequential commands.') + console.log(' Example: `set terminator ;` will begin parsing commands') + console.log(' separated by a ; instead of when enter is pressed.') + } + + replacePrompt() { + this.closePrompt() + + this.rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }) + + this.question = promisify(this.rl.question).bind(this.rl) + } + + getQuestionPrompt() { + this.replacePrompt() + return this.question + } + + closePrompt() { + this.rl?.close?.() + this.rl = undefined + this.question = undefined + } + + replaceSubject(subject) { + this.subjectBackHistory.push(this.subject) + this.subjectForwardHistory = [] + this.subject = subject + } + + undoSubject() { + if ( this.subjectBackHistory.length ) { + this.subjectForwardHistory.push(this.subject) + this.subject = this.subjectBackHistory.pop() + } + } + + redoSubject() { + if ( this.subjectForwardHistory.length ) { + this.subjectBackHistory.push(this.subject) + this.subject = this.subjectForwardHistory.pop() || '' + } + } + + async runCommands(cmds) { + for ( const cmd of cmds ) { + try { + await this.runCommand(cmd) + if ( this.shouldExit ) break + } catch (e) { + console.log('ERROR: ' + e.message) + break + } + } + } + + async runCommand(cmd) { + const result = await this.runCommandOnSubject(cmd, this.subject) + if ( result === EXIT ) { + this.shouldExit = true + return this + } + + if ( result !== PRESERVE_SUBJECT && result !== PRESERVE_SUBJECT_NO_PRINT ) this.replaceSubject(result) + if ( result !== PRESERVE_SUBJECT_NO_PRINT ) console.log(`\n---------------\n${this.subject}\n---------------\n`) + return this + } + + async runCommandOnSubject(cmd, subject) { + const runners = { + copy: async () => { + const childProcess = await import('node:child_process') + const tmp = tempFile() + await fs.writeFileSync(tmp, subject) + const proc = childProcess.spawn('sh', ['-c', `wl-copy < "${tmp}"`]) + await new Promise(res => { + proc.on('close', () => res()) + }) + return PRESERVE_SUBJECT + }, + paste: async () => { + const childProcess = await import('node:child_process') + const tmp = tempFile() + await fs.writeFileSync(tmp, subject) + const proc = childProcess.spawn('sh', ['-c', `wl-paste > "${tmp}"`]) + await new Promise(res => { + proc.on('close', () => res()) + }) + return fs.readFileSync(tmp).toString('utf-8') + }, + edit: async () => { + this.closePrompt() + const childProcess = await import('node:child_process') + const tmp = tempFile() + await fs.writeFileSync(tmp, this.subject) + const proc = childProcess.spawn(process.env.EDITOR || 'vim', [tmp], { stdio: 'inherit' }) + await new Promise(res => { + proc.on('close', () => res()) + }) + return fs.readFileSync(tmp).toString('utf-8') + }, + infile: () => fs.readFileSync(cmd.file).toString('utf-8'), + outfile: () => { + fs.writeFileSync(cmd.file, subject) + return subject + }, + save: () => { + fs.writeFileSync(cmd.file || state.session, JSON.stringify(this.getState())) + return PRESERVE_SUBJECT + }, + load: () => { + this.loadState(JSON.parse(fs.readFileSync(cmd.file || state.session))) + return PRESERVE_SUBJECT + }, + history: () => { + for ( let i = 0; i < this.subjectBackHistory.length; i += 1 ) { + console.log(`--------------- UNDO ${(this.subjectBackHistory.length - i).toString().padStart(2)} ---------------`) + console.log(this.subjectBackHistory[i]) + console.log('---------------------------------------') + } + console.log('') + console.log('--------------- CURRENT ---------------') + console.log(this.subject) + console.log('---------------------------------------') + console.log('') + for ( let i = 0; i < this.subjectForwardHistory.length; i += 1 ) { + console.log(`--------------- REDO ${i.toString().padStart(2)} ---------------`) + console.log(this.subjectForwardHistory[i]) + console.log('---------------------------------------') + } + return PRESERVE_SUBJECT_NO_PRINT + }, + exit: () => EXIT, + + indent: () => { + const dent = cmd.with === 'spaces' + ? ''.padStart(parseInt(String(cmd.level || '4')), ' ') + : ''.padStart(parseInt(String(cmd.level || '1')), '\t') + + return `${dent}${subject.replace(/^\s*/, '')}` + }, + trim: () => { + if ( cmd.type === 'start' || cmd.type === 'left' || cmd.type === 'both' ) { + const leftRex = new RegExp(`^${cmd.char || '\\s'}*`, 's') + subject = subject.replace(leftRex, '') + } + + if ( cmd.type === 'end' || cmd.type === 'right' || cmd.type === 'both' ) { + const rightRex = new RegExp(`${cmd.char || '\\s'}*$`, 's') + subject = subject.replace(rightRex, '') + } + + return subject + }, + quote: () => { + for ( const mark of state.commonQuotes ) { + if ( !subject.startsWith(mark) || !subject.endsWith(mark) ) continue + subject = subject.substring(1, subject.length - 1) + break + } + + return `${cmd.with || state.quote}${subject}${cmd.with || state.quote}` + }, + unquote: () => { + const marks = state.commonQuotes + if ( cmd.mark ) marks.unshift(cmd.mark) + for ( const mark of marks ) { + if ( !subject.startsWith(mark) || !subject.endsWith(mark) ) continue + subject = subject.substring(1, subject.length - 1) + break + } + + return subject + }, + enclose: () => `${cmd.with}${subject}${state.encloses[cmd.with] || cmd.with}`, + prefix: () => `${cmd.with}${subject}`, + suffix: () => `${subject}${cmd.with}`, + split: () => subject.split(cmd.on).join(cmd.with), + lines: () => subject.split(new RegExp(`${cmd.on || '\\s'}+`, 's')).join(cmd.with), + join: () => subject.split('\n').join(cmd.with), + replace: () => subject.replaceAll(cmd.find, cmd.with), + lsub: () => subject.slice(cmd.offset, cmd.offset + (cmd.len || subject.length)), + rsub: () => subject.split('').reverse().slice(cmd.offset, cmd.offset + (cmd.len || subject.length)).reverse().join(''), + // reparse, + + help: () => { + this.help() + return PRESERVE_SUBJECT_NO_PRINT + }, + show: () => PRESERVE_SUBJECT, + clear: () => '', + undo: () => { + for ( let i = 0; i < parseInt(cmd.steps); i += 1 ) { + this.undoSubject() + } + return PRESERVE_SUBJECT + }, + redo: () => { + for ( let i = 0; i < parseInt(cmd.steps); i += 1 ) { + this.redoSubject() + } + return PRESERVE_SUBJECT + }, + set: () => { + state[cmd.setting] = cmd.to || (makeState()[cmd.setting]) + return subject + }, + + line: async () => (await Promise.all(subject + .split('\n') + .map(line => this.runCommandOnSubject(cmd.sub, line)))) + .join('\n'), + word: async () => { + const separators = [...subject.matchAll(/\s+/sg)] + const words = await Promise.all(subject.split(/\s+/sg) + .map(word => this.runCommandOnSubject(cmd.sub, word))) + + const parts = [] + for ( let i = 0; i < words.length; i += 1 ) { + parts.push(words[i]) + if ( separators[i] ) parts.push(separators[i][0]) + } + return parts.join('') + }, + contains: () => subject.includes(cmd.find) ? subject : '', + } + + const runner = runners[cmd.command] + if ( !runner ) throw new Error('Invalid command: ' + cmd.command) + return runner() + } +} + +(async () => { + + + + +const vm = new VM() +vm.welcome() +while ( true ) { + const question = vm.getQuestionPrompt() + let ans = (await question('str %> ')) + while ( state.terminator !== '\n' && !ans.trim().endsWith(state.terminator) ) { + ans += '\n' + (await question('str |> ')) + } + logIfDebug('raw input:', ans) + + try { + const tokens = (new Lexer(ans)).run() + logIfDebug('lexed tokens:', tokens) + try { + const cmds = (new Parser(tokens)).parse() + logIfDebug('parsed commands:', cmds) + await vm.runCommands(cmds) + if ( vm.shouldExit ) break + } catch (parseErr) { + console.log('ERROR: ' + parseErr.message) + } + } catch (lexErr) { + console.log('ERROR: ' + lexErr.message) + } +} + +vm.closePrompt() + + + + +})();