gristlabs_grist-core/app/server/lib/NSandbox.ts
Alex Hall 84ddbc448b (core) Add test_replay for easily replaying data sent to the sandbox purely within python
Summary:
Run JS with a value for SANDBOX_BUFFERS_DIR, then run test_replay in python with the same value to replay just the python code.

See test_replay.py for more info.

Test Plan:
Record some data, e.g. `SANDBOX_BUFFERS_DIR=manual npm start` or `SANDBOX_BUFFERS_DIR=server ./test/testrun.sh server`.

Then run `SANDBOX_BUFFERS_DIR=server python -m unittest test_replay` from within `core/sandbox/grist` to replay the input from the JS.

Sample of the output will look like this:

```
Checking /tmp/sandbox_buffers/server/2021-06-16T15:13:59.958Z
True
Checking /tmp/sandbox_buffers/server/2021-06-16T15:16:37.170Z
True
Checking /tmp/sandbox_buffers/server/2021-06-16T15:14:22.378Z
True
```

Reviewers: paulfitz, dsagal

Reviewed By: dsagal

Differential Revision: https://phab.getgrist.com/D2866
2021-06-30 16:56:09 +02:00

386 lines
14 KiB
TypeScript

/**
* JS controller for the pypy sandbox.
*/
import * as pidusage from '@gristlabs/pidusage';
import * as marshal from 'app/common/marshal';
import {ISandbox, ISandboxCreationOptions, ISandboxCreator} from 'app/server/lib/ISandbox';
import * as log from 'app/server/lib/log';
import * as sandboxUtil from 'app/server/lib/sandboxUtil';
import * as shutdown from 'app/server/lib/shutdown';
import {Throttle} from 'app/server/lib/Throttle';
import {ChildProcess, spawn, SpawnOptions} from 'child_process';
import * as path from 'path';
import {Stream, Writable} from 'stream';
import * as _ from 'lodash';
import * as fs from "fs";
type SandboxMethod = (...args: any[]) => any;
export interface ISandboxOptions {
args: string[]; // The arguments to pass to the python process.
exports?: {[name: string]: SandboxMethod}; // Functions made available to the sandboxed process.
logCalls?: boolean; // (Not implemented) Whether to log all system calls from the python sandbox.
logTimes?: boolean; // Whether to log time taken by calls to python sandbox.
unsilenceLog?: boolean; // Don't silence the sel_ldr logging.
selLdrArgs?: string[]; // Arguments passed to selLdr, for instance the following sets an
// environment variable `{ ... selLdrArgs: ['-E', 'PYTHONPATH=grist'] ... }`.
logMeta?: log.ILogMeta; // Log metadata (e.g. including docId) to report in all log messages.
command?: string;
env?: NodeJS.ProcessEnv;
}
type ResolveRejectPair = [(value?: any) => void, (reason?: unknown) => void];
// Type for basic message identifiers, available as constants in sandboxUtil.
type MsgCode = null | true | false;
// Optional root folder to store binary data sent to and from the sandbox
// See test_replay.py
const recordBuffersRoot = process.env.RECORD_SANDBOX_BUFFERS_DIR;
export class NSandbox implements ISandbox {
/**
* Helper function to run the nacl sandbox. It takes care of most arguments, similarly to
* nacl/bin/run script, but without the reliance on bash. We can't use bash when -r/-w options
* because on Windows it doesn't pass along the open file descriptors. Bash is also unavailable
* when installing a standalone version on Windows.
*/
public static spawn(options: ISandboxOptions): ChildProcess {
const {command, args: pythonArgs, unsilenceLog, env} = options;
const spawnOptions: SpawnOptions = {
stdio: ['pipe', 'pipe', 'pipe', 'pipe', 'pipe'],
env,
};
if (command) {
return spawn(command, pythonArgs,
{cwd: path.join(process.cwd(), 'sandbox'), ...spawnOptions});
}
const noLog = unsilenceLog ? [] :
(process.env.OS === 'Windows_NT' ? ['-l', 'NUL'] : ['-l', '/dev/null']);
// We use these options to set up communication with the sandbox:
// -r 3:3 to associate a file descriptor 3 on the outside of the sandbox with FD 3 on the
// inside, for reading from the inside. This becomes `this._streamToSandbox`.
// -w 4:4 to associate FD 4 on the outside with FD 4 on the inside for writing from the inside.
// This becomes `this._streamFromSandbox`
const selLdrArgs = ['-r', '3:3', '-w', '4:4', ...options.selLdrArgs || []];
for (const [key, value] of _.toPairs(env)) {
selLdrArgs.push("-E");
selLdrArgs.push(`${key}=${value}`);
}
return spawn('sandbox/nacl/bin/sel_ldr', [
'-B', './sandbox/nacl/lib/irt_core.nexe', '-m', './sandbox/nacl/root:/:ro',
...noLog,
...selLdrArgs,
'./sandbox/nacl/lib/runnable-ld.so',
'--library-path', '/slib', '/python/bin/python2.7.nexe',
...pythonArgs
],
spawnOptions,
);
}
public readonly childProc: ChildProcess;
private _logTimes: boolean;
private _exportedFunctions: {[name: string]: SandboxMethod};
private _marshaller = new marshal.Marshaller({stringToBuffer: false, version: 2});
private _unmarshaller = new marshal.Unmarshaller({ bufferToString: false });
// Members used for reading from the sandbox process.
private _pendingReads: ResolveRejectPair[] = [];
private _isReadClosed = false;
private _isWriteClosed = false;
private _logMeta: log.ILogMeta;
private _streamToSandbox: Writable;
private _streamFromSandbox: Stream;
private _throttle: Throttle | undefined;
// Create a unique subdirectory for each sandbox process so they can be replayed separately
private _recordBuffersDir = recordBuffersRoot ? path.resolve(recordBuffersRoot, new Date().toISOString()) : null;
/*
* Callers may listen to events from sandbox.childProc (a ChildProcess), e.g. 'close' and 'error'.
* The sandbox listens for 'aboutToExit' event on the process, to properly shut down.
*/
constructor(options: ISandboxOptions) {
this._logTimes = Boolean(options.logTimes || options.logCalls);
this._exportedFunctions = options.exports || {};
this.childProc = NSandbox.spawn(options);
this._logMeta = {sandboxPid: this.childProc.pid, ...options.logMeta};
log.rawDebug("Sandbox started", this._logMeta);
this._streamToSandbox = (this.childProc.stdio as Stream[])[3] as Writable;
this._streamFromSandbox = (this.childProc.stdio as Stream[])[4];
this.childProc.on('close', this._onExit.bind(this));
this.childProc.on('error', this._onError.bind(this));
this.childProc.stdout.on('data', sandboxUtil.makeLinePrefixer('Sandbox stdout: ', this._logMeta));
this.childProc.stderr.on('data', sandboxUtil.makeLinePrefixer('Sandbox stderr: ', this._logMeta));
this._streamFromSandbox.on('data', (data) => this._onSandboxData(data));
this._streamFromSandbox.on('end', () => this._onSandboxClose());
this._streamFromSandbox.on('error', (err) => {
log.rawError(`Sandbox error reading: ${err}`, this._logMeta);
this._onSandboxClose();
});
this._streamToSandbox.on('error', (err) => {
if (!this._isWriteClosed) {
log.rawError(`Sandbox error writing: ${err}`, this._logMeta);
}
});
// On shutdown, shutdown the child process cleanly, and wait for it to exit.
shutdown.addCleanupHandler(this, this.shutdown);
if (process.env.GRIST_THROTTLE_CPU) {
this._throttle = new Throttle({
pid: this.childProc.pid,
logMeta: this._logMeta,
});
}
if (this._recordBuffersDir) {
log.rawDebug(`Recording sandbox buffers in ${this._recordBuffersDir}`, this._logMeta);
fs.mkdirSync(this._recordBuffersDir, {recursive: true});
}
}
/**
* Shuts down the sandbox process cleanly, and wait for it to exit.
* @return {Promise} Promise that's resolved with [code, signal] when the sandbox exits.
*/
public async shutdown() {
log.rawDebug("Sandbox shutdown starting", this._logMeta);
shutdown.removeCleanupHandlers(this);
// The signal ensures the sandbox process exits even if it's hanging in an infinite loop or
// long computation. It doesn't get a chance to clean up, but since it is sandboxed, there is
// nothing it needs to clean up anyway.
const timeoutID = setTimeout(() => {
log.rawWarn("Sandbox sending SIGKILL", this._logMeta);
this.childProc.kill('SIGKILL');
}, 1000);
const result = await new Promise((resolve, reject) => {
if (this._isWriteClosed) { resolve(); }
this.childProc.on('error', reject);
this.childProc.on('close', resolve);
this.childProc.on('exit', resolve);
this._close();
});
// In the normal case, the kill timer is pending when the process exits, and we can clear it. If
// the process got killed, the timer is invalid, and clearTimeout() does nothing.
clearTimeout(timeoutID);
return result;
}
/**
* Makes a call to the python process implementing our calling convention on stdin/stdout.
* @param funcName The name of the python RPC function to call.
* @param args Arguments to pass to the given function.
* @returns A promise for the return value from the Python function.
*/
public pyCall(funcName: string, ...varArgs: unknown[]): Promise<any> {
const startTime = Date.now();
this._sendData(sandboxUtil.CALL, Array.from(arguments));
return this._pyCallWait(funcName, startTime);
}
/**
* Returns the RSS (resident set size) of the sandbox process, in bytes.
*/
public async reportMemoryUsage() {
const memory = (await pidusage(this.childProc.pid)).memory;
log.rawDebug('Sandbox memory', {memory, ...this._logMeta});
}
private async _pyCallWait(funcName: string, startTime: number): Promise<any> {
try {
return await new Promise((resolve, reject) => {
this._pendingReads.push([resolve, reject]);
});
} finally {
if (this._logTimes) {
log.rawDebug(`Sandbox pyCall[${funcName}] took ${Date.now() - startTime} ms`, this._logMeta);
}
}
}
private _close() {
if (this._throttle) { this._throttle.stop(); }
if (!this._isWriteClosed) {
// Close the pipe to the sandbox, which should cause the sandbox to exit cleanly.
this._streamToSandbox.end();
this._isWriteClosed = true;
}
}
private _onExit(code: number, signal: string) {
this._close();
log.rawDebug(`Sandbox exited with code ${code} signal ${signal}`, this._logMeta);
}
private _onError(err: Error) {
this._close();
log.rawWarn(`Sandbox could not be spawned: ${err}`, this._logMeta);
}
/**
* Send a message to the sandbox process with the given message code and data.
*/
private _sendData(msgCode: MsgCode, data: any) {
if (this._isReadClosed) {
throw new sandboxUtil.SandboxError("PipeToSandbox is closed");
}
this._marshaller.marshal(msgCode);
this._marshaller.marshal(data);
const buf = this._marshaller.dumpAsBuffer();
if (this._recordBuffersDir) {
fs.appendFileSync(path.resolve(this._recordBuffersDir, "input"), buf);
}
return this._streamToSandbox.write(buf);
}
/**
* Process a buffer of data received from the sandbox process.
*/
private _onSandboxData(data: any) {
this._unmarshaller.parse(data, buf => {
const value = marshal.loads(buf, { bufferToString: true });
if (this._recordBuffersDir) {
fs.appendFileSync(path.resolve(this._recordBuffersDir, "output"), buf);
}
this._onSandboxMsg(value[0], value[1]);
});
}
/**
* Process the closing of the pipe by the sandboxed process.
*/
private _onSandboxClose() {
if (this._throttle) { this._throttle.stop(); }
this._isReadClosed = true;
// Clear out all reads pending on PipeFromSandbox, rejecting them with the given error.
const err = new sandboxUtil.SandboxError("PipeFromSandbox is closed");
this._pendingReads.forEach(resolvePair => resolvePair[1](err));
this._pendingReads = [];
}
/**
* Process a parsed message from the sandboxed process.
*/
private _onSandboxMsg(msgCode: MsgCode, data: any) {
if (msgCode === sandboxUtil.CALL) {
// Handle calls FROM the sandbox.
if (!Array.isArray(data) || data.length === 0) {
log.rawWarn("Sandbox invalid call from the sandbox", this._logMeta);
} else {
const fname = data[0];
const args = data.slice(1);
log.rawDebug(`Sandbox got call to ${fname} (${args.length} args)`, this._logMeta);
Promise.resolve()
.then(() => {
const func = this._exportedFunctions[fname];
if (!func) { throw new Error("No such exported function: " + fname); }
return func(...args);
})
.then((ret) => {
this._sendData(sandboxUtil.DATA, ret);
}, (err) => {
this._sendData(sandboxUtil.EXC, err.toString());
})
.catch((err) => {
log.rawDebug(`Sandbox sending response failed: ${err}`, this._logMeta);
});
}
} else {
// Handle return values for calls made to the sandbox.
const resolvePair = this._pendingReads.shift();
if (resolvePair) {
if (msgCode === sandboxUtil.EXC) {
resolvePair[1](new sandboxUtil.SandboxError(data));
} else if (msgCode === sandboxUtil.DATA) {
resolvePair[0](data);
} else {
log.rawWarn("Sandbox invalid message from sandbox", this._logMeta);
}
}
}
}
}
export class NSandboxCreator implements ISandboxCreator {
public constructor(private _flavor: 'pynbox' | 'unsandboxed') {
}
public create(options: ISandboxCreationOptions): ISandbox {
const pynbox = this._flavor === 'pynbox';
// Main script to run.
const defaultEntryPoint = pynbox ? 'grist/main.pyc' : 'grist/main.py';
const args = [options.entryPoint || defaultEntryPoint];
if (!options.entryPoint && options.comment) {
// When using default entry point, we can add on a comment as an argument - it isn't
// used, but will show up in `ps` output for the sandbox process. Comment is intended
// to be a document name/id.
args.push(options.comment);
}
const selLdrArgs: string[] = [];
if (options.sandboxMount) {
selLdrArgs.push(
// TODO: Only modules that we share with plugins should be mounted. They could be gathered in
// a "$APPROOT/sandbox/plugin" folder, only which get mounted.
'-m', `${options.sandboxMount}:/sandbox:ro`);
}
if (options.importMount) {
selLdrArgs.push('-m', `${options.importMount}:/importdir:ro`);
}
const pythonVersion = 'python2.7';
const env: NodeJS.ProcessEnv = {
// Python library path is only configurable when flavor is unsandboxed.
// In this case, expect to find library files in a virtualenv built by core
// buildtools/prepare_python.sh
PYTHONPATH: pynbox ? 'grist:thirdparty' :
path.join(process.cwd(), 'sandbox', 'grist') + ':' +
path.join(process.cwd(), 'venv', 'lib', pythonVersion, 'site-packages'),
DOC_URL: (options.docUrl || '').replace(/[^-a-zA-Z0-9_:/?&.]/, ''),
// Making time and randomness act deterministically for testing purposes.
// See test/utils/recordPyCalls.ts
...(process.env.LIBFAKETIME_PATH ? { // path to compiled binary
DETERMINISTIC_MODE: '1', // tells python to seed the random module
FAKETIME: "2020-01-01 00:00:00", // setting for libfaketime
// For Linux
LD_PRELOAD: process.env.LIBFAKETIME_PATH,
// For Mac (https://github.com/wolfcw/libfaketime/blob/master/README.OSX)
DYLD_INSERT_LIBRARIES: process.env.LIBFAKETIME_PATH,
DYLD_FORCE_FLAT_NAMESPACE: '1',
} : {}),
};
return new NSandbox({
args,
logCalls: options.logCalls,
logMeta: options.logMeta,
logTimes: options.logTimes,
selLdrArgs,
env,
...(pynbox ? {} : {command: pythonVersion}),
});
}
}