add a pyodide-based "sandbox" flavor (#437)

This adds a new `GRIST_SANDBOX_FLAVOR=pyodide` option where the
version of Python used for the data engine is wasm, and so can
be run by node like the rest of the back end. It still runs as
a separate process.

There are a few small version changes made to packages to avoid
various awkwardnesses present in the current versions. All existing
tests pass.

This is very experimental. To use, you'll need something with
a bash shell and make. First do:
```
cd sandbox/pyodide
make setup           # README.md and Makefile have details
cd ..
```

Then running Grist as:
```
GRIST_SANDBOX_FLAVOR=pyodide yarn start
```
should work. Adding a formula with content:
```
import sys; return sys.version
```
should return a different Python version than other sandboxes.

The motivation for this work is to have a form of sandboxing
that will work on Windows for Grist Electron (for Linux we have
gvisor/runsc, for Mac we have sandbox-exec, but I haven't found
anything comparable for Windows).

It also brings a back-end-free version of Grist a bit closer, for
use-cases where that would make sense - such as serving a report
(in the form of a Grist document) on a static site.
This commit is contained in:
Paul Fitzpatrick
2023-03-06 16:56:25 -05:00
committed by GitHub
parent a1259139f6
commit 66643a5e6b
12 changed files with 497 additions and 53 deletions

View File

@@ -15,7 +15,7 @@ import {
} from 'app/server/lib/SandboxControl';
import * as sandboxUtil from 'app/server/lib/sandboxUtil';
import * as shutdown from 'app/server/lib/shutdown';
import {ChildProcess, spawn} from 'child_process';
import {ChildProcess, fork, spawn} from 'child_process';
import * as fs from 'fs';
import * as _ from 'lodash';
import * as path from 'path';
@@ -73,6 +73,8 @@ export interface ISandboxOptions {
interface SandboxProcess {
child: ChildProcess;
control: ISandboxControl;
dataToSandboxDescriptor?: number; // override sandbox's 'stdin' for data
dataFromSandboxDescriptor?: number; // override sandbox's 'stdout' for data
}
type ResolveRejectPair = [(value?: any) => void, (reason?: unknown) => void];
@@ -131,10 +133,23 @@ export class NSandbox implements ISandbox {
if (options.minimalPipeMode) {
log.rawDebug("3-pipe Sandbox started", this._logMeta);
this._streamToSandbox = this.childProc.stdin!;
this._streamFromSandbox = this.childProc.stdout!;
if (sandboxProcess.dataToSandboxDescriptor) {
this._streamToSandbox =
(this.childProc.stdio as Stream[])[sandboxProcess.dataToSandboxDescriptor] as Writable;
} else {
this._streamToSandbox = this.childProc.stdin!;
}
if (sandboxProcess.dataFromSandboxDescriptor) {
this._streamFromSandbox =
(this.childProc.stdio as Stream[])[sandboxProcess.dataFromSandboxDescriptor];
} else {
this._streamFromSandbox = this.childProc.stdout!;
}
} else {
log.rawDebug("5-pipe Sandbox started", this._logMeta);
if (sandboxProcess.dataFromSandboxDescriptor || sandboxProcess.dataToSandboxDescriptor) {
throw new Error('cannot override file descriptors in 5 pipe mode');
}
this._streamToSandbox = (this.childProc.stdio as Stream[])[3] as Writable;
this._streamFromSandbox = (this.childProc.stdio as Stream[])[4];
this.childProc.stdout!.on('data', sandboxUtil.makeLinePrefixer('Sandbox stdout: ', this._logMeta));
@@ -206,10 +221,17 @@ export class NSandbox implements ISandbox {
* @param args Arguments to pass to the given function.
* @returns A promise for the return value from the Python function.
*/
public pyCall(funcName: string, ...varArgs: unknown[]): Promise<any> {
public async pyCall(funcName: string, ...varArgs: unknown[]): Promise<any> {
const startTime = Date.now();
this._sendData(sandboxUtil.CALL, Array.from(arguments));
return this._pyCallWait(funcName, startTime);
const slowCallCheck = setTimeout(() => {
// Log calls that take some time, can be a useful symptom of misconfiguration
// (or just benign if the doc is big).
log.rawWarn('Slow pyCall', {...this._logMeta, funcName});
}, 10000);
const result = await this._pyCallWait(funcName, startTime);
clearTimeout(slowCallCheck);
return result;
}
/**
@@ -371,6 +393,7 @@ const spawners = {
docker, // Run sandboxes in distinct docker containers.
gvisor, // Gvisor's runsc sandbox.
macSandboxExec, // Use "sandbox-exec" on Mac.
pyodide, // Run data engine using pyodide.
};
function isFlavor(flavor: string): flavor is keyof typeof spawners {
@@ -528,6 +551,44 @@ function unsandboxed(options: ISandboxOptions): SandboxProcess {
return {child, control: new DirectProcessControl(child, options.logMeta)};
}
function pyodide(options: ISandboxOptions): SandboxProcess {
const paths = getAbsolutePaths(options);
// We will fork with three regular pipes (stdin, stdout, stderr), then
// ipc (mandatory for calling fork), and a replacement pipe for stdin
// and for stdout.
// The regular stdin always opens non-blocking in node, which is a pain
// in this case, so we just use a different pipe. There's a different
// problem with stdout, with the same solution.
const spawnOptions = {
stdio: ['ignore', 'ignore', 'pipe', 'ipc', 'pipe', 'pipe'] as Array<'pipe'|'ipc'>,
env: {
PYTHONPATH: paths.engine,
IMPORTDIR: options.importDir,
...getInsertedEnv(options),
...getWrappingEnv(options),
}
};
const base = getUnpackedAppRoot();
const child = fork(path.join(base, 'sandbox', 'pyodide', 'pipe.js'),
{cwd: path.join(process.cwd(), 'sandbox'), ...spawnOptions});
return {
child,
control: new DirectProcessControl(child, options.logMeta),
dataToSandboxDescriptor: 4, // Cannot use normal descriptor, node
// makes it non-blocking. Can be worked around in linux and osx, but
// for windows just using a different file descriptor seems simplest.
// In the sandbox, calling async methods from emscripten code is
// possible but would require more changes to the data engine code
// than seems reasonable at this time. The top level sandbox.run
// can be tweaked to step operations, which actually works for a
// lot of things, but not for cases where the sandbox calls back
// into node (e.g. for column type guessing). TLDR: just switching
// to FD 4 and reading synchronously is more practical solution.
dataFromSandboxDescriptor: 5, // There's an equally long but different
// story about why stdout is a bit messed up under pyodide right now.
};
}
/**
* Helper function to run python in gvisor's runsc, with multiple
* sandboxes run within the same container. GRIST_SANDBOX should