add a pyodide-based "sandbox" flavor (#437)

This adds a new `GRIST_SANDBOX_FLAVOR=pyodide` option where the version of Python used for the data engine is wasm, and so can be run by node like the rest of the back end. It still runs as a separate process. There are a few small version changes made to packages to avoid various awkwardnesses present in the current versions. All existing tests pass. This is very experimental.

To use, you'll need something with a bash shell and make. First do:

```
cd sandbox/pyodide
make setup  # README.md and Makefile have details
cd ../..
```

Then running Grist as:

```
GRIST_SANDBOX_FLAVOR=pyodide yarn start
```

should work. Adding a formula with content:

```
import sys; return sys.version
```

should return a different Python version than the other sandboxes do.

The motivation for this work is to have a form of sandboxing that will work on Windows for Grist Electron (for Linux we have gvisor/runsc, for Mac we have sandbox-exec, but I haven't found anything comparable for Windows). It also brings a back-end-free version of Grist a bit closer, for use-cases where that would make sense - such as serving a report (in the form of a Grist document) on a static site.
parent a1259139f6
commit 66643a5e6b
@@ -0,0 +1,19 @@ sandbox/pyodide/Makefile (new file)

```makefile
default:
	echo "Welcome to the pyodide sandbox"
	echo "make fetch_packages  # gets python packages prepared earlier"
	echo "make build_packages  # build python packages from scratch"
	echo "make save_packages   # upload python packages to fetch later"
	echo "make setup           # get pyodide node package"

fetch_packages:
	node ./packages.js https://s3.amazonaws.com/grist-pynbox/pyodide/packages/ _build/packages/

build_packages:
	./build_packages.sh

save_packages:
	aws s3 sync _build/packages s3://grist-pynbox/pyodide/packages/

setup:
	./setup.sh
	make fetch_packages
```
@@ -0,0 +1,28 @@ sandbox/pyodide/README.md (new file)

````markdown
This is a collection of scripts for running a pyodide-based "sandbox" for
Grist.

I put "sandbox" in quotes since pyodide isn't built with sandboxing
in mind. It was written to run in a browser, where the browser does
the sandboxing. I don't know how much of node's API ends up being exposed
to the "sandbox" - in previous versions of pyodide the answer seems to
have been "a lot". See the back-and-forth between dalcde and hoodmane in
https://github.com/pyodide/pyodide/issues/960, and specifically
https://github.com/pyodide/pyodide/issues/960#issuecomment-752305257.
I looked at hiwire and its treatment of js globals has changed a
lot. On the surface it looks like there is good control of what is
exposed, but there may be other routes.

Still, some wasm-based solution is likely to be helpful, whether from
pyodide or elsewhere, and this is good practice for that.

***

To run, we need specific versions of the Python packages that Grist uses
to be prepared. It should suffice, in this directory, to do:

```
make setup
```

See the `Makefile` for other options.
````
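The `jsglobals` mechanism is what the README's sandboxing discussion hinges on: by default pyodide under node exposes the host's globals to Python through the `js` module, and passing an explicit `jsglobals` object replaces that surface. A minimal sketch of the idea that `pipe.js` below relies on; the stub `Object` and `setTimeout` mirror what this diff passes (pyodide's bootstrap appears to expect them), while `answer` is purely illustrative:

```js
// Minimal sketch: with an explicit `jsglobals`, Python's `js` module
// sees only what we put there, not node's real globals.
const { loadPyodide } = require('pyodide');

async function demo() {
  const pyodide = await loadPyodide({
    jsglobals: {
      Object: {},
      setTimeout: (code, delay) => setTimeout(code, delay),
      answer: 42,  // the one "global" we choose to expose
    },
  });
  // `js.answer` resolves to 42; names like `process` should not resolve.
  console.log(pyodide.runPython('import js; js.answer'));
}

demo().catch(err => console.error(err));
```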
@@ -0,0 +1,30 @@ sandbox/pyodide/build_packages.sh (new file)

```sh
#!/bin/bash

set -e

echo ""
echo "###############################################################"
echo "## Get pyodide repository, for transpiling python packages"

if [[ ! -e _build/pyodide ]]; then
  cd _build
  git clone https://github.com/pyodide/pyodide
  cd ..
fi

echo ""
echo "###############################################################"
echo "## Prepare python packages"

cd _build/pyodide
./run_docker make
cp ../../../requirements3.txt .
./run_docker pyodide build -r requirements3.txt --output-lockfile result.txt
cat result.txt
cd ../..

echo ""
echo "###############################################################"
echo "## Copy out python packages"

node ./packages.js _build/pyodide/dist/ _build/packages/
```
@@ -0,0 +1,117 @@ sandbox/pyodide/packages.js (new file)

```js
const path = require('path');
const fs = require('fs');
const fetch = require('node-fetch');

// Read the pinned requirements and report which of them have a matching
// wheel in `src` (available) and which do not (misses).
async function listLibs(src) {
  const txt = fs.readFileSync(path.join(__dirname, '..', 'requirements3.txt'), 'utf8');
  const libs = {};
  for (const line of txt.split(/\r?\n/)) {
    const raw = line.split('#')[0];  // strip comments
    if (!raw.includes('==')) { continue; }
    const [name, version] = raw.trim().split('==');
    libs[name] = version;
  }
  const hits = [];
  const misses = [];
  const toLoad = [];
  const material = fs.readdirSync(src);
  for (const [lib, version] of Object.entries(libs)) {
    // Wheel filenames use underscores where package names may use dashes.
    const nlib = lib.replace(/-/g, '_');
    const info = {
      name: lib,
      standardName: nlib,
      version: version,
    };
    try {
      const found = material.filter(m => m.startsWith(`${nlib}-${version}-`));
      if (found.length !== 1) {
        throw new Error('did not find 1');
      }
      const fname = found[0];
      info.fullName = path.join(src, fname);
      info.fileName = fname;
      toLoad.push(info);
      hits.push(lib);
    } catch (e) {
      misses.push(info);
    }
  }
  return {
    available: toLoad,
    misses,
  };
}
exports.listLibs = listLibs;

// Copy wheels (plus a small JSON metadata file for each) from a local
// directory, such as a pyodide build tree, into the cache directory.
async function findOnDisk(src, dest) {
  console.log(`Organizing packages on disk`, {src, dest});
  fs.mkdirSync(dest, {recursive: true});
  let libs = await listLibs(src);
  for (const lib of libs.available) {
    fs.copyFileSync(lib.fullName, path.join(dest, lib.fileName));
    fs.writeFileSync(path.join(dest, `${lib.name}-${lib.version}.json`),
                     JSON.stringify({
                       name: lib.name,
                       version: lib.version,
                       fileName: lib.fileName,
                     }, null, 2));
    console.log("Copied", {
      content: path.join(dest, lib.fileName),
      meta: path.join(dest, `${lib.name}-${lib.version}.json`),
    });
  }
  libs = await listLibs(dest);
  console.log(`Cached`, {libs: libs.available.map(lib => lib.name)});
  console.log(`Missing`, {libs: libs.misses.map(lib => lib.name)});
}

// Download any wheels we don't have yet from a URL prefix, using the
// JSON metadata files to map package names to wheel filenames.
async function findOnNet(src, dest) {
  console.log(`Caching packages on disk`, {src, dest});
  fs.mkdirSync(dest, {recursive: true});
  let libs = await listLibs(dest);
  console.log(`Cached`, {libs: libs.available.map(lib => lib.name)});
  for (const lib of libs.misses) {
    console.log('Fetching', lib);
    const url = new URL(src);
    url.pathname = url.pathname + lib.name + '-' + lib.version + '.json';
    const result = await fetch(url.href);
    if (result.status === 200) {
      const data = await result.json();
      const url2 = new URL(src);
      url2.pathname = url2.pathname + data.fileName;
      const result2 = await fetch(url2.href);
      if (result2.status === 200) {
        fs.writeFileSync(path.join(dest, `${lib.name}-${lib.version}.json`),
                         JSON.stringify(data, null, 2));
        fs.writeFileSync(path.join(dest, data.fileName),
                         await result2.buffer());
      } else {
        console.error("No payload available", {lib});
      }
    } else {
      console.error("No metadata available", {lib});
    }
  }
  libs = await listLibs(dest);
  console.log(`Missing`, {libs: libs.misses.map(lib => lib.name)});
}

async function main(src, dest) {
  if (!src) {
    console.error('please supply a source');
    process.exit(1);
  }
  if (!dest) {
    console.error('please supply a destination');
    process.exit(1);
  }
  if (src.startsWith('http:') || src.startsWith('https:')) {
    await findOnNet(src, dest);
    return;
  }
  await findOnDisk(src, dest);
}

if (require.main === module) {
  main(...process.argv.slice(2)).catch(e => console.error(e));
}
```
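Since `listLibs` is exported, the wheel cache can be inspected from node directly. A small sketch, assuming `make setup` (or `make fetch_packages`) has already populated `_build/packages/`:

```js
// Hypothetical check, run from sandbox/pyodide/: which pinned
// requirements have a wheel on disk, and which are still missing?
const { listLibs } = require('./packages');

async function check() {
  const libs = await listLibs('_build/packages/');
  console.log('available:', libs.available.map(lib => `${lib.name}==${lib.version}`));
  console.log('missing:  ', libs.misses.map(lib => `${lib.name}==${lib.version}`));
}

check().catch(err => console.error(err));
```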
@@ -0,0 +1,147 @@ sandbox/pyodide/pipe.js (new file)

```js
const path = require('path');
const fs = require('fs');

const { loadPyodide } = require('./_build/worker/node_modules/pyodide');
const { listLibs } = require('./packages');

// File descriptors used to talk to the parent Grist process.
const INCOMING_FD = 4;
const OUTGOING_FD = 5;

class GristPipe {
  constructor() {
    this.pyodide = null;
    this.incomingBuffer = Buffer.alloc(65536);
    this.addedBlob = false;
    this.adminMode = false;
  }

  async init() {
    const self = this;
    this.setAdminMode(true);
    this.pyodide = await loadPyodide({
      // Expose as little as possible to the sandboxed code.
      jsglobals: {
        Object: {},
        setTimeout: function(code, delay) {
          if (self.adminMode) {
            setTimeout(code, delay);
            // Seems to be OK not to return anything, so we don't.
          } else {
            throw new Error('setTimeout not available');
          }
        },
        sendFromSandbox: (data) => {
          return fs.writeSync(OUTGOING_FD, Buffer.from(data.toJs()));
        }
      },
    });
    this.setAdminMode(false);
    this.pyodide.setStdin({
      stdin: () => {
        const result = fs.readSync(INCOMING_FD, this.incomingBuffer, 0,
                                   this.incomingBuffer.byteLength);
        if (result > 0) {
          const buf = Buffer.allocUnsafe(result);
          this.incomingBuffer.copy(buf, 0, 0, result);
          return buf;
        }
        return null;
      },
    });
    this.pyodide.setStderr({
      batched: (data) => {
        this.log("[py]", data);
      }
    });
  }

  async loadCode() {
    // Load python packages.
    const src = path.join(__dirname, '_build', 'packages');
    const lsty = (await listLibs(src)).available.map(item => item.fullName);
    await this.pyodide.loadPackage(lsty, {
      messageCallback: (msg) => this.log('[package]', msg),
    });

    // Load Grist data engine code.
    // We mount it as /grist_src, copy to /grist, then unmount.
    // Note that the path to the source must be a realpath.
    const root = fs.realpathSync(path.join(__dirname, '../grist'));
    await this.pyodide.FS.mkdir("/grist_src");
    await this.pyodide.FS.mount(this.pyodide.FS.filesystems.NODEFS, { root }, "/grist_src");
    // Now copy /grist_src to /grist.
    // For some reason shutil.copytree doesn't work on Windows in this situation, so
    // we reimplement it crudely.
    await this.pyodide.runPython(`
import os, shutil
def copytree(src, dst):
  os.makedirs(dst, exist_ok=True)
  for item in os.listdir(src):
    s = os.path.join(src, item)
    d = os.path.join(dst, item)
    if os.path.isdir(s):
      copytree(s, d)
    else:
      shutil.copy2(s, d)
copytree('/grist_src', '/grist')`);
    await this.pyodide.FS.unmount("/grist_src");
    await this.pyodide.FS.rmdir("/grist_src");
  }

  async mountImportDirIfNeeded() {
    if (process.env.IMPORTDIR) {
      this.log("Setting up import from", process.env.IMPORTDIR);
      // Ideally this would be read-only; I don't see a way to do that,
      // other than copying as for the Grist code.
      await this.pyodide.FS.mkdir("/import");
      await this.pyodide.FS.mount(this.pyodide.FS.filesystems.NODEFS, {
        root: process.env.IMPORTDIR,
      }, "/import");
    }
  }

  async runCode() {
    await this.pyodide.runPython(`
import sys
sys.path.append('/')
sys.path.append('/grist')
import grist
import main
import os
os.environ['PIPE_MODE'] = 'pyodide'
os.environ['IMPORTDIR'] = '/import'
main.main()
`);
  }

  setAdminMode(active) {
    this.adminMode = active;
    // Lack of Blob may result in a message on console.log that hurts us.
    if (active && !globalThis.Blob) {
      globalThis.Blob = String;
      this.addedBlob = true;
    }
    if (!active && this.addedBlob) {
      delete globalThis.Blob;
      this.addedBlob = false;
    }
  }

  log(...args) {
    console.error("[pyodide sandbox]", ...args);
  }
}

async function main() {
  try {
    const pipe = new GristPipe();
    await pipe.init();
    await pipe.loadCode();
    await pipe.mountImportDirIfNeeded();
    await pipe.runCode();
  } finally {
    process.stdin.removeAllListeners();
  }
}

main().catch(err => console.error("[pyodide error]", err));
```
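This script assumes its parent hands it two extra pipes on fds 4 and 5. That wiring lives in Grist's back end rather than in this diff, but a hedged sketch of the shape it could take in node, for orientation only:

```js
// Illustrative only: spawn pipe.js with two extra pipes, fd 4 for
// messages into the sandbox and fd 5 for messages coming back.
const { spawn } = require('child_process');

const child = spawn('node', ['sandbox/pyodide/pipe.js'], {
  // Slots 0-2: usual stdio; slot 3: unused here; slots 4 and 5 become
  // INCOMING_FD and OUTGOING_FD inside pipe.js.
  stdio: ['inherit', 'inherit', 'inherit', 'ignore', 'pipe', 'pipe'],
});

child.stdio[5].on('data', (data) => {
  console.log('from sandbox:', data.length, 'bytes');  // sent via sendFromSandbox
});

// Real traffic uses Grist's marshalled message format; an empty buffer
// here is only a placeholder.
child.stdio[4].write(Buffer.alloc(0));
```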
@@ -0,0 +1,15 @@ sandbox/pyodide/setup.sh (new file)

```sh
#!/bin/bash

set -e

echo ""
echo "###############################################################"
echo "## Get pyodide node package"

if [[ ! -e _build/worker ]]; then
  mkdir -p _build/worker
  cd _build/worker
  yarn init --yes
  yarn add pyodide@0.22.1
  cd ../..
fi
```