gristlabs_grist-core/sandbox/gvisor/run.py

298 lines
10 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
# Run a command under gvisor, setting environment variables and sharing certain
# directories in read only mode. Specialized for running python, and (for testing)
# bash. Does not change directory structure, for unprivileged operation.
# Contains plenty of hard-coded paths that assume we are running within
# a container.
import argparse
import glob
import json
import os
import subprocess
import sys
import tempfile
# Separate arguments before and after a -- divider.
from itertools import groupby
all_args = sys.argv[1:]
all_args = [list(group) for k, group in groupby(all_args, lambda x: x == "--") if not k]
main_args = all_args[0] # args before the -- divider, for this script.
more_args = all_args[1] if len(all_args) > 1 else [] # args after the -- divider
# to pass on to python/bash.
# Set up options.
parser = argparse.ArgumentParser(description='Run something in gvisor (runsc).')
parser.add_argument('command', choices=['bash', 'python2', 'python3'])
parser.add_argument('--dry-run', '-d', action='store_true',
help="print config")
parser.add_argument('--env', '-E', action='append')
parser.add_argument('--mount', '-m', action='append')
parser.add_argument('--restore', '-r')
parser.add_argument('--checkpoint', '-c')
parser.add_argument('--start', '-s') # allow overridding the entrypoint
parser.add_argument('--faketime')
# If CHECK_FOR_TERMINAL is set, just determine whether we will be running bash, and
# exit with success if so. This is so if we are being wrapped in docker, it can be
# started in interactive mode.
if os.environ.get('CHECK_FOR_TERMINAL') == '1':
args = parser.parse_args(main_args)
exit(0 if args.command == 'bash' else -1)
args = parser.parse_args(main_args)
sys.stderr.write('run.py: ' + ' '.join(sys.argv) + "\n")
sys.stderr.flush()
include_bash = args.command == 'bash'
include_python2 = args.command == 'python2'
include_python3 = args.command == 'python3'
# Basic settings for gvisor's runsc. This follows the standard OCI specification:
# https://github.com/opencontainers/runtime-spec/blob/master/config.md
cmd_args = []
mounts = [ # These will be filled in more fully programmatically below.
{
"destination": "/proc", # gvisor virtualizes /proc
"source": "/proc",
"type": "/procfs"
},
{
"destination": "/sys", # gvisor virtualizes /sys
"source": "/sys",
"type": "/sysfs",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
]
}
]
preserved = set()
env = [
"PATH=/usr/local/bin:/usr/bin:/bin",
"LD_LIBRARY_PATH=/usr/local/lib" # Assumes python version in /usr/local
] + (args.env or [])
settings = {
"ociVersion": "1.0.0",
"process": {
"terminal": include_bash,
# Match current user id, for convenience with mounts. For some versions of
# gvisor, default behavior may be better - if you see "access denied" problems
# during imports, try commenting this section out. We could make imports work
# for any version of gvisor by setting mode when using tmp.dir to allow
# others to list directory contents.
"user": {
"uid": os.getuid(),
"gid": 0
},
"args": cmd_args,
"env": env,
"cwd": "/"
},
"root": {
"path": "/", # The fork of gvisor we use shares paths with host.
"readonly": True # Read-only access by default, and we will blank out most
# of the host with empty "tmpfs" mounts.
},
"hostname": "gristland",
"mounts": mounts,
"linux": {
"namespaces": [
{
"type": "pid"
},
{
"type": "network"
},
{
"type": "ipc"
},
{
"type": "uts"
},
{
"type": "mount"
}
]
}
}
memory_limit = os.environ.get('GVISOR_LIMIT_MEMORY')
if memory_limit:
settings['process']['rlimits'] = [
{
"type": "RLIMIT_AS",
"hard": int(memory_limit),
"soft": int(memory_limit)
}
]
# Helper for preparing a mount.
def preserve(*locations, short_failure=False):
for location in locations:
# Check the requested directory is visible on the host, and that there hasn't been a
# muddle. For Grist, this could happen if a parent directory of a temporary import
# directory hasn't been made available to the container this code runs in, for example.
if not os.path.exists(location):
if short_failure:
raise Exception('cannot find: ' + location)
raise Exception('cannot find: ' + location + ' ' +
'(if tmp path, make sure TMPDIR when running grist and GRIST_TMP line up)')
mounts.append({
"destination": location,
"source": location,
"options": ["ro"],
"type": "bind"
})
preserved.add(location)
# Prepare the file system - blank out everything that need not be shared.
exceptions = ["lib", "lib64"] # to be shared (read-only)
exceptions += ["proc", "sys"] # already virtualized
# retain /bin and /usr/bin for utilities
start = args.start
if include_bash or start:
exceptions.append("bin")
preserve("/usr/bin")
preserve("/usr/local/lib")
if os.path.exists('/lib64'):
preserve("/lib64")
if os.path.exists('/usr/lib64'):
preserve("/usr/lib64")
preserve("/usr/lib")
# include python3 for bash and python3
best = None
if not include_python2:
# We expect python3 in /usr/bin or /usr/local/bin.
candidates = [
path
# Pick the most generic python if not matching python3.9.
# Sorry this is delicate because of restores, mounts, symlinks.
for pattern in ['python3.9', 'python3', 'python3*']
for root in ['/usr/local', '/usr']
for path in glob.glob(f'{root}/bin/{pattern}')
if os.path.exists(path)
]
if not candidates:
raise Exception('could not find python3')
best = os.path.realpath(candidates[0])
preserve(best)
# include python2 for bash and python2
if not include_python3:
# Try to include python2 only if it is present or we were specifically asked for it.
# This is to facilitate testing on a python3-only container.
if os.path.exists("/usr/bin/python2.7") or include_python2:
preserve("/usr/bin/python2.7", short_failure=True)
best = "/usr/bin/python2.7"
preserve("/usr/lib")
# Set up any specific shares requested.
if args.mount:
preserve(*args.mount)
for directory in os.listdir('/'):
if directory not in exceptions and ("/" + directory) not in preserved:
mounts.insert(0, {
# This places an empty directory at this destination.
# Follow any symlinks since otherwise there is an error.
"destination": os.path.realpath("/" + directory),
"type": "tmpfs"
})
# Set up faketime inside the sandbox if requested. Can't be set up outside the sandbox,
# because gvisor is written in Go and doesn't use the standard library that faketime
# tweaks.
if args.faketime:
preserve('/usr/lib/x86_64-linux-gnu/faketime')
cmd_args.append('faketime')
cmd_args.append('-f')
cmd_args.append('2020-01-01 00:00:00' if args.faketime == 'default' else args.faketime)
preserve('/usr/bin/faketime')
preserve('/bin/date')
# Pick and set an initial entry point (bash or python).
if start:
cmd_args.append(start)
else:
cmd_args.append('bash' if include_bash else best)
# Add any requested arguments for the program that will be run.
cmd_args += more_args
# Helper for assembling a runsc command.
# Takes the directory to work in and a list of arguments to append.
def make_command(root_dir, action):
flag_string = os.environ.get('GVISOR_FLAGS') or '-rootless'
flags = flag_string.split(' ')
command = ["runsc",
"-root", "/tmp/runsc", # Place container information somewhere writable.
] + flags + [
"-network",
"none"] + action + [
root_dir.replace('/', '_')] # Derive an arbitrary container name.
return command
# Generate the OCI spec as config.json in a temporary directory, and either show
# it (if --dry-run) or pass it on to gvisor runsc.
with tempfile.TemporaryDirectory() as root: # pylint: disable=no-member
config_filename = os.path.join(root, 'config.json')
with open(config_filename, 'w') as fout:
json.dump(settings, fout, indent=2)
if args.dry_run:
with open(config_filename, 'r') as fin:
spec = json.load(fin)
print(json.dumps(spec, indent=2))
else:
if not args.checkpoint:
if args.restore:
command = make_command(root, ["restore", "--image-path=" + args.restore])
else:
command = make_command(root, ["run"])
result = subprocess.run(command, cwd=root) # pylint: disable=no-member
if result.returncode != 0:
raise Exception('gvisor runsc problem: ' + json.dumps(command))
else:
# We've been asked to make a checkpoint.
# Start up the sandbox, and wait for it to emit a message on stderr ('Ready').
command = make_command(root, ["run"])
process = subprocess.Popen(command, cwd=root, stderr=subprocess.PIPE)
text = process.stderr.readline().decode('utf-8') # wait for ready
if 'Ready' in text:
sys.stderr.write('Ready message: ' + text)
sys.stderr.flush()
else:
# Something unexpected has happened, echo the full error and hang.
while True:
sys.stderr.write('Problem: ' + text)
sys.stderr.flush()
text = process.stderr.readline().decode('utf-8')
# Remove existing checkpoint if present.
if os.path.exists(os.path.join(args.checkpoint, 'checkpoint.img')):
os.remove(os.path.join(args.checkpoint, 'checkpoint.img'))
if os.path.exists(os.path.join(args.checkpoint, 'checkpoint.json')):
os.remove(os.path.join(args.checkpoint, 'checkpoint.json'))
# Make the directory, so we will later have the right to delete the checkpoint if
# we wish to replace it. Otherwise there is a muddle around permissions.
if not os.path.exists(args.checkpoint):
os.makedirs(args.checkpoint)
# Go ahead and run the runsc checkpoint command.
# This is destructive, it will kill the sandbox we are checkpointing.
command = make_command(root, ["checkpoint", "--image-path=" + args.checkpoint])
result = subprocess.run(command, cwd=root) # pylint: disable=no-member
if result.returncode != 0:
raise Exception('gvisor runsc checkpointing problem: ' + json.dumps(command))
# Save the configuration of the checkpoint for easy reference.
with open(config_filename, 'r', encoding='utf-8') as fin:
with open(os.path.join(args.checkpoint, 'checkpoint.json'), 'w', encoding='utf-8') as fout:
spec = json.load(fin)
json.dump(spec, fout, indent=2)
# We are done!