(core) Support adjusting OOM score for child sandbox processes.

Summary:
Also update error handling in NSandbox initialization to avoid node
exiting when sandbox can't be created.

Info on oom_score and choom: https://man7.org/linux/man-pages/man1/choom.1.html

Empirically, in docker and under fargate, oom_score (visible in
/proc/PID/oom_score) seems to be calculated approximately as

    ((ProcessRSS / TotalMemory * 1000 + 999 + oom_score_adj) * 2/3)

though this doesn't correspond to any documentation I could find.

In addition, under docker / fargate it does not work to set oom_score_adj (with choom or via /proc/PID/oom_score_adj) to a negative value to give priority to a node process. That's why this diff adjusts the score up for sandbox processes instead.

Test Plan:
Checked that grist-omnibus built with this change respects the
variable, and sandbox processes end up with higher oom_score values.

Reviewers: paulfitz

Reviewed By: paulfitz

Subscribers: paulfitz

Differential Revision: https://phab.getgrist.com/D4112
This commit is contained in:
Dmitry S 2023-11-12 20:04:34 -05:00
parent c9bba5207e
commit 2eec48b685
2 changed files with 36 additions and 22 deletions

View File

@ -16,7 +16,7 @@ import {
} from 'app/server/lib/SandboxControl'; } from 'app/server/lib/SandboxControl';
import * as sandboxUtil from 'app/server/lib/sandboxUtil'; import * as sandboxUtil from 'app/server/lib/sandboxUtil';
import * as shutdown from 'app/server/lib/shutdown'; import * as shutdown from 'app/server/lib/shutdown';
import {ChildProcess, fork, spawn} from 'child_process'; import {ChildProcess, fork, spawn, SpawnOptionsWithoutStdio} from 'child_process';
import * as fs from 'fs'; import * as fs from 'fs';
import * as _ from 'lodash'; import * as _ from 'lodash';
import * as path from 'path'; import * as path from 'path';
@ -77,7 +77,7 @@ export interface ISandboxOptions {
*/ */
export interface SandboxProcess { export interface SandboxProcess {
child?: ChildProcess; child?: ChildProcess;
control: ISandboxControl; control: () => ISandboxControl;
dataToSandboxDescriptor?: number; // override sandbox's 'stdin' for data dataToSandboxDescriptor?: number; // override sandbox's 'stdin' for data
dataFromSandboxDescriptor?: number; // override sandbox's 'stdout' for data dataFromSandboxDescriptor?: number; // override sandbox's 'stdout' for data
getData?: (cb: (data: any) => void) => void; // use a callback instead of a pipe to get data getData?: (cb: (data: any) => void) => void; // use a callback instead of a pipe to get data
@ -134,11 +134,15 @@ export class NSandbox implements ISandbox {
this._exportedFunctions = options.exports || {}; this._exportedFunctions = options.exports || {};
const sandboxProcess = spawner(options); const sandboxProcess = spawner(options);
this._control = sandboxProcess.control;
this.childProc = sandboxProcess.child; this.childProc = sandboxProcess.child;
this._logMeta = {sandboxPid: this.childProc?.pid, ...options.logMeta}; this._logMeta = {sandboxPid: this.childProc?.pid, ...options.logMeta};
// Handle childProc events early, especially the 'error' event which may lead to node exiting.
this.childProc?.on('close', this._onExit.bind(this));
this.childProc?.on('error', this._onError.bind(this));
this._control = sandboxProcess.control();
if (this.childProc) { if (this.childProc) {
if (options.minimalPipeMode) { if (options.minimalPipeMode) {
this._initializeMinimalPipeMode(sandboxProcess); this._initializeMinimalPipeMode(sandboxProcess);
@ -285,9 +289,6 @@ export class NSandbox implements ISandbox {
sandboxStderrLogger(data); sandboxStderrLogger(data);
}); });
this.childProc.on('close', this._onExit.bind(this));
this.childProc.on('error', this._onError.bind(this));
this._streamFromSandbox.on('data', (data) => this._onSandboxData(data)); this._streamFromSandbox.on('data', (data) => this._onSandboxData(data));
this._streamFromSandbox.on('end', () => this._onSandboxClose()); this._streamFromSandbox.on('end', () => this._onSandboxClose());
this._streamFromSandbox.on('error', (err) => { this._streamFromSandbox.on('error', (err) => {
@ -318,7 +319,7 @@ export class NSandbox implements ISandbox {
private _close() { private _close() {
this._control.prepareToClose(); this._control?.prepareToClose(); // ?. operator in case _control failed to get initialized.
if (!this._isWriteClosed) { if (!this._isWriteClosed) {
// Close the pipe to the sandbox, which should cause the sandbox to exit cleanly. // Close the pipe to the sandbox, which should cause the sandbox to exit cleanly.
this._streamToSandbox?.end(); this._streamToSandbox?.end();
@ -584,7 +585,7 @@ function pynbox(options: ISandboxOptions): SandboxProcess {
const noLog = unsilenceLog ? [] : const noLog = unsilenceLog ? [] :
(process.env.OS === 'Windows_NT' ? ['-l', 'NUL'] : ['-l', '/dev/null']); (process.env.OS === 'Windows_NT' ? ['-l', 'NUL'] : ['-l', '/dev/null']);
const child = spawn('sandbox/nacl/bin/sel_ldr', [ const child = adjustedSpawn('sandbox/nacl/bin/sel_ldr', [
'-B', './sandbox/nacl/lib/irt_core.nexe', '-m', './sandbox/nacl/root:/:ro', '-B', './sandbox/nacl/lib/irt_core.nexe', '-m', './sandbox/nacl/root:/:ro',
...noLog, ...noLog,
...wrapperArgs.get(), ...wrapperArgs.get(),
@ -592,7 +593,7 @@ function pynbox(options: ISandboxOptions): SandboxProcess {
'--library-path', '/slib', '/python/bin/python2.7.nexe', '--library-path', '/slib', '/python/bin/python2.7.nexe',
...pythonArgs ...pythonArgs
], spawnOptions); ], spawnOptions);
return {child, control: new DirectProcessControl(child, options.logMeta)}; return {child, control: () => new DirectProcessControl(child, options.logMeta)};
} }
/** /**
@ -621,9 +622,9 @@ function unsandboxed(options: ISandboxOptions): SandboxProcess {
spawnOptions.stdio.push('pipe', 'pipe'); spawnOptions.stdio.push('pipe', 'pipe');
} }
const command = findPython(options.command, options.preferredPythonVersion); const command = findPython(options.command, options.preferredPythonVersion);
const child = spawn(command, pythonArgs, const child = adjustedSpawn(command, pythonArgs,
{cwd: path.join(process.cwd(), 'sandbox'), ...spawnOptions}); {cwd: path.join(process.cwd(), 'sandbox'), ...spawnOptions});
return {child, control: new DirectProcessControl(child, options.logMeta)}; return {child, control: () => new DirectProcessControl(child, options.logMeta)};
} }
function pyodide(options: ISandboxOptions): SandboxProcess { function pyodide(options: ISandboxOptions): SandboxProcess {
@ -648,7 +649,7 @@ function pyodide(options: ISandboxOptions): SandboxProcess {
{cwd: path.join(process.cwd(), 'sandbox'), ...spawnOptions}); {cwd: path.join(process.cwd(), 'sandbox'), ...spawnOptions});
return { return {
child, child,
control: new DirectProcessControl(child, options.logMeta), control: () => new DirectProcessControl(child, options.logMeta),
dataToSandboxDescriptor: 4, // Cannot use normal descriptor, node dataToSandboxDescriptor: 4, // Cannot use normal descriptor, node
// makes it non-blocking. Can be worked around in linux and osx, but // makes it non-blocking. Can be worked around in linux and osx, but
// for windows just using a different file descriptor seems simplest. // for windows just using a different file descriptor seems simplest.
@ -728,16 +729,17 @@ function gvisor(options: ISandboxOptions): SandboxProcess {
if (options.useGristEntrypoint && pythonVersion === '3' && process.env.GRIST_CHECKPOINT && !paths.importDir) { if (options.useGristEntrypoint && pythonVersion === '3' && process.env.GRIST_CHECKPOINT && !paths.importDir) {
if (process.env.GRIST_CHECKPOINT_MAKE) { if (process.env.GRIST_CHECKPOINT_MAKE) {
const child = const child =
spawn(command, [...wrapperArgs.get(), '--checkpoint', process.env.GRIST_CHECKPOINT!, adjustedSpawn(command, [...wrapperArgs.get(), '--checkpoint', process.env.GRIST_CHECKPOINT!,
`python${pythonVersion}`, '--', ...pythonArgs]); `python${pythonVersion}`, '--', ...pythonArgs]);
// We don't want process control for this. // We don't want process control for this.
return {child, control: new NoProcessControl(child)}; return {child, control: () => new NoProcessControl(child)};
} }
wrapperArgs.push('--restore'); wrapperArgs.push('--restore');
wrapperArgs.push(process.env.GRIST_CHECKPOINT!); wrapperArgs.push(process.env.GRIST_CHECKPOINT!);
} }
const child = spawn(command, [...wrapperArgs.get(), `python${pythonVersion}`, '--', ...pythonArgs]); const child = adjustedSpawn(command, [...wrapperArgs.get(), `python${pythonVersion}`, '--', ...pythonArgs]);
if (!child.pid) { const childPid = child.pid;
if (!childPid) {
throw new Error(`failed to spawn python${pythonVersion}`); throw new Error(`failed to spawn python${pythonVersion}`);
} }
@ -751,8 +753,8 @@ function gvisor(options: ISandboxOptions): SandboxProcess {
return p.label.includes('runsc-sandbox'); return p.label.includes('runsc-sandbox');
}; };
// If docker is in use, this process control will log a warning message and do nothing. // If docker is in use, this process control will log a warning message and do nothing.
return {child, control: new SubprocessControl({ return {child, control: () => new SubprocessControl({
pid: child.pid, pid: childPid,
recognizers: { recognizers: {
sandbox: recognizeSandboxProcess, // this process we start and stop sandbox: recognizeSandboxProcess, // this process we start and stop
memory: recognizeTracedProcess, // measure memory for the ptraced process memory: recognizeTracedProcess, // measure memory for the ptraced process
@ -800,7 +802,7 @@ function docker(options: ISandboxOptions): SandboxProcess {
...pythonArgs, ...pythonArgs,
]); ]);
log.rawDebug("cannot do process control via docker yet", {...options.logMeta}); log.rawDebug("cannot do process control via docker yet", {...options.logMeta});
return {child, control: new NoProcessControl(child)}; return {child, control: () => new NoProcessControl(child)};
} }
/** /**
@ -890,7 +892,7 @@ function macSandboxExec(options: ISandboxOptions): SandboxProcess {
const profileString = profile.join('\n'); const profileString = profile.join('\n');
const child = spawn('/usr/bin/sandbox-exec', ['-p', profileString, command, ...pythonArgs], const child = spawn('/usr/bin/sandbox-exec', ['-p', profileString, command, ...pythonArgs],
{cwd, env}); {cwd, env});
return {child, control: new DirectProcessControl(child, options.logMeta)}; return {child, control: () => new DirectProcessControl(child, options.logMeta)};
} }
/** /**
@ -1082,3 +1084,12 @@ function realpathSync(src: string) {
return src; return src;
} }
} }
/**
 * Drop-in replacement for `spawn()` used when launching sandbox processes.
 *
 * When the GRIST_SANDBOX_OOM_SCORE_ADJ environment variable is set to a non-empty value,
 * the command is launched through `choom -n <value> -- cmd args...`, so that the child
 * starts with that OOM score adjustment. Otherwise the command is spawned directly.
 *
 * @param cmd - Executable to run.
 * @param args - Arguments passed to `cmd`.
 * @param options - Spawn options, forwarded unchanged in both cases.
 * @returns The child process returned by `spawn()`.
 */
function adjustedSpawn(cmd: string, args: string[], options?: SpawnOptionsWithoutStdio) {
  const scoreAdjustment = process.env.GRIST_SANDBOX_OOM_SCORE_ADJ;
  // Unset or empty variable: no wrapping, launch the command as-is.
  if (!scoreAdjustment) {
    return spawn(cmd, args, options);
  }
  // The value is handed to choom verbatim; '--' ends choom's own option parsing.
  const choomArgs = ['-n', scoreAdjustment, '--', cmd].concat(args);
  return spawn('choom', choomArgs, options);
}

View File

@ -1,4 +1,5 @@
import { BulkColValues, TableColValues, TableDataAction, toTableDataAction } from 'app/common/DocActions'; import { BulkColValues, TableColValues, TableDataAction, toTableDataAction } from 'app/common/DocActions';
import log from 'app/server/lib/log';
import fromPairs = require('lodash/fromPairs'); import fromPairs = require('lodash/fromPairs');
@ -185,7 +186,9 @@ export class TableMetadataLoader {
// Be careful to do the core push first, once we can. // Be careful to do the core push first, once we can.
if (!this._corePushed) { if (!this._corePushed) {
if (this._corePush === undefined && newPushes.has('_grist_Tables') && newPushes.has('_grist_Tables_column')) { if (this._corePush === undefined && newPushes.has('_grist_Tables') && newPushes.has('_grist_Tables_column')) {
this._corePush = this._counted(this.opCorePush()); this._corePush = this._counted(this.opCorePush()).catch(e => {
log.warn(`TableMetadataLoader opCorePush failed: ${e}`);
});
} }
return; return;
} }