From dd0f1be1173f5ae6beca1b267534a8a310864c63 Mon Sep 17 00:00:00 2001 From: Paul Fitzpatrick Date: Mon, 18 Oct 2021 13:37:51 -0400 Subject: [PATCH] (core) get all tests working under python3/gvisor Summary: This verifies that all existing tests are capable of running under python3/gvisor, and fixes the small issues that came up. It does not yet activate python3 tests on all diffs, only diffs that specifically request them. * Adds a suffix in test names and output directories for tests run with PYTHON_VERSION=3, so that results of the same test run with and without the flag can be aggregated cleanly. * Adds support for checkpointing to the gvisor sandbox adapter. * Prepares a checkpoint made after grist python code has loaded in the gvisor sandbox. * Changes how `DOC_URL` is passed to the sandbox, since it can no longer be passed in as an environment variable when using checkpoints. * Uses the checkpoint to speed up tests using the gvisor sandbox, otherwise a lot of tests need more time (especially on mac under docker). * Directs jenkins to run all tests with python2 and python3 when a new file `buildtools/changelogs/python.txt` is touched (this diff counts as touching that file). * Tweaks miscellaneous tests - some needed fixes exposed by slightly different timing - a small number actually give different results in py3 (removal of `u` prefixes). - some needed a little more time The DOC_URL change is not the ultimate solution we want for DOC_URL. Eventually it should be a variable that gets updated, like the date perhaps. This is just a small pragmatic change to preserve existing behavior. Tests are run mindlessly as py3, and for some tests it won't change anything (e.g. if they do not use NSandbox). Tests are not run in parallel, doubling overall test time. Checkpoints could be useful in deployment, though this diff doesn't use them there. The application of checkpoints doesn't check for other configuration like 3-versus-5-pipe that we don't actually use. Python2 tests run using pynbox as always for now. The diff got sufficiently bulky that I didn't tackle running py3 on "regular" diffs in it. My preference, given that most tests don't appear to stress the python side of things, would be to make a selection of the tests that do and a few wild cards, and run those tests on both pythons rather then all of them. For diffs making a significant python change, I'd propose touching buildtools/changelogs/python.txt for full tests. But this is a conversation in progress. A total of 6886 tests ran on this diff. Test Plan: this is a step in preparing tests for py3 transition Reviewers: dsagal Reviewed By: dsagal Subscribers: dsagal Differential Revision: https://phab.getgrist.com/D3066 --- app/common/UserAPI.ts | 19 +++++++++++++++---- app/server/generateCheckpoint.ts | 28 ++++++++++++++++++++++++++++ app/server/generateInitialDocSql.ts | 2 +- app/server/lib/ActiveDoc.ts | 5 ++++- app/server/lib/DocApi.ts | 4 +++- app/server/lib/ISandbox.ts | 2 -- app/server/lib/NSandbox.ts | 20 ++++++++++++++++---- sandbox/grist/main.py | 6 ++++++ 8 files changed, 73 insertions(+), 13 deletions(-) create mode 100644 app/server/generateCheckpoint.ts diff --git a/app/common/UserAPI.ts b/app/common/UserAPI.ts index c991352c..ed95b6b4 100644 --- a/app/common/UserAPI.ts +++ b/app/common/UserAPI.ts @@ -337,7 +337,11 @@ export interface UserAPI { * reasons, such as downloads. */ export interface DocAPI { - getRows(tableId: string, options?: { filters?: QueryFilters }): Promise; + // Immediate flag is a currently not-advertised feature, allowing a query to proceed without + // waiting for a document to be initialized. This is useful if the calculations done when + // opening a document are irrelevant. + getRows(tableId: string, options?: { filters?: QueryFilters, + immediate?: boolean }): Promise; updateRows(tableId: string, changes: TableColValues): Promise; addRows(tableId: string, additions: BulkColValues): Promise; removeRows(tableId: string, removals: number[]): Promise; @@ -728,9 +732,16 @@ export class DocAPIImpl extends BaseAPI implements DocAPI { this._url = `${url}/api/docs/${docId}`; } - public async getRows(tableId: string, options?: { filters?: QueryFilters }): Promise { - const query = options?.filters ? ("?filter=" + encodeURIComponent(JSON.stringify(options.filters))) : ''; - return this.requestJson(`${this._url}/tables/${tableId}/data${query}`); + public async getRows(tableId: string, options?: { filters?: QueryFilters, + immediate?: boolean }): Promise { + const url = new URL(`${this._url}/tables/${tableId}/data`); + if (options?.filters) { + url.searchParams.append('filter', JSON.stringify(options.filters)); + } + if (options?.immediate) { + url.searchParams.append('immediate', 'true'); + } + return this.requestJson(url.href); } public async updateRows(tableId: string, changes: TableColValues): Promise { diff --git a/app/server/generateCheckpoint.ts b/app/server/generateCheckpoint.ts new file mode 100644 index 00000000..d79e21cc --- /dev/null +++ b/app/server/generateCheckpoint.ts @@ -0,0 +1,28 @@ +/** + * + * This opens a sandbox in order to capture a checkpoint of the sandbox after Grist + * python code has been loaded within it. This helps run Grist's 1000s of tests under + * gvisor on a ptrace platform, for which all the file accesses on sandbox startup + * are relatively slow, adding about a second relative to pynbox. + * + */ + +import { create } from 'app/server/lib/create'; + +export async function main() { + if (!process.env.GRIST_CHECKPOINT) { + throw new Error('GRIST_CHECKPOINT must be defined'); + } + if (!process.env.GRIST_CHECKPOINT_MAKE) { + throw new Error('GRIST_CHECKPOINT_MAKE must be defined'); + } + create.NSandbox({ + preferredPythonVersion: '3' + }); +} + +if (require.main === module) { + main().catch(e => { + console.error(e); + }); +} diff --git a/app/server/generateInitialDocSql.ts b/app/server/generateInitialDocSql.ts index 5c3ee553..26cfbce0 100644 --- a/app/server/generateInitialDocSql.ts +++ b/app/server/generateInitialDocSql.ts @@ -58,6 +58,6 @@ export async function main(baseName: string) { if (require.main === module) { main(process.argv[2]).catch(e => { - console.log(e); + console.error(e); }); } diff --git a/app/server/lib/ActiveDoc.ts b/app/server/lib/ActiveDoc.ts index 386dac2e..447f9d9f 100644 --- a/app/server/lib/ActiveDoc.ts +++ b/app/server/lib/ActiveDoc.ts @@ -1572,6 +1572,10 @@ export class ActiveDoc extends EventEmitter { await this._tableMetadataLoader.wait(); await this._tableMetadataLoader.clean(); await this._loadTables(docSession, pendingTableNames); + if (this._options?.docUrl) { + await this._pyCall('set_doc_url', this._options.docUrl); + } + // Calculations are not associated specifically with the user opening the document. // TODO: be careful with which users can create formulas. await this._applyUserActions(makeExceptionalDocSession('system'), [['Calculate']]); @@ -1705,7 +1709,6 @@ export class ActiveDoc extends EventEmitter { logCalls: false, logTimes: true, logMeta: {docId: this._docName}, - docUrl: this._options?.docUrl, preferredPythonVersion, }); } diff --git a/app/server/lib/DocApi.ts b/app/server/lib/DocApi.ts index 0f265593..9489bbe8 100644 --- a/app/server/lib/DocApi.ts +++ b/app/server/lib/DocApi.ts @@ -147,12 +147,14 @@ export class DocWorkerApi { async function getTableData(activeDoc: ActiveDoc, req: RequestWithLogin) { const filters = req.query.filter ? JSON.parse(String(req.query.filter)) : {}; + // Option to skip waiting for document initialization. + const immediate = isAffirmative(req.query.immediate); if (!Object.keys(filters).every(col => Array.isArray(filters[col]))) { throw new ApiError("Invalid query: filter values must be arrays", 400); } const tableId = req.params.tableId; const tableData = await handleSandboxError(tableId, [], activeDoc.fetchQuery( - docSessionFromRequest(req), {tableId, filters}, true)); + docSessionFromRequest(req), {tableId, filters}, !immediate)); // Apply sort/limit parameters, if set. TODO: move sorting/limiting into data engine // and sql. const params = getQueryParameters(req); diff --git a/app/server/lib/ISandbox.ts b/app/server/lib/ISandbox.ts index 19d75c56..4083f267 100644 --- a/app/server/lib/ISandbox.ts +++ b/app/server/lib/ISandbox.ts @@ -16,8 +16,6 @@ export interface ISandboxCreationOptions { sandboxMount?: string; // if defined, make this path available read-only as "/sandbox" importMount?: string; // if defined, make this path available read-only as "/importdir" - docUrl?: string; // to support SELF_HYPERLINK. - preferredPythonVersion?: '2' | '3'; } diff --git a/app/server/lib/NSandbox.ts b/app/server/lib/NSandbox.ts index 87dac008..86d0bc4b 100644 --- a/app/server/lib/NSandbox.ts +++ b/app/server/lib/NSandbox.ts @@ -45,7 +45,6 @@ interface ISandboxOptions { // mounts (e.g. for unsandboxed operation). importDir?: string; // a directory containing data file(s) to import by plugins - docUrl?: string; // URL to the document, for SELF_HYPERLINK minimalPipeMode?: boolean; // Whether to use newer 3-pipe operation deterministicMode?: boolean; // Whether to override time + randomness @@ -391,7 +390,6 @@ export class NSandboxCreator implements ISandboxCreator { const translatedOptions: ISandboxOptions = { minimalPipeMode: true, deterministicMode: Boolean(process.env.LIBFAKETIME_PATH), - docUrl: options.docUrl, args, logCalls: options.logCalls, logMeta: {flavor: this._flavor, command: this._command, @@ -523,6 +521,21 @@ function gvisor(options: ISandboxOptions): ChildProcess { if (pythonVersion !== '2' && pythonVersion !== '3') { throw new Error("PYTHON_VERSION must be set to 2 or 3"); } + // For a regular sandbox not being used for importing, if GRIST_CHECKPOINT is set + // try to restore from it. If GRIST_CHECKPOINT_MAKE is set, try to recreate the + // checkpoint (this is an awkward place to do it, but avoids mismatches + // between the checkpoint and how it gets used later). + // If a sandbox is being used for import, it will have a special mount we can't + // deal with easily right now. Should be possible to do in future if desired. + if (options.useGristEntrypoint && pythonVersion === '3' && !paths.importDir && + process.env.GRIST_CHECKPOINT) { + if (process.env.GRIST_CHECKPOINT_MAKE) { + return spawn(command, [...wrapperArgs.get(), '--checkpoint', process.env.GRIST_CHECKPOINT, + `python${pythonVersion}`, '--', ...pythonArgs]); + } + wrapperArgs.push('--restore'); + wrapperArgs.push(process.env.GRIST_CHECKPOINT); + } return spawn(command, [...wrapperArgs.get(), `python${pythonVersion}`, '--', ...pythonArgs]); } @@ -589,6 +602,7 @@ function macSandboxExec(options: ISandboxOptions): ChildProcess { }; const command = findPython(options.command); const realPath = fs.realpathSync(command); + log.rawDebug("macSandboxExec found a python", {...options.logMeta, command: realPath}); // Prepare sandbox profile const profile: string[] = []; @@ -656,8 +670,6 @@ function macSandboxExec(options: ISandboxOptions): ChildProcess { */ export function getInsertedEnv(options: ISandboxOptions) { const env: NodeJS.ProcessEnv = { - DOC_URL: (options.docUrl || '').replace(/[^-a-zA-Z0-9_:/?&.~]/g, ''), - // use stdin/stdout/stderr only. PIPE_MODE: options.minimalPipeMode ? 'minimal' : 'classic', }; diff --git a/sandbox/grist/main.py b/sandbox/grist/main.py index 2f730711..e1bba677 100644 --- a/sandbox/grist/main.py +++ b/sandbox/grist/main.py @@ -2,6 +2,7 @@ This module defines what sandbox functions are made available to the Node controller, and starts the grist sandbox. See engine.py for the API documentation. """ +import os import sys sys.path.append('thirdparty') # pylint: disable=wrong-import-position @@ -100,6 +101,10 @@ def run(sandbox): def get_version(): return schema.SCHEMA_VERSION + @export + def set_doc_url(doc_url): + os.environ['DOC_URL'] = doc_url + @export def get_formula_error(table_id, col_id, row_id): return objtypes.encode_object(eng.get_formula_error(table_id, col_id, row_id)) @@ -110,6 +115,7 @@ def run(sandbox): register_import_parsers(sandbox) + log.info("Ready") # This log message is significant for checkpointing. sandbox.run() def main():