(core) External requests

Summary:
Adds a Python function `REQUEST` which makes an HTTP GET request. Behind the scenes it:

- Raises a special exception to stop trying to evaluate the current cell and just keep the existing value.
- Notes the request arguments which will be returned by `apply_user_actions`.
- Makes the actual request in NodeJS, which sends back the raw response data in a new action `RespondToRequests` which reevaluates the cell(s) that made the request.
- Wraps the response data in a class which mimics the `Response` class of the `requests` library.

In certain cases, this asynchronous flow doesn't work and the sandbox will instead synchronously call an exported JS method:

- When reevaluating a single cell to get a formula error, the request is made synchronously.
- When a formula makes multiple requests, the earlier responses are retrieved synchronously from files which store responses as long as needed to complete evaluating formulas. See https://grist.slack.com/archives/CL1LQ8AT0/p1653399747810139

Test Plan: Added Python and nbrowser tests.

Reviewers: georgegevoian

Reviewed By: georgegevoian

Subscribers: paulfitz, dsagal

Differential Revision: https://phab.getgrist.com/D3429
This commit is contained in:
Alex Hall
2022-06-17 20:49:18 +02:00
parent abebe812db
commit 9fffb491f9
17 changed files with 582 additions and 24 deletions

View File

@@ -62,6 +62,16 @@ export interface SandboxActionBundle {
undo: Array<EnvContent<DocAction>>; // Inverse actions for all 'stored' actions.
retValues: any[]; // Contains retValue for each of userActions.
rowCount: number;
// Mapping of keys (hashes of request args) to all unique requests made in a round of calculation
requests?: Record<string, SandboxRequest>;
}
// Represents a unique call to the Python REQUEST function
export interface SandboxRequest {
// URL to fetch; the server parses it with `new URL(url)` before making the request.
url: string;
// Optional query parameters; each entry is appended to the URL's search params. May be null.
params: Record<string, string> | null;
// Optional HTTP headers sent with the request. May be null.
headers: Record<string, string> | null;
deps: unknown; // pass back to the sandbox unchanged in the response
}
// Local action that's been applied. It now has an actionNum, and includes doc actions packaged

View File

@@ -144,6 +144,9 @@ export interface TableRecordValue {
export type UserAction = Array<string|number|object|boolean|null|undefined>;
// Actions that trigger formula calculations in the data engine.
// A bundle consisting of exactly one of these user actions is treated as a "calculate" bundle
// (see the `isCalculate` check in Sharing).
export const CALCULATING_USER_ACTIONS = new Set(['Calculate', 'UpdateCurrentTime', 'RespondToRequests']);
/**
* Gives a description for an action which involves setting values to a selection.
* @param {Array} action - The (Bulk)AddRecord/(Bulk)UpdateRecord action to describe.

View File

@@ -4,7 +4,13 @@
* change events.
*/
import {getEnvContent, LocalActionBundle, SandboxActionBundle, UserActionBundle} from 'app/common/ActionBundle';
import {
getEnvContent,
LocalActionBundle,
SandboxActionBundle,
SandboxRequest,
UserActionBundle
} from 'app/common/ActionBundle';
import {ActionGroup, MinimalActionGroup} from 'app/common/ActionGroup';
import {ActionSummary} from "app/common/ActionSummary";
import {
@@ -33,12 +39,7 @@ import {
UserAction
} from 'app/common/DocActions';
import {DocData} from 'app/common/DocData';
import {
getDataLimitRatio,
getDataLimitStatus,
getSeverity,
LimitExceededError,
} from 'app/common/DocLimits';
import {getDataLimitRatio, getDataLimitStatus, getSeverity, LimitExceededError} from 'app/common/DocLimits';
import {DocSnapshots} from 'app/common/DocSnapshot';
import {DocumentSettings} from 'app/common/DocumentSettings';
import {
@@ -75,6 +76,7 @@ import {GRIST_DOC_SQL, GRIST_DOC_WITH_TABLE1_SQL} from 'app/server/lib/initialDo
import {ISandbox} from 'app/server/lib/ISandbox';
import * as log from 'app/server/lib/log';
import {LogMethods} from "app/server/lib/LogMethods";
import {DocRequests} from 'app/server/lib/Requests';
import {shortDesc} from 'app/server/lib/shortDesc';
import {TableMetadataLoader} from 'app/server/lib/TableMetadataLoader';
import {DocTriggers} from "app/server/lib/Triggers";
@@ -182,6 +184,7 @@ export class ActiveDoc extends EventEmitter {
private _log = new LogMethods('ActiveDoc ', (s: OptDocSession) => this.getLogMeta(s));
private _triggers: DocTriggers;
private _requests: DocRequests;
private _dataEngine: Promise<ISandbox>|undefined;
private _activeDocImport: ActiveDocImport;
private _onDemandActions: OnDemandActions;
@@ -270,6 +273,7 @@ export class ActiveDoc extends EventEmitter {
this.docStorage = new DocStorage(docManager.storageManager, docName);
this.docClients = new DocClients(this);
this._triggers = new DocTriggers(this);
this._requests = new DocRequests(this);
this._actionHistory = new ActionHistoryImpl(this.docStorage);
this.docPluginManager = new DocPluginManager(docManager.pluginManager.getPlugins(),
docManager.pluginManager.appRoot!, this, this._docManager.gristServer);
@@ -1095,7 +1099,7 @@ export class ActiveDoc extends EventEmitter {
this.dataLimitStatus === "deleteOnly" &&
!actions.every(action => [
'RemoveTable', 'RemoveColumn', 'RemoveRecord', 'BulkRemoveRecord',
'RemoveViewSection', 'RemoveView', 'ApplyUndoActions',
'RemoveViewSection', 'RemoveView', 'ApplyUndoActions', 'RespondToRequests',
].includes(action[0] as string))
) {
throw new Error("Document is in delete-only mode");
@@ -1420,6 +1424,10 @@ export class ActiveDoc extends EventEmitter {
}
const user = docSession ? await this._granularAccess.getCachedUser(docSession) : undefined;
sandboxActionBundle = await this._rawPyCall('apply_user_actions', normalActions, user?.toJSON());
const {requests} = sandboxActionBundle;
if (requests) {
this._requests.handleRequestsBatchFromUserActions(requests).catch(e => console.error(e));
}
await this._reportDataEngineMemory();
} else {
// Create default SandboxActionBundle to use if the data engine is not called.
@@ -2087,6 +2095,7 @@ export class ActiveDoc extends EventEmitter {
preferredPythonVersion,
sandboxOptions: {
exports: {
request: (key: string, args: SandboxRequest) => this._requests.handleSingleRequestWithCache(key, args),
guessColInfo: (values: Array<string | null>) =>
guessColInfoWithDocData(values, this.docData!),
convertFromColumn: (...args: Parameters<ReturnType<typeof convertFromColumn>>) =>

View File

@@ -4,7 +4,13 @@ import { ActionGroup } from 'app/common/ActionGroup';
import { createEmptyActionSummary } from 'app/common/ActionSummary';
import { ServerQuery } from 'app/common/ActiveDocAPI';
import { ApiError } from 'app/common/ApiError';
import { AddRecord, BulkAddRecord, BulkColValues, BulkRemoveRecord, BulkUpdateRecord } from 'app/common/DocActions';
import {
AddRecord,
BulkAddRecord,
BulkColValues,
BulkRemoveRecord,
BulkUpdateRecord,
} from 'app/common/DocActions';
import { RemoveRecord, ReplaceTableData, UpdateRecord } from 'app/common/DocActions';
import { CellValue, ColValues, DocAction, getTableId, isSchemaAction } from 'app/common/DocActions';
import { TableDataAction, UserAction } from 'app/common/DocActions';

128
app/server/lib/Requests.ts Normal file
View File

@@ -0,0 +1,128 @@
import {SandboxRequest} from 'app/common/ActionBundle';
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
import {makeExceptionalDocSession} from 'app/server/lib/DocSession';
import {httpEncoding} from 'app/server/lib/httpEncoding';
import {HttpsProxyAgent} from 'https-proxy-agent';
import {HttpProxyAgent} from 'http-proxy-agent';
import fetch from 'node-fetch';
import * as path from 'path';
import * as tmp from 'tmp';
import chunk = require('lodash/chunk');
import fromPairs = require('lodash/fromPairs');
import zipObject = require('lodash/zipObject');
import * as fse from 'fs-extra';
import * as log from 'app/server/lib/log';
/**
 * Performs the HTTP requests noted by the data engine (the Python `REQUEST` function) for one
 * document, and sends the responses back to the engine in `RespondToRequests` user actions.
 *
 * Responses are also written to short-lived cache files so that the sandbox can synchronously
 * fetch a response it has already seen (see `handleSingleRequestWithCache`).
 */
export class DocRequests {
  // Request responses are briefly cached in files only to handle multiple requests in a formula
  // and only as long as needed to finish calculating all formulas.
  // When _numPending reaches 0 again, _cacheDir is deleted.
  private _numPending: number = 0;
  private _cacheDir: tmp.SynchrounousResult | null = null;

  constructor(private readonly _activeDoc: ActiveDoc) {}

  /**
   * Handles all requests collected during one `apply_user_actions` call.
   *
   * Requests are performed in batches of up to 10 in parallel. After each batch, a
   * `RespondToRequests` action is applied so that the cells which made the requests get
   * recalculated with the responses.
   *
   * @param requests Mapping of request key (hash of the request args) to the request itself.
   */
  public async handleRequestsBatchFromUserActions(requests: Record<string, SandboxRequest>): Promise<void> {
    const requestKeys = Object.keys(requests);
    const numRequests = requestKeys.length;
    this._numPending += numRequests;
    try {
      // Perform batches of requests in parallel for speed, and hope it doesn't cause rate limiting...
      for (const keys of chunk(requestKeys, 10)) {
        const responses: Response[] = await Promise.all(keys.map(async key => {
          const request = requests[key];
          const response = await this.handleSingleRequestWithCache(key, request);
          return {
            ...response,
            // Tells the engine which cell(s) made the request and should be recalculated to use the response
            deps: request.deps,
          };
        }));

        // Tell the sandbox which previous responses we have cached in files.
        // This lets it know it can immediately and synchronously get those responses again.
        const cachedRequestKeys = this._cacheDir ? await fse.readdir(this._cacheDir.name) : [];

        // Recalculate formulas using this batch of responses.
        const action = ["RespondToRequests", zipObject(keys, responses), cachedRequestKeys];
        await this._activeDoc.applyUserActions(makeExceptionalDocSession("system"), [action]);
      }
    } finally {
      this._numPending -= numRequests;
      // The cache dir only exists if at least one request was actually handled, so an empty
      // batch (or one that failed before creating it) must not try to remove it.
      if (this._numPending === 0 && this._cacheDir) {
        log.debug(`Removing DocRequests._cacheDir: ${this._cacheDir.name}`);
        this._cacheDir.removeCallback();
        this._cacheDir = null;
      }
    }
  }

  /**
   * Returns the response for a single request, reading it from the cache file for `key` when
   * one can be read, and otherwise performing the request and caching the result.
   *
   * This is also what the sandbox calls directly (synchronously from its point of view) through
   * the exported `request` method.
   *
   * @param key Identifies the request; used as the name of the cache file.
   * @param request The request arguments.
   * @returns Either the successful response, or an object with an `error` message.
   */
  public async handleSingleRequestWithCache(key: string, request: SandboxRequest): Promise<Response> {
    if (!this._cacheDir) {
      // Use the sync API because otherwise multiple requests being handled at the same time
      // all reach this point, `await`, and create different dirs.
      // `unsafeCleanup: true` means the directory can be deleted even if it's not empty, which is what we expect.
      this._cacheDir = tmp.dirSync({unsafeCleanup: true});
      log.debug(`Created DocRequests._cacheDir: ${this._cacheDir.name}`);
    }

    // NOTE(review): `key` is used directly as a file name. Presumably it is always a hash
    // produced by the engine and cannot contain path separators -- confirm.
    const cachePath = path.resolve(this._cacheDir.name, key);
    try {
      const result = await fse.readJSON(cachePath);
      // Only successful responses carry `content` (stored as base64). Cached error responses
      // have no content and must be returned as they are rather than treated as a cache miss.
      if (typeof result.content === "string") {
        result.content = Buffer.from(result.content, "base64");
      }
      return result;
    } catch {
      // No usable cache file: make the request for real, then cache it on a best-effort basis.
      const result = await this._handleSingleRequestRaw(request);
      const resultForJson = {...result} as any;
      if ('content' in result) {
        // JSON can't hold a Buffer, so store the body as base64 text.
        resultForJson.content = result.content.toString("base64");
      }
      // Deliberately not awaited: a failure to cache must not delay or fail the response.
      fse.writeJSON(cachePath, resultForJson).catch(e => log.warn(`Failed to save response to cache file: ${e}`));
      return result;
    }
  }

  /**
   * Performs the HTTP request without any caching.
   *
   * Never throws: any failure (including the feature being disabled, or an invalid URL) is
   * returned as `{error: string}`.
   */
  private async _handleSingleRequestRaw(request: SandboxRequest): Promise<Response> {
    try {
      if (process.env.GRIST_EXPERIMENTAL_PLUGINS !== '1') {
        throw new Error("REQUEST is not enabled");
      }
      const {url, params, headers} = request;
      const urlObj = new URL(url);
      log.rawInfo("Handling sandbox request", {host: urlObj.host, docId: this._activeDoc.docName});
      for (const [param, value] of Object.entries(params || {})) {
        urlObj.searchParams.append(param, value);
      }
      const response = await fetch(urlObj.toString(), {headers: headers || {}, agent: proxyAgent(urlObj)});
      const content = await response.buffer();
      const {status, statusText} = response;
      const encoding = httpEncoding(response.headers.get('content-type'), content);
      return {
        content, status, statusText, encoding,
        headers: fromPairs([...response.headers]),
      };
    } catch (e) {
      return {error: String(e)};
    }
  }
}
// Response data for a request that completed, whatever its HTTP status.
interface SuccessfulResponse {
// Raw response body.
content: Buffer;
// HTTP status code and status text of the response.
status: number;
statusText: string;
// Character encoding determined by httpEncoding() from the content-type header and the body, if any.
encoding?: string;
// Response headers as a plain object.
headers: Record<string, string>;
}
// Result for a request that failed with an exception; `error` is the stringified exception.
interface RequestError {
error: string;
}
// What is sent back for a single request: either the response data or an error.
type Response = RequestError | SuccessfulResponse;
/**
 * Picks the agent to use for a request to `requestUrl` when a proxy is configured through
 * the GRIST_HTTPS_PROXY environment variable.
 *
 * @returns An agent matching the protocol of `requestUrl`, or undefined if no proxy is set.
 */
function proxyAgent(requestUrl: URL) {
  const proxyUrl = process.env.GRIST_HTTPS_PROXY;
  if (proxyUrl) {
    if (requestUrl.protocol === "https:") {
      return new HttpsProxyAgent(proxyUrl);
    }
    return new HttpProxyAgent(proxyUrl);
  }
  return undefined;
}

View File

@@ -6,7 +6,7 @@ import {
LocalActionBundle,
UserActionBundle
} from 'app/common/ActionBundle';
import {DocAction, getNumRows, UserAction} from 'app/common/DocActions';
import {CALCULATING_USER_ACTIONS, DocAction, getNumRows, UserAction} from 'app/common/DocActions';
import {allToken} from 'app/common/sharing';
import * as log from 'app/server/lib/log';
import {LogMethods} from "app/server/lib/LogMethods";
@@ -215,8 +215,7 @@ export class Sharing {
try {
const isCalculate = (userActions.length === 1 &&
(userActions[0][0] === 'Calculate' || userActions[0][0] === 'UpdateCurrentTime'));
const isCalculate = (userActions.length === 1 && CALCULATING_USER_ACTIONS.has(userActions[0][0] as string));
// `internal` is true if users shouldn't be able to undo the actions. Applies to:
// - Calculate/UpdateCurrentTime because it's not considered as performed by a particular client.
// - Adding attachment metadata when uploading attachments,

View File

@@ -0,0 +1,43 @@
/**
 * Determines the character encoding of an HTTP response body.
 *
 * Based on the source code of the Body.textConverted method in node-fetch.
 *
 * The charset is taken from the content-type header if present there; otherwise the first
 * 1024 bytes of the body are searched for an HTML5 `<meta charset>`, an HTML4
 * `<meta http-equiv="content-type">`, or an XML declaration.
 *
 * @param header Value of the content-type header, or null if there is none.
 * @param content The raw response body.
 * @returns The charset name with surrounding whitespace and quotes removed, or undefined if
 *   none was found. `gb2312` and `gbk` (in any letter case) are reported as `gb18030`.
 */
export function httpEncoding(header: string | null, content: Buffer): string | undefined {
  let res: RegExpExecArray | null = null;

  // header
  if (header) {
    res = /charset=([^;]*)/i.exec(header);
  }

  if (!res) {
    // no charset in content type, peek at response body for at most 1024 bytes
    const str = content.subarray(0, 1024).toString();
    if (str) {
      // html5
      res = /<meta.+?charset=(['"])(.+?)\1/i.exec(str);

      // html4
      if (!res) {
        res = /<meta\s+?http-equiv=(['"])content-type\1\s+?content=(['"])(.+?)\2/i.exec(str);
        if (res) {
          res = /charset=(.*)/i.exec(res.pop()!);
        }
      }

      // xml
      if (!res) {
        res = /<\?xml.+?encoding=(['"])(.+?)\1/i.exec(str);
      }
    }
  }

  // The charset is always the last capture group of whichever pattern matched.
  const found = res ? res.pop() : undefined;
  if (found === undefined) {
    return undefined;
  }

  // A header may legally quote the value (`charset="utf-8"`) or pad it with whitespace;
  // neither belongs to the charset name.
  let charset = found.trim().replace(/^(['"])(.*)\1$/, '$2').trim();

  // prevent decode issues when sites use incorrect encoding
  // ref: https://hsivonen.fi/encoding-menu/
  // Charset names are case-insensitive, so compare in lower case.
  const lowered = charset.toLowerCase();
  if (lowered === 'gb2312' || lowered === 'gbk') {
    charset = 'gb18030';
  }
  return charset;
}