Summary: Adds a Python function `REQUEST` which makes an HTTP GET request. Behind the scenes it:
- Raises a special exception to stop trying to evaluate the current cell and just keep the existing value.
- Notes the request arguments, which will be returned by `apply_user_actions`.
- Makes the actual request in Node.js, which sends back the raw response data in a new action `RespondToRequests` that reevaluates the cell(s) that made the request.
- Wraps the response data in a class which mimics the `Response` class of the `requests` library.

In certain cases, this asynchronous flow doesn't work and the sandbox will instead synchronously call an exported JS method:
- When reevaluating a single cell to get a formula error, the request is made synchronously.
- When a formula makes multiple requests, the earlier responses are retrieved synchronously from files which store responses as long as needed to complete evaluating formulas.

See https://grist.slack.com/archives/CL1LQ8AT0/p1653399747810139

Test Plan: Added Python and nbrowser tests.
Reviewers: georgegevoian
Reviewed By: georgegevoian
Subscribers: paulfitz, dsagal
Differential Revision: https://phab.getgrist.com/D3429
parent
abebe812db
commit
9fffb491f9
@ -0,0 +1,128 @@
|
||||
import {SandboxRequest} from 'app/common/ActionBundle';
|
||||
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
|
||||
import {makeExceptionalDocSession} from 'app/server/lib/DocSession';
|
||||
import {httpEncoding} from 'app/server/lib/httpEncoding';
|
||||
import {HttpsProxyAgent} from 'https-proxy-agent';
|
||||
import {HttpProxyAgent} from 'http-proxy-agent';
|
||||
import fetch from 'node-fetch';
|
||||
import * as path from 'path';
|
||||
import * as tmp from 'tmp';
|
||||
import chunk = require('lodash/chunk');
|
||||
import fromPairs = require('lodash/fromPairs');
|
||||
import zipObject = require('lodash/zipObject');
|
||||
import * as fse from 'fs-extra';
|
||||
import * as log from 'app/server/lib/log';
|
||||
|
||||
export class DocRequests {
|
||||
// Request responses are briefly cached in files only to handle multiple requests in a formula
|
||||
// and only as long as needed to finish calculating all formulas.
|
||||
// When _numPending reaches 0 again, _cacheDir is deleted.
|
||||
private _numPending: number = 0;
|
||||
private _cacheDir: tmp.SynchrounousResult | null = null;
|
||||
|
||||
constructor(private readonly _activeDoc: ActiveDoc) {}
|
||||
|
||||
public async handleRequestsBatchFromUserActions(requests: Record<string, SandboxRequest>) {
|
||||
const numRequests = Object.keys(requests).length;
|
||||
this._numPending += numRequests;
|
||||
try {
|
||||
// Perform batches of requests in parallel for speed, and hope it doesn't cause rate limiting...
|
||||
for (const keys of chunk(Object.keys(requests), 10)) {
|
||||
const responses: Response[] = await Promise.all(keys.map(async key => {
|
||||
const request = requests[key];
|
||||
const response = await this.handleSingleRequestWithCache(key, request);
|
||||
return {
|
||||
...response,
|
||||
// Tells the engine which cell(s) made the request and should be recalculated to use the response
|
||||
deps: request.deps,
|
||||
};
|
||||
}));
|
||||
// Tell the sandbox which previous responses we have cached in files.
|
||||
// This lets it know it can immediately and synchronously get those responses again.
|
||||
const cachedRequestKeys = await fse.readdir(this._cacheDir!.name);
|
||||
// Recalculate formulas using this batch of responses.
|
||||
const action = ["RespondToRequests", zipObject(keys, responses), cachedRequestKeys];
|
||||
await this._activeDoc.applyUserActions(makeExceptionalDocSession("system"), [action]);
|
||||
}
|
||||
} finally {
|
||||
this._numPending -= numRequests;
|
||||
if (this._numPending === 0) {
|
||||
log.debug(`Removing DocRequests._cacheDir: ${this._cacheDir!.name}`);
|
||||
this._cacheDir!.removeCallback();
|
||||
this._cacheDir = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public async handleSingleRequestWithCache(key: string, request: SandboxRequest): Promise<Response> {
|
||||
if (!this._cacheDir) {
|
||||
// Use the sync API because otherwise multiple requests being handled at the same time
|
||||
// all reach this point, `await`, and create different dirs.
|
||||
// `unsafeCleanup: true` means the directory can be deleted even if it's not empty, which is what we expect.
|
||||
this._cacheDir = tmp.dirSync({unsafeCleanup: true});
|
||||
log.debug(`Created DocRequests._cacheDir: ${this._cacheDir.name}`);
|
||||
}
|
||||
|
||||
const cachePath = path.resolve(this._cacheDir.name, key);
|
||||
try {
|
||||
const result = await fse.readJSON(cachePath);
|
||||
result.content = Buffer.from(result.content, "base64");
|
||||
return result;
|
||||
} catch {
|
||||
const result = await this._handleSingleRequestRaw(request);
|
||||
const resultForJson = {...result} as any;
|
||||
if ('content' in result) {
|
||||
resultForJson.content = result.content.toString("base64");
|
||||
}
|
||||
fse.writeJSON(cachePath, resultForJson).catch(e => log.warn(`Failed to save response to cache file: ${e}`));
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private async _handleSingleRequestRaw(request: SandboxRequest): Promise<Response> {
|
||||
try {
|
||||
if (process.env.GRIST_EXPERIMENTAL_PLUGINS != '1') {
|
||||
throw new Error("REQUEST is not enabled");
|
||||
}
|
||||
const {url, params, headers} = request;
|
||||
const urlObj = new URL(url);
|
||||
log.rawInfo("Handling sandbox request", {host: urlObj.host, docId: this._activeDoc.docName});
|
||||
for (const [param, value] of Object.entries(params || {})) {
|
||||
urlObj.searchParams.append(param, value);
|
||||
}
|
||||
const response = await fetch(urlObj.toString(), {headers: headers || {}, agent: proxyAgent(urlObj)});
|
||||
const content = await response.buffer();
|
||||
const {status, statusText} = response;
|
||||
const encoding = httpEncoding(response.headers.get('content-type'), content);
|
||||
return {
|
||||
content, status, statusText, encoding,
|
||||
headers: fromPairs([...response.headers]),
|
||||
};
|
||||
} catch (e) {
|
||||
return {error: String(e)};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
interface SuccessfulResponse {
|
||||
content: Buffer;
|
||||
status: number;
|
||||
statusText: string;
|
||||
encoding?: string;
|
||||
headers: Record<string, string>;
|
||||
}
|
||||
|
||||
interface RequestError {
|
||||
error: string;
|
||||
}
|
||||
|
||||
type Response = RequestError | SuccessfulResponse;
|
||||
|
||||
function proxyAgent(requestUrl: URL) {
|
||||
const proxy = process.env.GRIST_HTTPS_PROXY;
|
||||
if (!proxy) {
|
||||
return undefined;
|
||||
}
|
||||
const ProxyAgent = requestUrl.protocol === "https:" ? HttpsProxyAgent : HttpProxyAgent;
|
||||
return new ProxyAgent(proxy);
|
||||
}
|
@ -0,0 +1,43 @@
|
||||
// Based on the source code of the Body.textConverted method in node-fetch
|
||||
export function httpEncoding(header: string | null, content: Buffer): string | undefined {
|
||||
let res: RegExpExecArray | null = null;
|
||||
|
||||
// header
|
||||
if (header) {
|
||||
res = /charset=([^;]*)/i.exec(header);
|
||||
}
|
||||
|
||||
// no charset in content type, peek at response body for at most 1024 bytes
|
||||
const str = content.slice(0, 1024).toString();
|
||||
|
||||
// html5
|
||||
if (!res && str) {
|
||||
res = /<meta.+?charset=(['"])(.+?)\1/i.exec(str);
|
||||
}
|
||||
|
||||
// html4
|
||||
if (!res && str) {
|
||||
res = /<meta\s+?http-equiv=(['"])content-type\1\s+?content=(['"])(.+?)\2/i.exec(str);
|
||||
|
||||
if (res) {
|
||||
res = /charset=(.*)/i.exec(res.pop()!);
|
||||
}
|
||||
}
|
||||
|
||||
// xml
|
||||
if (!res && str) {
|
||||
res = /<\?xml.+?encoding=(['"])(.+?)\1/i.exec(str);
|
||||
}
|
||||
|
||||
// found charset
|
||||
if (res) {
|
||||
let charset = res.pop();
|
||||
|
||||
// prevent decode issues when sites use incorrect encoding
|
||||
// ref: https://hsivonen.fi/encoding-menu/
|
||||
if (charset === 'gb2312' || charset === 'gbk') {
|
||||
charset = 'gb18030';
|
||||
}
|
||||
return charset;
|
||||
}
|
||||
}
|
@ -0,0 +1,136 @@
|
||||
# coding=utf-8
|
||||
import unittest
|
||||
|
||||
import test_engine
|
||||
import testutil
|
||||
from functions import CaseInsensitiveDict, Response, HTTPError
|
||||
|
||||
|
||||
class TestCaseInsensitiveDict(unittest.TestCase):
  """Tests that CaseInsensitiveDict treats differently-cased spellings of a key as identical."""

  def test_case_insensitive_dict(self):
    # Keys are normalised to lowercase on construction.
    d = CaseInsensitiveDict({"FOO": 1})
    # Every dict operation should behave the same regardless of the casing used for the key.
    for key in ["foo", "FOO", "Foo"]:
      # Equality works against plain dicts (with lowercase keys) and other CaseInsensitiveDicts.
      self.assertEqual(d, {"foo": 1})
      self.assertEqual(list(d), ["foo"])
      self.assertEqual(d, CaseInsensitiveDict({key: 1}))
      # Membership and lookup.
      self.assertIn(key, d)
      self.assertEqual(d[key], 1)
      self.assertEqual(d.get(key), 1)
      self.assertEqual(d.get(key, 2), 1)
      # A missing key falls back to the default.
      self.assertEqual(d.get(key + "2", 2), 2)
      # pop removes the entry whatever the casing.
      self.assertEqual(d.pop(key), 1)
      self.assertEqual(d, {})
      # setdefault inserts when absent, and returns the existing value when present.
      self.assertEqual(d.setdefault(key, 3), 3)
      self.assertEqual(d, {"foo": 3})
      self.assertEqual(d.setdefault(key, 4), 3)
      self.assertEqual(d, {"foo": 3})
      # Deletion works through any casing.
      del d[key]
      self.assertEqual(d, {})
      # Re-insert so the next loop iteration starts from the same state.
      d[key] = 1
|
||||
|
||||
|
||||
class TestResponse(unittest.TestCase):
  """Tests the Response class, which mimics the Response class of the `requests` library."""

  def test_ok_response(self):
    # Constructor arguments: content, status, statusText, headers, encoding.
    r = Response(b"foo", 200, "OK", {"X-header": "hi"}, None)
    self.assertEqual(r.content, b"foo")
    self.assertEqual(r.text, u"foo")
    self.assertEqual(r.status_code, 200)
    self.assertEqual(r.ok, True)
    self.assertEqual(r.reason, "OK")
    # Header keys are case-insensitive (stored lowercased).
    self.assertEqual(r.headers, {"x-header": "hi"})
    # With no encoding given, it is detected from the content.
    self.assertEqual(r.encoding, "ascii")
    self.assertEqual(r.apparent_encoding, "ascii")
    # A 2xx response should not raise, and close() should be a harmless no-op.
    r.raise_for_status()
    r.close()

  def test_error_response(self):
    r = Response(b"foo", 500, "Server error", {}, None)
    self.assertEqual(r.status_code, 500)
    self.assertEqual(r.ok, False)
    self.assertEqual(r.reason, "Server error")
    # raise_for_status() raises HTTPError for error status codes, like `requests` does.
    with self.assertRaises(HTTPError) as cm:
      r.raise_for_status()
    self.assertEqual(str(cm.exception), "Request failed with status 500")

  def test_json(self):
    # json() parses the body as JSON.
    r = Response(b'{"foo": "bar"}', 200, "OK", {}, None)
    self.assertEqual(r.json(), {"foo": "bar"})

  def test_encoding_direct(self):
    # An explicitly provided encoding is used as-is, even if it isn't a real codec name,
    # while apparent_encoding is still detected from the content.
    r = Response(b"foo", 200, "OK", {}, "some encoding")
    self.assertEqual(r.encoding, "some encoding")
    self.assertEqual(r.apparent_encoding, "ascii")

  def test_apparent_encoding(self):
    # With a falsy encoding argument, the encoding is detected from the content bytes.
    text = u"编程"
    encoding = "utf-8"
    content = text.encode(encoding)
    self.assertEqual(content.decode(encoding), text)
    r = Response(content, 200, "OK", {}, "")
    self.assertEqual(r.encoding, encoding)
    self.assertEqual(r.apparent_encoding, encoding)
    self.assertEqual(r.content, content)
    self.assertEqual(r.text, text)
|
||||
|
||||
|
||||
class TestRequestFunction(test_engine.EngineTestCase):
  """
  Tests the full REQUEST() flow in the engine: the formula notes the request arguments
  and keeps its existing value, then a RespondToRequests user action delivers the
  response and recalculates the cells that made the request.
  """

  # Minimal document: Table1 with a formula column `Request` (initially just `$id`,
  # replaced below with a REQUEST() call) and an unused formula column `Other`.
  sample = testutil.parse_test_sample({
    "SCHEMA": [
      [1, "Table1", [
        [2, "Request", "Any", True, "$id", "", ""],
        [3, "Other", "Any", True, "", "", ""],
      ]],
    ],
    "DATA": {
      "Table1": [
        ["id"],
        [1],
        [2],
      ],
    }
  })

  def test_request_function(self):
    self.load_sample(self.sample)

    # The formula returns the response's attribute dict so we can compare it directly below.
    formula = """
r = REQUEST('my_url', headers={'foo': 'bar'}, params={'b': 1, 'a': 2})
r.__dict__
"""
    out_actions = self.modify_column("Table1", "Request", formula=formula)
    # Key identifying this request; presumably a hash of the request arguments computed
    # in the sandbox — must match what the engine produces for the args below.
    key = '9d305be9664924aaaf7ebb0bab2e4155d1fa1b9dcde53e417f1a9f9a2c7e09b9'
    # `deps` records which cells made the request and should be recalculated on response:
    # both rows of Table1.Request.
    deps = {'Table1': {'Request': [1, 2]}}
    args = {
      'url': 'my_url',
      'headers': {'foo': 'bar'},
      # Note: params are recorded sorted by key, not in the order passed to REQUEST().
      'params': {'a': 2, 'b': 1},
      'deps': deps,
    }
    # The request arguments are returned alongside the applied actions.
    self.assertEqual(out_actions.requests, {key: args})
    # While the request is pending, cells keep their previous values ($id).
    self.assertTableData("Table1", cols="subset", data=[
      ["id", "Request"],
      [1, 1],
      [2, 2],
    ])

    # Simulated raw response, shaped like what the Node side sends back (JS `fetch` naming).
    response = {
      'status': 200,
      'statusText': 'OK',
      'content': b'body',
      'headers': {'h1': 'h2'},
      'encoding': 'utf16',
      'deps': deps,
    }
    # Deliver the response; the third argument lists keys cached in files on the Node side.
    self.apply_user_action(["RespondToRequests", {key: response.copy()}, [key]])

    # Translate names from JS `fetch` API to Python `requests`-style API
    response["status_code"] = response.pop("status")
    response["reason"] = response.pop("statusText")
    # This is sent in the user action but not kept for the response object
    del response["deps"]

    # Both cells were recalculated using the response, and `r.__dict__` matches the
    # translated response dict.
    self.assertTableData("Table1", cols="subset", data=[
      ["id", "Request"],
      [1, response],
      [2, response],
    ])
|
Loading…
Reference in new issue