gristlabs_grist-core/app/gen-server/lib/DocApiForwarder.ts
Dmitry S d191859be7 (core) For exporting XLSX, do it memory-efficiently in a worker thread.
Summary:
- Excel exports were awfully memory-inefficient, causing occasional docWorker
  crashes. The fix is to use the "streaming writer" option of ExcelJS
  https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (Empirically
  on one example, max memory went down from 3G to 100M)
- It's also CPU intensive and synchronous, and can block node for tens of
  seconds. The fix is to use a worker-thread. This diff uses "piscina" library
  for a pool of threads.
- Additionally, adds ProcessMonitor that logs memory and cpu usage,
  particularly when those change significantly.
- Also introduces request cancellation, so that a long download cancelled by
  the user will cancel the work being done in the worker thread.

Test Plan:
Updated previous export tests; memory and CPU performance tested
manually by watching output of ProcessMonitor.

Difference visible in these log excerpts:

Before (total time to serve request 22 sec):
```
Telemetry processMonitor heapUsedMB=2187, heapTotalMB=2234, cpuAverage=1.13, intervalMs=17911
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0.66, intervalMs=5005
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0, intervalMs=5005
Telemetry processMonitor heapUsedMB=71, heapTotalMB=75, cpuAverage=0.13, intervalMs=5002
```
After (total time to server request 18 sec):
```
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=0.5, intervalMs=5001
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=1.39, intervalMs=5002
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.13, intervalMs=5000
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.35, intervalMs=5001
```
Note in "Before" that heapTotalMB goes up to 2GB in the first case, and "intervalMs" of 17 seconds indicates that node was unresponsive for that long. In the second case, heapTotalMB stays low, and the main thread remains responsive the whole time.

Reviewers: jarek

Reviewed By: jarek

Differential Revision: https://phab.getgrist.com/D3906
2023-06-01 12:06:48 -04:00

130 lines
5.6 KiB
TypeScript

import * as express from "express";
import fetch, { RequestInit } from 'node-fetch';
import {AbortController} from 'node-abort-controller';
import { ApiError } from 'app/common/ApiError';
import { removeTrailingSlash } from 'app/common/gutil';
import { HomeDBManager } from "app/gen-server/lib/HomeDBManager";
import { assertAccess, getOrSetDocAuth, getTransitiveHeaders, RequestWithLogin } from 'app/server/lib/Authorizer';
import { IDocWorkerMap } from "app/server/lib/DocWorkerMap";
import { expressWrap } from "app/server/lib/expressWrap";
import { GristServer } from "app/server/lib/GristServer";
import { getAssignmentId } from "app/server/lib/idUtils";
/**
* Forwards all /api/docs/:docId/tables requests to the doc worker handling the :docId document. Makes
* sure the user has at least view access to the document otherwise rejects the request. For
* performance reason we stream the body directly from the request, which requires that no-one reads
* the req before, in particular you should register DocApiForwarder before bodyParser.
*
* Use:
* const home = new ApiServer(false);
* const docApiForwarder = new DocApiForwarder(getDocWorkerMap(), home);
* app.use(docApiForwarder.getMiddleware());
*
* Note that it expects userId, and jsonErrorHandler middleware to be set up outside
* to apply to these routes.
*/
export class DocApiForwarder {
constructor(private _docWorkerMap: IDocWorkerMap, private _dbManager: HomeDBManager,
private _gristServer: GristServer) {
}
public addEndpoints(app: express.Application) {
// Middleware to forward a request about an existing document that user has access to.
// We do not check whether the document has been soft-deleted; that will be checked by
// the worker if needed.
const withDoc = expressWrap(this._forwardToDocWorker.bind(this, true, 'viewers'));
// Middleware to forward a request without a pre-existing document (for imports/uploads).
const withoutDoc = expressWrap(this._forwardToDocWorker.bind(this, false, null));
const withDocWithoutAuth = expressWrap(this._forwardToDocWorker.bind(this, true, null));
app.use('/api/docs/:docId/tables', withDoc);
app.use('/api/docs/:docId/force-reload', withDoc);
app.use('/api/docs/:docId/recover', withDoc);
app.use('/api/docs/:docId/remove', withDoc);
app.delete('/api/docs/:docId', withDoc);
app.use('/api/docs/:docId/download', withDoc);
app.use('/api/docs/:docId/send-to-drive', withDoc);
app.use('/api/docs/:docId/fork', withDoc);
app.use('/api/docs/:docId/create-fork', withDoc);
app.use('/api/docs/:docId/apply', withDoc);
app.use('/api/docs/:docId/attachments', withDoc);
app.use('/api/docs/:docId/snapshots', withDoc);
app.use('/api/docs/:docId/usersForViewAs', withDoc);
app.use('/api/docs/:docId/replace', withDoc);
app.use('/api/docs/:docId/flush', withDoc);
app.use('/api/docs/:docId/states', withDoc);
app.use('/api/docs/:docId/compare', withDoc);
app.use('/api/docs/:docId/assign', withDocWithoutAuth);
app.use('/api/docs/:docId/webhooks/queue', withDoc);
app.use('/api/docs/:docId/webhooks', withDoc);
app.use('^/api/docs$', withoutDoc);
}
private async _forwardToDocWorker(
withDocId: boolean, role: 'viewers'|null, req: express.Request, res: express.Response,
): Promise<void> {
let docId: string|null = null;
if (withDocId) {
const docAuth = await getOrSetDocAuth(req as RequestWithLogin, this._dbManager,
this._gristServer, req.params.docId);
if (role) {
assertAccess(role, docAuth, {allowRemoved: true});
}
docId = docAuth.docId;
}
// Use the docId for worker assignment, rather than req.params.docId, which could be a urlId.
const assignmentId = getAssignmentId(this._docWorkerMap, docId === null ? 'import' : docId);
if (!this._docWorkerMap) {
throw new ApiError('no worker map', 404);
}
const docStatus = await this._docWorkerMap.assignDocWorker(assignmentId);
// Construct new url by keeping only origin and path prefixes of `docWorker.internalUrl`,
// and otherwise reflecting fully the original url (remaining path, and query params).
const docWorkerUrl = new URL(docStatus.docWorker.internalUrl);
const url = new URL(req.originalUrl, docWorkerUrl.origin);
url.pathname = removeTrailingSlash(docWorkerUrl.pathname) + url.pathname;
const headers: {[key: string]: string} = {
...getTransitiveHeaders(req),
'Content-Type': req.get('Content-Type') || 'application/json',
};
for (const key of ['X-Sort', 'X-Limit']) {
const hdr = req.get(key);
if (hdr) { headers[key] = hdr; }
}
const controller = new AbortController();
// If the original request is aborted, abort the forwarded request too. (Currently this only
// affects some export/download requests which can abort long-running work.)
req.on('close', () => controller.abort());
const options: RequestInit = {
method: req.method,
headers,
signal: controller.signal,
};
if (['POST', 'PATCH', 'PUT'].includes(req.method)) {
// uses `req` as a stream
options.body = req;
}
const docWorkerRes = await fetch(url.href, options);
res.status(docWorkerRes.status);
for (const key of ['content-type', 'content-disposition', 'cache-control']) {
const value = docWorkerRes.headers.get(key);
if (value) { res.set(key, value); }
}
return new Promise<void>((resolve, reject) => {
docWorkerRes.body.on('error', reject);
res.on('error', reject);
res.on('finish', resolve);
docWorkerRes.body.pipe(res);
});
}
}