2021-07-21 08:46:03 +00:00
|
|
|
import {drive} from '@googleapis/drive';
|
|
|
|
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
|
|
|
|
import {RequestWithLogin} from 'app/server/lib/Authorizer';
|
(core) For exporting XLSX, do it memory-efficiently in a worker thread.
Summary:
- Excel exports were awfully memory-inefficient, causing occasional docWorker
crashes. The fix is to use the "streaming writer" option of ExcelJS
https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (Empirically
on one example, max memory went down from 3G to 100M)
- It's also CPU intensive and synchronous, and can block node for tens of
seconds. The fix is to use a worker-thread. This diff uses "piscina" library
for a pool of threads.
- Additionally, adds ProcessMonitor that logs memory and cpu usage,
particularly when those change significantly.
- Also introduces request cancellation, so that a long download cancelled by
the user will cancel the work being done in the worker thread.
Test Plan:
Updated previous export tests; memory and CPU performance tested
manually by watching output of ProcessMonitor.
Difference visible in these log excerpts:
Before (total time to serve request 22 sec):
```
Telemetry processMonitor heapUsedMB=2187, heapTotalMB=2234, cpuAverage=1.13, intervalMs=17911
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0.66, intervalMs=5005
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0, intervalMs=5005
Telemetry processMonitor heapUsedMB=71, heapTotalMB=75, cpuAverage=0.13, intervalMs=5002
```
After (total time to server request 18 sec):
```
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=0.5, intervalMs=5001
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=1.39, intervalMs=5002
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.13, intervalMs=5000
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.35, intervalMs=5001
```
Note in "Before" that heapTotalMB goes up to 2GB in the first case, and "intervalMs" of 17 seconds indicates that node was unresponsive for that long. In the second case, heapTotalMB stays low, and the main thread remains responsive the whole time.
Reviewers: jarek
Reviewed By: jarek
Differential Revision: https://phab.getgrist.com/D3906
2023-06-01 13:09:50 +00:00
|
|
|
import {streamXLSX} from 'app/server/lib/ExportXLSX';
|
2022-07-04 14:14:55 +00:00
|
|
|
import log from 'app/server/lib/log';
|
2024-09-30 17:11:01 +00:00
|
|
|
import {getDocId, optStringParam} from 'app/server/lib/requestUtils';
|
2021-07-21 08:46:03 +00:00
|
|
|
import {Request, Response} from 'express';
|
(core) For exporting XLSX, do it memory-efficiently in a worker thread.
Summary:
- Excel exports were awfully memory-inefficient, causing occasional docWorker
crashes. The fix is to use the "streaming writer" option of ExcelJS
https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (Empirically
on one example, max memory went down from 3G to 100M)
- It's also CPU intensive and synchronous, and can block node for tens of
seconds. The fix is to use a worker-thread. This diff uses "piscina" library
for a pool of threads.
- Additionally, adds ProcessMonitor that logs memory and cpu usage,
particularly when those change significantly.
- Also introduces request cancellation, so that a long download cancelled by
the user will cancel the work being done in the worker thread.
Test Plan:
Updated previous export tests; memory and CPU performance tested
manually by watching output of ProcessMonitor.
Difference visible in these log excerpts:
Before (total time to serve request 22 sec):
```
Telemetry processMonitor heapUsedMB=2187, heapTotalMB=2234, cpuAverage=1.13, intervalMs=17911
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0.66, intervalMs=5005
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0, intervalMs=5005
Telemetry processMonitor heapUsedMB=71, heapTotalMB=75, cpuAverage=0.13, intervalMs=5002
```
After (total time to server request 18 sec):
```
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=0.5, intervalMs=5001
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=1.39, intervalMs=5002
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.13, intervalMs=5000
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.35, intervalMs=5001
```
Note in "Before" that heapTotalMB goes up to 2GB in the first case, and "intervalMs" of 17 seconds indicates that node was unresponsive for that long. In the second case, heapTotalMB stays low, and the main thread remains responsive the whole time.
Reviewers: jarek
Reviewed By: jarek
Differential Revision: https://phab.getgrist.com/D3906
2023-06-01 13:09:50 +00:00
|
|
|
import {PassThrough, Stream} from 'stream';
|
2021-07-21 08:46:03 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Endpoint logic for sending grist document to Google Drive. Grist document is first exported as an
|
|
|
|
* excel file and then pushed to Google Drive as a Google Spreadsheet.
|
|
|
|
*/
|
|
|
|
export async function exportToDrive(
|
|
|
|
activeDoc: ActiveDoc,
|
|
|
|
req: Request,
|
|
|
|
res: Response
|
|
|
|
) {
|
|
|
|
// Token should come from auth middleware
|
2023-09-05 18:27:35 +00:00
|
|
|
const access_token = optStringParam(req.query.access_token, 'access_token');
|
2021-07-21 08:46:03 +00:00
|
|
|
if (!access_token) {
|
|
|
|
throw new Error("No access token - Can't send file to Google Drive");
|
|
|
|
}
|
|
|
|
|
2024-09-30 17:11:01 +00:00
|
|
|
const docId = getDocId(req);
|
2022-04-08 18:00:43 +00:00
|
|
|
const mreq = req as RequestWithLogin;
|
2021-07-21 08:46:03 +00:00
|
|
|
const meta = {
|
2022-04-08 18:00:43 +00:00
|
|
|
docId: activeDoc.docName,
|
|
|
|
userId: mreq.userId,
|
|
|
|
altSessionId: mreq.altSessionId,
|
2021-07-21 08:46:03 +00:00
|
|
|
};
|
|
|
|
// Prepare file for exporting.
|
|
|
|
log.debug(`Export to drive - Preparing file for export`, meta);
|
2023-09-05 18:27:35 +00:00
|
|
|
const name = (optStringParam(req.query.title, 'title') || activeDoc.docName);
|
(core) For exporting XLSX, do it memory-efficiently in a worker thread.
Summary:
- Excel exports were awfully memory-inefficient, causing occasional docWorker
crashes. The fix is to use the "streaming writer" option of ExcelJS
https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (Empirically
on one example, max memory went down from 3G to 100M)
- It's also CPU intensive and synchronous, and can block node for tens of
seconds. The fix is to use a worker-thread. This diff uses "piscina" library
for a pool of threads.
- Additionally, adds ProcessMonitor that logs memory and cpu usage,
particularly when those change significantly.
- Also introduces request cancellation, so that a long download cancelled by
the user will cancel the work being done in the worker thread.
Test Plan:
Updated previous export tests; memory and CPU performance tested
manually by watching output of ProcessMonitor.
Difference visible in these log excerpts:
Before (total time to serve request 22 sec):
```
Telemetry processMonitor heapUsedMB=2187, heapTotalMB=2234, cpuAverage=1.13, intervalMs=17911
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0.66, intervalMs=5005
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0, intervalMs=5005
Telemetry processMonitor heapUsedMB=71, heapTotalMB=75, cpuAverage=0.13, intervalMs=5002
```
After (total time to server request 18 sec):
```
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=0.5, intervalMs=5001
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=1.39, intervalMs=5002
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.13, intervalMs=5000
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.35, intervalMs=5001
```
Note in "Before" that heapTotalMB goes up to 2GB in the first case, and "intervalMs" of 17 seconds indicates that node was unresponsive for that long. In the second case, heapTotalMB stays low, and the main thread remains responsive the whole time.
Reviewers: jarek
Reviewed By: jarek
Differential Revision: https://phab.getgrist.com/D3906
2023-06-01 13:09:50 +00:00
|
|
|
const stream = new PassThrough();
|
|
|
|
|
2021-07-21 08:46:03 +00:00
|
|
|
try {
|
|
|
|
// Send file to GDrive and get the url for a preview.
|
(core) For exporting XLSX, do it memory-efficiently in a worker thread.
Summary:
- Excel exports were awfully memory-inefficient, causing occasional docWorker
crashes. The fix is to use the "streaming writer" option of ExcelJS
https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (Empirically
on one example, max memory went down from 3G to 100M)
- It's also CPU intensive and synchronous, and can block node for tens of
seconds. The fix is to use a worker-thread. This diff uses "piscina" library
for a pool of threads.
- Additionally, adds ProcessMonitor that logs memory and cpu usage,
particularly when those change significantly.
- Also introduces request cancellation, so that a long download cancelled by
the user will cancel the work being done in the worker thread.
Test Plan:
Updated previous export tests; memory and CPU performance tested
manually by watching output of ProcessMonitor.
Difference visible in these log excerpts:
Before (total time to serve request 22 sec):
```
Telemetry processMonitor heapUsedMB=2187, heapTotalMB=2234, cpuAverage=1.13, intervalMs=17911
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0.66, intervalMs=5005
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0, intervalMs=5005
Telemetry processMonitor heapUsedMB=71, heapTotalMB=75, cpuAverage=0.13, intervalMs=5002
```
After (total time to server request 18 sec):
```
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=0.5, intervalMs=5001
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=1.39, intervalMs=5002
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.13, intervalMs=5000
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.35, intervalMs=5001
```
Note in "Before" that heapTotalMB goes up to 2GB in the first case, and "intervalMs" of 17 seconds indicates that node was unresponsive for that long. In the second case, heapTotalMB stays low, and the main thread remains responsive the whole time.
Reviewers: jarek
Reviewed By: jarek
Differential Revision: https://phab.getgrist.com/D3906
2023-06-01 13:09:50 +00:00
|
|
|
const [, url] = await Promise.all([
|
|
|
|
streamXLSX(activeDoc, req, stream, {tableId: ''}),
|
|
|
|
sendFileToDrive(name, stream, access_token),
|
|
|
|
]);
|
2024-09-30 17:11:01 +00:00
|
|
|
activeDoc.logAuditEvent(mreq, {
|
|
|
|
event: {
|
|
|
|
name: 'sendToGoogleDrive',
|
|
|
|
details: {id: docId},
|
|
|
|
context: {documentId: docId},
|
|
|
|
},
|
|
|
|
});
|
2021-07-21 08:46:03 +00:00
|
|
|
log.debug(`Export to drive - File exported, redirecting to Google Spreadsheet ${url}`, meta);
|
|
|
|
res.json({ url });
|
|
|
|
} catch (err) {
|
|
|
|
log.error("Export to drive - Error while sending file to GDrive", meta, err);
|
|
|
|
// Test if google returned a valid error message.
|
|
|
|
if (err.errors && err.errors.length) {
|
|
|
|
throw new Error(err.errors[0].message);
|
|
|
|
} else {
|
|
|
|
throw err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates spreadsheet file in a Google drive, by sending an excel and requesting for conversion.
|
(core) For exporting XLSX, do it memory-efficiently in a worker thread.
Summary:
- Excel exports were awfully memory-inefficient, causing occasional docWorker
crashes. The fix is to use the "streaming writer" option of ExcelJS
https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (Empirically
on one example, max memory went down from 3G to 100M)
- It's also CPU intensive and synchronous, and can block node for tens of
seconds. The fix is to use a worker-thread. This diff uses "piscina" library
for a pool of threads.
- Additionally, adds ProcessMonitor that logs memory and cpu usage,
particularly when those change significantly.
- Also introduces request cancellation, so that a long download cancelled by
the user will cancel the work being done in the worker thread.
Test Plan:
Updated previous export tests; memory and CPU performance tested
manually by watching output of ProcessMonitor.
Difference visible in these log excerpts:
Before (total time to serve request 22 sec):
```
Telemetry processMonitor heapUsedMB=2187, heapTotalMB=2234, cpuAverage=1.13, intervalMs=17911
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0.66, intervalMs=5005
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0, intervalMs=5005
Telemetry processMonitor heapUsedMB=71, heapTotalMB=75, cpuAverage=0.13, intervalMs=5002
```
After (total time to server request 18 sec):
```
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=0.5, intervalMs=5001
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=1.39, intervalMs=5002
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.13, intervalMs=5000
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.35, intervalMs=5001
```
Note in "Before" that heapTotalMB goes up to 2GB in the first case, and "intervalMs" of 17 seconds indicates that node was unresponsive for that long. In the second case, heapTotalMB stays low, and the main thread remains responsive the whole time.
Reviewers: jarek
Reviewed By: jarek
Differential Revision: https://phab.getgrist.com/D3906
2023-06-01 13:09:50 +00:00
|
|
|
async function sendFileToDrive(fileNameNoExt: string, stream: Stream, oauth_token: string): Promise<string> {
|
2021-07-21 08:46:03 +00:00
|
|
|
// Here we are asking google drive to convert excel file to a google spreadsheet
|
|
|
|
const requestBody = {
|
|
|
|
// name of the spreadsheet to create
|
|
|
|
name: fileNameNoExt,
|
|
|
|
// mime type of the google spreadsheet
|
|
|
|
mimeType: 'application/vnd.google-apps.spreadsheet'
|
|
|
|
};
|
|
|
|
// Define what gets send - excel file
|
|
|
|
const media = {
|
|
|
|
mimeType: 'application/vnd.ms-excel',
|
|
|
|
body: stream
|
|
|
|
};
|
|
|
|
const googleDrive = drive("v3");
|
|
|
|
const fileRes = await googleDrive.files.create({
|
|
|
|
requestBody, // what to do with file - convert to spreadsheet
|
|
|
|
oauth_token, // access token
|
|
|
|
media, // file
|
|
|
|
fields: "webViewLink" // return webViewLink after creating file
|
|
|
|
});
|
|
|
|
const url = fileRes.data.webViewLink;
|
|
|
|
if (!url) {
|
|
|
|
throw new Error("Google Api has not returned valid response");
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
}
|