2021-11-29 20:12:45 +00:00
|
|
|
import {ApiError} from 'app/common/ApiError';
|
2021-07-21 08:46:03 +00:00
|
|
|
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
|
2023-07-19 17:37:22 +00:00
|
|
|
import {FilterColValues} from "app/common/ActiveDocAPI";
|
2023-10-16 00:17:43 +00:00
|
|
|
import {DownloadOptions, ExportData, ExportHeader, exportSection, exportTable, Filter} from 'app/server/lib/Export';
|
2022-07-04 14:14:55 +00:00
|
|
|
import log from 'app/server/lib/log';
|
2021-07-21 08:46:03 +00:00
|
|
|
import * as bluebird from 'bluebird';
|
2022-07-04 14:14:55 +00:00
|
|
|
import contentDisposition from 'content-disposition';
|
|
|
|
import csv from 'csv';
|
2021-07-21 08:46:03 +00:00
|
|
|
import * as express from 'express';
|
2021-09-01 21:07:53 +00:00
|
|
|
|
2021-07-21 08:46:03 +00:00
|
|
|
// Promisify the csv module: bluebird's promisifyAll adds `*Async` promise-returning variants of
// its callback-style functions. `csv.stringifyAsync`, used by convertToCsv below, relies on this.
|
|
|
|
bluebird.promisifyAll(csv);
|
|
|
|
|
|
|
|
/**
 * Converts data from `activeDoc` to CSV and sends it through `res`.
 *
 * If `options.viewSectionId` is truthy, that view section is exported (passing along the
 * sort order, filters and linking filter from `options`, each defaulting to null); otherwise
 * the table identified by `options.tableId` is exported.
 *
 * The response gets a `text/csv` Content-Type and a Content-Disposition header built from
 * `options.filename` with a `.csv` extension appended.
 */
export async function downloadCSV(activeDoc: ActiveDoc, req: express.Request,
                                  res: express.Response, options: DownloadOptions) {
  log.info('Generating .csv file...');
  const {filename, tableId, viewSectionId, filters, sortOrder, linkingFilter, header} = options;

  // Pick the export flavour first, then wait for whichever one was started.
  const pendingCsv = viewSectionId
    ? makeCSVFromViewSection({
        activeDoc,
        viewSectionId,
        sortOrder: sortOrder || null,
        filters: filters || null,
        linkingFilter: linkingFilter || null,
        header,
        req,
      })
    : makeCSVFromTable({activeDoc, tableId, header, req});
  const csvBody = await pendingCsv;

  res.set('Content-Type', 'text/csv');
  res.setHeader('Content-Disposition', contentDisposition(filename + '.csv'));
  res.send(csvBody);
}
|
|
|
|
|
|
|
|
/**
 * Builds the CSV for a single view section.
 *
 * The section's data is obtained from `exportSection` and then handed to `convertToCsv`.
 * See https://github.com/wdavidw/node-csv for details of the underlying csv API.
 *
 * @param options - options for the export.
 * @param options.activeDoc - the activeDoc that the view section being converted belongs to.
 * @param options.viewSectionId - id of the view section to export.
 * @param options.sortOrder - overriding sort order (defaults to null).
 * @param options.filters - filters defined from ui (defaults to null).
 * @param options.linkingFilter - linking filter defined from ui (defaults to null).
 * @param options.header - (optional) which field of the column to use as header.
 * @param options.req - the request object.
 *
 * @returns Promise for the resulting CSV.
 */
export async function makeCSVFromViewSection({
  activeDoc,
  viewSectionId,
  sortOrder = null,
  filters = null,
  linkingFilter = null,
  header,
  req
}: {
  activeDoc: ActiveDoc,
  viewSectionId: number,
  sortOrder: number[] | null,
  filters: Filter[] | null,
  linkingFilter: FilterColValues | null,
  header?: ExportHeader,
  req: express.Request
}) {
  const sectionData = await exportSection(activeDoc, viewSectionId, sortOrder, filters, linkingFilter, req);
  return convertToCsv(sectionData, { header });
}
|
|
|
|
|
2021-09-01 21:07:53 +00:00
|
|
|
/**
 * Builds the CSV for a whole table.
 *
 * The table is looked up by its `tableId` in the `_grist_Tables` metadata table; its data is
 * obtained from `exportTable` and then handed to `convertToCsv`.
 *
 * @param options - options for the export.
 * @param options.activeDoc - the activeDoc that the table being converted belongs to.
 * @param options.tableId - id (string) of the table to export.
 * @param options.header - (optional) which field of the column to use as header.
 * @param options.req - the request object.
 *
 * @returns Promise for the resulting CSV.
 * @throws Error if `activeDoc` has no docData.
 * @throws ApiError (404) if no table with the given `tableId` is found.
 */
export async function makeCSVFromTable({ activeDoc, tableId, header, req }: {
  activeDoc: ActiveDoc,
  tableId: string,
  header?: ExportHeader,
  req: express.Request
}) {
  const docData = activeDoc.docData;
  if (!docData) {
    throw new Error('No docData in active document');
  }

  // Resolve the table id to its row reference; findRow yields 0 when nothing matches.
  const tableRef = docData.getMetaTable('_grist_Tables').findRow('tableId', tableId);
  if (tableRef === 0) {
    throw new ApiError(`Table ${tableId} not found.`, 404);
  }

  const tableData = await exportTable(activeDoc, tableRef, req);
  return convertToCsv(tableData, { header });
}
|
|
|
|
|
2021-07-21 08:46:03 +00:00
|
|
|
/**
 * Turns export data into CSV via the promisified `csv.stringifyAsync`.
 *
 * The first output row holds one header per column, taken from the column property named by
 * `options.header` (falling back to 'label'). Each following row corresponds to one entry of
 * `rowIds`: every accessor in `access` is called with the row id, and the value is formatted
 * with the formatter of the column at the same index.
 *
 * @param data - export data; only `rowIds`, `access` and `columns` are used.
 * @param options.header - (optional) which field of the column to use as header.
 * @returns whatever `csv.stringifyAsync` returns for the assembled matrix.
 */
function convertToCsv({
  rowIds,
  access,
  columns: viewColumns,
}: ExportData, options: { header?: ExportHeader }) {
  // One formatter per column, in column order.
  const columnFormatters = viewColumns.map(column => column.formatter);

  // The matrix starts with the header row; data rows are appended below.
  const headerProperty = options.header ?? 'label';
  const headerRow = viewColumns.map(column => column[headerProperty]);
  const matrix = [headerRow];

  // Append every row with its values rendered as strings.
  for (const rowId of rowIds) {
    const renderedRow = access.map(
      (readValue, colIndex) => columnFormatters[colIndex].formatAny(readValue(rowId))
    );
    matrix.push(renderedRow);
  }

  return csv.stringifyAsync(matrix);
}
|