(core) For exporting XLSX, do it memory-efficiently in a worker thread.

Summary:
- Excel exports were awfully memory-inefficient, causing occasional docWorker
  crashes. The fix is to use the "streaming writer" option of ExcelJS
  https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (Empirically
  on one example, max memory went down from 3G to 100M)
- It's also CPU intensive and synchronous, and can block node for tens of
  seconds. The fix is to use a worker-thread. This diff uses "piscina" library
  for a pool of threads.
- Additionally, adds ProcessMonitor that logs memory and cpu usage,
  particularly when those change significantly.
- Also introduces request cancellation, so that a long download cancelled by
  the user will cancel the work being done in the worker thread.

Test Plan:
Updated previous export tests; memory and CPU performance tested
manually by watching output of ProcessMonitor.

Difference visible in these log excerpts:

Before (total time to serve request 22 sec):
```
Telemetry processMonitor heapUsedMB=2187, heapTotalMB=2234, cpuAverage=1.13, intervalMs=17911
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0.66, intervalMs=5005
Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0, intervalMs=5005
Telemetry processMonitor heapUsedMB=71, heapTotalMB=75, cpuAverage=0.13, intervalMs=5002
```
After (total time to serve request 18 sec):
```
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=0.5, intervalMs=5001
Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=1.39, intervalMs=5002
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.13, intervalMs=5000
Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.35, intervalMs=5001
```
Note in "Before" that heapTotalMB goes up to 2GB in the first case, and "intervalMs" of 17 seconds indicates that node was unresponsive for that long. In the second case, heapTotalMB stays low, and the main thread remains responsive the whole time.

Reviewers: jarek

Reviewed By: jarek

Differential Revision: https://phab.getgrist.com/D3906
This commit is contained in:
Dmitry S 2023-06-01 09:09:50 -04:00
parent 6b338256e0
commit d191859be7
19 changed files with 464 additions and 116 deletions

View File

@ -104,6 +104,10 @@ export function getTableId(action: DocAction): string {
return action[1]; // It happens to always be in the same position in the action tuple.
}
// A collection of TableDataActions, keyed by table ID.
export interface TableDataActionSet {
[tableId: string]: TableDataAction;
}
// Helper types used in the definitions above.
export interface ColValues { [colId: string]: CellValue; }

View File

@ -9,6 +9,7 @@ export const TelemetryEventNames = [
'documentForked',
'documentOpened',
'documentUsage',
'processMonitor',
'sendingWebhooks',
'signupVerified',
'siteMembership',

View File

@ -1,5 +1,6 @@
import * as express from "express";
import fetch, { RequestInit } from 'node-fetch';
import {AbortController} from 'node-abort-controller';
import { ApiError } from 'app/common/ApiError';
import { removeTrailingSlash } from 'app/common/gutil';
@ -95,14 +96,23 @@ export class DocApiForwarder {
const hdr = req.get(key);
if (hdr) { headers[key] = hdr; }
}
const controller = new AbortController();
// If the original request is aborted, abort the forwarded request too. (Currently this only
// affects some export/download requests which can abort long-running work.)
req.on('close', () => controller.abort());
const options: RequestInit = {
method: req.method,
headers,
signal: controller.signal,
};
if (['POST', 'PATCH', 'PUT'].includes(req.method)) {
// uses `req` as a stream
options.body = req;
}
const docWorkerRes = await fetch(url.href, options);
res.status(docWorkerRes.status);
for (const key of ['content-type', 'content-disposition', 'cache-control']) {

View File

@ -1,6 +1,6 @@
import {ApiError} from 'app/common/ApiError';
import {buildColFilter} from 'app/common/ColumnFilterFunc';
import {TableDataAction} from 'app/common/DocActions';
import {TableDataAction, TableDataActionSet} from 'app/common/DocActions';
import {DocData} from 'app/common/DocData';
import {DocumentSettings} from 'app/common/DocumentSettings';
import * as gristTypes from 'app/common/gristTypes';
@ -24,6 +24,30 @@ import * as _ from 'underscore';
// Helper type for Cell Accessor
type Access = (row: number) => any;
// Interface to document data used from an exporter worker thread (workerExporter.ts). Note that
// parameters and returned values are plain data that can be passed over a MessagePort.
export interface ActiveDocSource {
// Resolves to the name of the document.
getDocName(): Promise<string>;
// Resolves to the document's metadata tables, keyed by table ID.
fetchMetaTables(): Promise<TableDataActionSet>;
// Resolves to the data of the table with the given tableId.
fetchTable(tableId: string): Promise<TableDataAction>;
}
// ActiveDocSource backed directly by an ActiveDoc. Every fetch is made with a doc session
// derived from the request given at construction time.
export class ActiveDocSourceDirect implements ActiveDocSource {
  private _req: RequestWithLogin;

  constructor(private _activeDoc: ActiveDoc, req: express.Request) {
    this._req = req as RequestWithLogin;
  }

  // Name of the underlying document.
  public async getDocName() {
    return this._activeDoc.docName;
  }

  // Metadata tables, as returned by ActiveDoc.fetchMetaTables() for the request's doc session.
  public fetchMetaTables() {
    const docSession = docSessionFromRequest(this._req);
    return this._activeDoc.fetchMetaTables(docSession);
  }

  // Only the tableData part of ActiveDoc.fetchTable()'s result is passed on.
  public async fetchTable(tableId: string) {
    const docSession = docSessionFromRequest(this._req);
    const fetched = await this._activeDoc.fetchTable(docSession, tableId, true);
    return fetched.tableData;
  }
}
// Helper interface with information about the column
export interface ExportColumn {
id: number;
@ -69,25 +93,17 @@ export interface ExportData {
* Export parameters that identifies a section, filters, sort order.
*/
export interface ExportParameters {
tableId: string;
viewSectionId: number | undefined;
sortOrder: number[];
filters: Filter[];
tableId: string; // Value of '' is an instruction to export all tables.
viewSectionId?: number;
sortOrder?: number[];
filters?: Filter[];
}
/**
* Options parameters for CSV and XLSX export functions.
*/
export interface DownloadOptions {
export interface DownloadOptions extends ExportParameters {
filename: string;
tableId: string;
viewSectionId: number | undefined;
filters: Filter[];
sortOrder: number[];
}
interface FilteredMetaTables {
[tableId: string]: TableDataAction;
}
/**
@ -108,9 +124,8 @@ export function parseExportParameters(req: express.Request): ExportParameters {
}
// Helper for getting filtered metadata tables.
async function getMetaTables(activeDoc: ActiveDoc, req: express.Request): Promise<FilteredMetaTables> {
const docSession = docSessionFromRequest(req as RequestWithLogin);
return safe(await activeDoc.fetchMetaTables(docSession), "No metadata available in active document");
async function getMetaTables(activeDocSource: ActiveDocSource): Promise<TableDataActionSet> {
return safe(await activeDocSource.fetchMetaTables(), "No metadata available in active document");
}
// Makes assertion that value does exist or throws an error
@ -120,7 +135,7 @@ function safe<T>(value: T, msg: string) {
}
// Helper for getting table from filtered metadata.
function safeTable<TableId extends keyof SchemaTypes>(metaTables: FilteredMetaTables, tableId: TableId) {
function safeTable<TableId extends keyof SchemaTypes>(metaTables: TableDataActionSet, tableId: TableId) {
const table = safe(metaTables[tableId], `No table '${tableId}' in document`);
const colTypes = safe(schema[tableId], `No table '${tableId}' in document schema`);
return new MetaTableData<TableId>(tableId, table, colTypes);
@ -140,22 +155,21 @@ function checkTableAccess(tables: MetaTableData<"_grist_Tables">, tableRef: numb
/**
* Builds export for all raw tables that are in doc.
* @param activeDoc Active document
* @param req Request
*/
export async function exportDoc(
activeDoc: ActiveDoc,
req: express.Request) {
const metaTables = await getMetaTables(activeDoc, req);
export async function doExportDoc(
activeDocSource: ActiveDocSource,
handleTable: (data: ExportData) => Promise<void>,
): Promise<void> {
const metaTables = await getMetaTables(activeDocSource);
const tables = safeTable(metaTables, '_grist_Tables');
// select raw tables
const tableRefs = tables.filterRowIds({ summarySourceTable: 0 });
const tableExports = await Promise.all(
tableRefs
.filter(tId => !isTableCensored(tables, tId)) // Omit censored tables
.map(tId => exportTable(activeDoc, tId, req, {metaTables}))
);
return tableExports;
for (const tableRef of tableRefs) {
if (!isTableCensored(tables, tableRef)) { // Omit censored tables
const data = await doExportTable(activeDocSource, {metaTables, tableRef});
await handleTable(data);
}
}
}
/**
@ -165,12 +179,31 @@ export async function exportTable(
activeDoc: ActiveDoc,
tableRef: number,
req: express.Request,
{metaTables}: {metaTables?: FilteredMetaTables} = {},
{metaTables}: {metaTables?: TableDataActionSet} = {},
): Promise<ExportData> {
metaTables = metaTables || await getMetaTables(activeDoc, req);
return doExportTable(new ActiveDocSourceDirect(activeDoc, req), {metaTables, tableRef});
}
export async function doExportTable(
activeDocSource: ActiveDocSource,
options: {metaTables?: TableDataActionSet, tableRef?: number, tableId?: string},
) {
const metaTables = options.metaTables || await getMetaTables(activeDocSource);
const docData = new DocData((tableId) => { throw new Error("Unexpected DocData fetch"); }, metaTables);
const tables = safeTable(metaTables, '_grist_Tables');
const metaColumns = safeTable(metaTables, '_grist_Tables_column');
let tableRef: number;
if (options.tableRef) {
tableRef = options.tableRef;
} else {
if (!options.tableId) { throw new Error('doExportTable: tableRef or tableId must be given'); }
tableRef = tables.findRow('tableId', options.tableId);
if (tableRef === 0) {
throw new ApiError(`Table ${options.tableId} not found.`, 404);
}
}
checkTableAccess(tables, tableRef);
const table = safeRecord(tables, tableRef);
@ -197,7 +230,7 @@ export async function exportTable(
});
// fetch actual data
const {tableData} = await activeDoc.fetchTable(docSessionFromRequest(req as RequestWithLogin), table.tableId, true);
const tableData = await activeDocSource.fetchTable(table.tableId);
const rowIds = tableData[2];
const dataByColId = tableData[3];
// sort rows
@ -216,14 +249,15 @@ export async function exportTable(
const docInfo = safeRecord(safeTable(metaTables, '_grist_DocInfo'), 1);
const docSettings = gutil.safeJsonParse(docInfo.documentSettings, {});
return {
const exportData: ExportData = {
tableName,
docName: activeDoc.docName,
docName: await activeDocSource.getDocName(),
rowIds,
access,
columns,
docSettings
};
return exportData;
}
/**
@ -235,9 +269,20 @@ export async function exportSection(
sortSpec: Sort.SortSpec | null,
filters: Filter[] | null,
req: express.Request,
{metaTables}: {metaTables?: FilteredMetaTables} = {},
{metaTables}: {metaTables?: TableDataActionSet} = {},
): Promise<ExportData> {
metaTables = metaTables || await getMetaTables(activeDoc, req);
return doExportSection(new ActiveDocSourceDirect(activeDoc, req), viewSectionId, sortSpec,
filters, {metaTables});
}
export async function doExportSection(
activeDocSource: ActiveDocSource,
viewSectionId: number,
sortSpec: Sort.SortSpec | null,
filters: Filter[] | null,
{metaTables}: {metaTables?: TableDataActionSet} = {},
): Promise<ExportData> {
metaTables = metaTables || await getMetaTables(activeDocSource);
const docData = new DocData((tableId) => { throw new Error("Unexpected DocData fetch"); }, metaTables);
const viewSections = safeTable(metaTables, '_grist_Views_section');
const viewSection = safeRecord(viewSections, viewSectionId);
@ -298,7 +343,7 @@ export async function exportSection(
});
// fetch actual data
const {tableData} = await activeDoc.fetchTable(docSessionFromRequest(req as RequestWithLogin), table.tableId, true);
const tableData = await activeDocSource.fetchTable(table.tableId);
let rowIds = tableData[2];
const dataByColId = tableData[3];
// sort rows
@ -318,14 +363,15 @@ export async function exportSection(
const docInfo = safeRecord(safeTable(metaTables, '_grist_DocInfo'), 1);
const docSettings = gutil.safeJsonParse(docInfo.documentSettings, {});
return {
const exportData: ExportData = {
rowIds,
docSettings,
tableName: table.tableId,
docName: activeDoc.docName,
docName: await activeDocSource.getDocName(),
access: viewColumns.map(col => getters.getColGetter(col.id)!),
columns: viewColumns
};
return exportData;
}
type GristViewsSectionField = MetaRowRecord<'_grist_Views_section_field'>

View File

@ -18,7 +18,7 @@ export async function downloadCSV(activeDoc: ActiveDoc, req: express.Request,
log.info('Generating .csv file...');
const {filename, tableId, viewSectionId, filters, sortOrder} = options;
const data = viewSectionId ?
await makeCSVFromViewSection(activeDoc, viewSectionId, sortOrder, filters, req) :
await makeCSVFromViewSection(activeDoc, viewSectionId, sortOrder || null, filters || null, req) :
await makeCSVFromTable(activeDoc, tableId, req);
res.set('Content-Type', 'text/csv');
res.setHeader('Content-Disposition', contentDisposition(filename + '.csv'));
@ -39,8 +39,8 @@ export async function downloadCSV(activeDoc: ActiveDoc, req: express.Request,
export async function makeCSVFromViewSection(
activeDoc: ActiveDoc,
viewSectionId: number,
sortOrder: number[],
filters: Filter[],
sortOrder: number[] | null,
filters: Filter[] | null,
req: express.Request) {
const data = await exportSection(activeDoc, viewSectionId, sortOrder, filters, req);

View File

@ -1,28 +1,102 @@
/**
* Overview of Excel exports, which now use worker-threads.
*
* 1. The flow starts with downloadXLSX() method called in the main thread (or streamXLSX() used for
* Google Drive export).
* 2. It uses the 'piscina' library to call a makeXLSX* method in a worker thread, registered in
* workerExporter.ts, to export full doc, a table, or a section.
* 3. Each of those methods calls a same-named method that's defined in this file. I.e.
* downloadXLSX() is called in the main thread, but makeXLSX() is called in the worker thread.
* 4. makeXLSX* methods here get data using an ActiveDocSource, which uses Rpc (from grain-rpc
* module) to request data over a message port from the ActiveDoc in the main thread.
* 5. The resulting stream of Excel data is streamed back to the main thread using Rpc too.
*/
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
import {createExcelFormatter} from 'app/server/lib/ExcelFormatter';
import {DownloadOptions, ExportData, exportDoc, exportSection, exportTable, Filter} from 'app/server/lib/Export';
import {Alignment, Border, Fill, Workbook} from 'exceljs';
import * as express from 'express';
import {ActiveDocSource, ActiveDocSourceDirect, DownloadOptions, ExportParameters} from 'app/server/lib/Export';
import {doExportDoc, doExportSection, doExportTable, ExportData, Filter} from 'app/server/lib/Export';
import log from 'app/server/lib/log';
import {Alignment, Border, stream as ExcelWriteStream, Fill} from 'exceljs';
import * as express from 'express';
import contentDisposition from 'content-disposition';
import { ApiError } from 'app/common/ApiError';
import {Rpc} from 'grain-rpc';
import {AbortController} from 'node-abort-controller';
import {Stream, Writable} from 'stream';
import {MessageChannel} from 'worker_threads';
import Piscina from 'piscina';
// Configure the thread-pool to use for exporting XLSX files.
const exportPool = new Piscina({
filename: __dirname + '/workerExporter.js',
minThreads: 0,
maxThreads: 4,
maxQueue: 100, // Fail if this many tasks are already waiting for a thread.
idleTimeout: 10_000, // Drop unused threads after 10s of inactivity.
});
/**
* Converts `activeDoc` to XLSX and sends the converted data through `res`.
*/
export async function downloadXLSX(activeDoc: ActiveDoc, req: express.Request,
res: express.Response, options: DownloadOptions) {
log.debug(`Generating .xlsx file`);
const {filename, tableId, viewSectionId, filters, sortOrder} = options;
// handling 3 cases: full XLSX export (full file), view xlsx export, table xlsx export
const data = viewSectionId ? await makeXLSXFromViewSection(activeDoc, viewSectionId, sortOrder, filters, req)
: tableId ? await makeXLSXFromTable(activeDoc, tableId, req)
: await makeXLSX(activeDoc, req);
res.set('Content-Type', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
const {filename} = options;
res.setHeader('Content-Type', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
res.setHeader('Content-Disposition', contentDisposition(filename + '.xlsx'));
res.send(data);
log.debug('XLSX file generated');
return streamXLSX(activeDoc, req, res, options);
}
/**
 * Converts `activeDoc` to XLSX and writes the result to `outputStream`.
 *
 * The conversion itself runs in a worker thread taken from `exportPool`. This function sets up
 * a MessageChannel to that worker: over it, the worker requests document data (served here via
 * an ActiveDocSourceDirect), and sends back chunks of the generated file, which are written to
 * `outputStream`. `outputStream` is ended when the main-thread side of the channel closes.
 *
 * `options` selects what to export: a view section if `viewSectionId` is set, otherwise a single
 * table if `tableId` is non-empty, otherwise the full document.
 */
export async function streamXLSX(activeDoc: ActiveDoc, req: express.Request,
                                 outputStream: Writable, options: ExportParameters) {
  log.debug(`Generating .xlsx file`);
  const {tableId, viewSectionId, filters, sortOrder} = options;
  const testDates = (req.hostname === 'localhost');

  // mainPort stays in this thread; workerPort is transferred to the worker thread.
  const {port1: mainPort, port2: workerPort} = new MessageChannel();
  try {
    const rpc = new Rpc({
      sendMessage: async (msg) => mainPort.postMessage(msg),
      logger: { info: () => {}, warn: (msg) => log.warn(msg) },
    });
    // Serve the worker's requests for document data.
    rpc.registerImpl<ActiveDocSource>("activeDocSource", new ActiveDocSourceDirect(activeDoc, req));

    // Plain rpc messages carry chunks of the generated file; pass them on to the output.
    rpc.on('message', (chunk) => { outputStream.write(chunk); });
    mainPort.on('message', (msg) => rpc.receiveMessage(msg));

    // When the worker thread is done, it closes the port on its side, and we listen to that to
    // end the original request (the incoming HTTP request, in case of a download).
    mainPort.on('close', () => { outputStream.end(); });

    // For request cancelling to work, remember that such requests are forwarded via DocApiForwarder.
    const abortController = new AbortController();
    req.on('close', () => abortController.abort());

    // Handling 3 cases: view section export, single-table export, full-document export.
    let method: string;
    let args: any[];
    if (viewSectionId) {
      method = 'makeXLSXFromViewSection';
      args = [viewSectionId, sortOrder, filters];
    } else if (tableId) {
      method = 'makeXLSXFromTable';
      args = [tableId];
    } else {
      method = 'makeXLSX';
      args = [];
    }

    try {
      await exportPool.run({port: workerPort, testDates, args}, {
        name: method,
        signal: abortController.signal,
        transferList: [workerPort],
      });
      log.debug('XLSX file generated');
    } catch (e) {
      // We fiddle with errors in workerExporter to preserve extra properties like 'status'. Make
      // the result an instance of Error again here (though we won't know the exact class).
      throw (e instanceof Error) ? e : Object.assign(new Error(e.message), e);
    }
  } finally {
    mainPort.close();
    workerPort.close();
  }
}
/**
@ -33,17 +107,18 @@ export async function downloadXLSX(activeDoc: ActiveDoc, req: express.Request,
* @param {Integer[]} activeSortOrder (optional) - overriding sort order.
* @param {Filter[]} filters (optional) - filters defined from ui.
*/
export async function makeXLSXFromViewSection(
activeDoc: ActiveDoc,
export async function makeXLSXFromViewSection(
activeDocSource: ActiveDocSource,
testDates: boolean,
stream: Stream,
viewSectionId: number,
sortOrder: number[],
filters: Filter[],
req: express.Request,
) {
const data = await exportSection(activeDoc, viewSectionId, sortOrder, filters, req);
const xlsx = await convertToExcel([data], req.hostname === 'localhost');
return xlsx;
const data = await doExportSection(activeDocSource, viewSectionId, sortOrder, filters);
const {exportTable, end} = convertToExcel(stream, testDates);
exportTable(data);
await end();
}
/**
@ -53,45 +128,42 @@ export async function downloadXLSX(activeDoc: ActiveDoc, req: express.Request,
* @param {Integer} tableId - id of the table to export.
*/
export async function makeXLSXFromTable(
activeDoc: ActiveDoc,
activeDocSource: ActiveDocSource,
testDates: boolean,
stream: Stream,
tableId: string,
req: express.Request
) {
if (!activeDoc.docData) {
throw new Error('No docData in active document');
}
// Look up the table to make a XLSX from.
const tables = activeDoc.docData.getMetaTable('_grist_Tables');
const tableRef = tables.findRow('tableId', tableId);
if (tableRef === 0) {
throw new ApiError(`Table ${tableId} not found.`, 404);
}
const data = await exportTable(activeDoc, tableRef, req);
const xlsx = await convertToExcel([data], req.hostname === 'localhost');
return xlsx;
const data = await doExportTable(activeDocSource, {tableId});
const {exportTable, end} = convertToExcel(stream, testDates);
exportTable(data);
await end();
}
/**
* Creates excel document with all tables from an active Grist document.
*/
export async function makeXLSX(
activeDoc: ActiveDoc,
req: express.Request,
): Promise<ArrayBuffer> {
const content = await exportDoc(activeDoc, req);
const data = await convertToExcel(content, req.hostname === 'localhost');
return data;
activeDocSource: ActiveDocSource,
testDates: boolean,
stream: Stream,
): Promise<void> {
const {exportTable, end} = convertToExcel(stream, testDates);
await doExportDoc(activeDocSource, async (table: ExportData) => exportTable(table));
await end();
}
/**
* Converts export data to an excel file.
*/
async function convertToExcel(tables: ExportData[], testDates: boolean) {
// Create workbook and add single sheet to it.
const wb = new Workbook();
function convertToExcel(stream: Stream, testDates: boolean): {
exportTable: (table: ExportData) => void,
end: () => Promise<void>,
} {
// Create workbook and add single sheet to it. Using the WorkbookWriter interface avoids
// creating the entire Excel file in memory, which can be very memory-heavy. See
// https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (The options useStyles and
// useSharedStrings replicate more closely what was used previously.)
const wb = new ExcelWriteStream.xlsx.WorkbookWriter({useStyles: true, useSharedStrings: true, stream});
if (testDates) {
// HACK: for testing, we will keep static dates
const date = new Date(Date.UTC(2018, 11, 1, 0, 0, 0));
@ -126,7 +198,7 @@ async function convertToExcel(tables: ExportData[], testDates: boolean) {
const centerAlignment: Partial<Alignment> = {
horizontal: 'center'
};
for (const table of tables) {
function exportTable(table: ExportData) {
const { columns, rowIds, access, tableName } = table;
const ws = wb.addWorksheet(sanitizeWorksheetName(tableName));
// Build excel formatters.
@ -134,10 +206,6 @@ async function convertToExcel(tables: ExportData[], testDates: boolean) {
// Generate headers for all columns with correct styles for whole column.
// Actual header style for a first row will be overwritten later.
ws.columns = columns.map((col, c) => ({ header: col.label, style: formatters[c].style() }));
// Populate excel file with data
rowIds.forEach(row => {
ws.addRow(access.map((getter, c) => formatters[c].formatAny(getter(row))));
});
// style up the header row
for (let i = 1; i <= columns.length; i++) {
// apply to all rows (including header)
@ -156,9 +224,14 @@ async function convertToExcel(tables: ExportData[], testDates: boolean) {
// 14 points is about 100 pixels in a default font (point is around 7.5 pixels)
column.width = column.header.length < 14 ? 14 : column.header.length;
});
// Populate excel file with data
for (const row of rowIds) {
ws.addRow(access.map((getter, c) => formatters[c].formatAny(getter(row)))).commit();
}
ws.commit();
}
return await wb.xlsx.writeBuffer();
function end() { return wb.commit(); }
return {exportTable, end};
}
/**

View File

@ -49,6 +49,7 @@ import {IPermitStore} from 'app/server/lib/Permit';
import {getAppPathTo, getAppRoot, getUnpackedAppRoot} from 'app/server/lib/places';
import {addPluginEndpoints, limitToPlugins} from 'app/server/lib/PluginEndpoint';
import {PluginManager} from 'app/server/lib/PluginManager';
import * as ProcessMonitor from 'app/server/lib/ProcessMonitor';
import {adaptServerUrl, getOrgUrl, getOriginUrl, getScope, optStringParam,
RequestWithGristInfo, stringParam, TEST_HTTPS_OFFSET, trustOrigin} from 'app/server/lib/requestUtils';
import {ISendAppPageOptions, makeGristConfig, makeMessagePage, makeSendAppPage} from 'app/server/lib/sendAppPage';
@ -130,6 +131,7 @@ export class FlexServer implements GristServer {
private _sessionStore: SessionStore;
private _storageManager: IDocStorageManager;
private _telemetryManager: TelemetryManager|undefined;
private _processMonitorStop?: () => void; // Callback to stop the ProcessMonitor
private _docWorkerMap: IDocWorkerMap;
private _widgetRepository: IWidgetRepository;
private _notifier: INotifier;
@ -692,6 +694,9 @@ export class FlexServer implements GristServer {
this._telemetryManager = new TelemetryManager(this._dbManager);
// Start up a monitor for memory and cpu usage.
this._processMonitorStop = ProcessMonitor.start(this._telemetryManager);
this.app.post('/api/telemetry', async (req, resp) => {
const mreq = req as RequestWithLogin;
const name = stringParam(req.body.name, 'name', TelemetryEventNames);
@ -705,6 +710,7 @@ export class FlexServer implements GristServer {
}
public async close() {
this._processMonitorStop?.();
if (this.usage) { await this.usage.close(); }
if (this._hosts) { this._hosts.close(); }
if (this._dbManager) {

View File

@ -1,11 +1,11 @@
import {drive} from '@googleapis/drive';
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
import {RequestWithLogin} from 'app/server/lib/Authorizer';
import {makeXLSX} from 'app/server/lib/ExportXLSX';
import {streamXLSX} from 'app/server/lib/ExportXLSX';
import log from 'app/server/lib/log';
import {optStringParam} from 'app/server/lib/requestUtils';
import {Request, Response} from 'express';
import {PassThrough} from 'stream';
import {PassThrough, Stream} from 'stream';
/**
* Endpoint logic for sending grist document to Google Drive. Grist document is first exported as an
@ -30,10 +30,15 @@ export async function exportToDrive(
};
// Prepare file for exporting.
log.debug(`Export to drive - Preparing file for export`, meta);
const { name, data } = await prepareFile(activeDoc, req);
const name = (optStringParam(req.query.title) || activeDoc.docName);
const stream = new PassThrough();
try {
// Send file to GDrive and get the url for a preview.
const url = await sendFileToDrive(name, data, access_token);
const [, url] = await Promise.all([
streamXLSX(activeDoc, req, stream, {tableId: ''}),
sendFileToDrive(name, stream, access_token),
]);
log.debug(`Export to drive - File exported, redirecting to Google Spreadsheet ${url}`, meta);
res.json({ url });
} catch (err) {
@ -48,7 +53,7 @@ export async function exportToDrive(
}
// Creates spreadsheet file in a Google drive, by sending an excel and requesting for conversion.
async function sendFileToDrive(fileNameNoExt: string, data: ArrayBuffer, oauth_token: string): Promise<string> {
async function sendFileToDrive(fileNameNoExt: string, stream: Stream, oauth_token: string): Promise<string> {
// Here we are asking google drive to convert excel file to a google spreadsheet
const requestBody = {
// name of the spreadsheet to create
@ -56,9 +61,6 @@ async function sendFileToDrive(fileNameNoExt: string, data: ArrayBuffer, oauth_t
// mime type of the google spreadsheet
mimeType: 'application/vnd.google-apps.spreadsheet'
};
// wrap buffer into a stream
const stream = new PassThrough();
stream.end(data);
// Define what gets send - excel file
const media = {
mimeType: 'application/vnd.ms-excel',
@ -77,10 +79,3 @@ async function sendFileToDrive(fileNameNoExt: string, data: ArrayBuffer, oauth_t
}
return url;
}
// Makes excel file the same way as export to excel works.
async function prepareFile(doc: ActiveDoc, req: Request) {
const data = await makeXLSX(doc, req);
const name = (optStringParam(req.query.title) || doc.docName);
return { name, data };
}

View File

@ -0,0 +1,79 @@
import { TelemetryManager } from 'app/server/lib/TelemetryManager';
const MONITOR_PERIOD_MS = 5_000; // take a look at memory usage this often
const MEMORY_DELTA_FRACTION = 0.1; // fraction by which usage should change to get reported
const CPU_DELTA_FRACTION = 0.1; // by how much cpu usage should change to get reported
const MONITOR_LOG_PERIOD_MS = 600_000; // log usage at least this often
let _timer: NodeJS.Timeout|undefined;
let _lastTickTime: number = Date.now();
let _lastReportTime: number = 0;
let _lastReportedHeapUsed: number = 0;
let _lastCpuUsage: NodeJS.CpuUsage = {system: 0, user: 0};
let _lastReportedCpuAverage: number = 0;
/**
 * Monitor process memory (heap) and CPU usage, reporting as telemetry on an interval, and more
 * often when usage ticks up or down by a big enough delta.
 *
 * There is a single global process monitor, reporting to the telemetryManager passed into the
 * first call to start().
 *
 * Returns a function that stops the monitor, or undefined if there was already a process monitor
 * running, and no new one was started. The returned function only affects the monitor started by
 * this call: calling it again after a newer monitor has been started leaves the newer one alone.
 *
 * Reports:
 *  - heapUsedMB:   Size of JS heap in use, in MiB.
 *  - heapTotalMB:  Total heap size, in MiB, allocated for JS by v8.
 *  - cpuAverage:   CPU time used since the previous tick divided by the elapsed wall-clock time.
 *                  Note it includes usage from all threads, so may exceed 1.
 *  - intervalMs:   Interval (in milliseconds) over which cpuAverage is reported. Being much
 *                  higher than MONITOR_PERIOD_MS is a sign of being CPU bound for that long.
 */
export function start(telemetryManager: TelemetryManager): (() => void) | undefined {
  if (_timer) {
    // A monitor is already running; don't start a second one.
    return undefined;
  }

  // Initialize variables needed for accurate first-tick measurement.
  _lastTickTime = Date.now();
  _lastCpuUsage = process.cpuUsage();

  // Keep our own reference to the timer, so that stop() never touches a timer created by a
  // later call to start().
  const timer = setInterval(() => monitor(telemetryManager), MONITOR_PERIOD_MS);
  _timer = timer;

  return function stop() {
    clearInterval(timer);
    if (_timer === timer) {
      _timer = undefined;
    }
  };
}
/**
 * A single tick of the process monitor. Measures heap usage and the CPU time used since the
 * previous tick, and logs a 'processMonitor' telemetry event if a report is due or if usage
 * has changed enough since the last report.
 */
function monitor(telemetryManager: TelemetryManager) {
  const memory = process.memoryUsage();
  const cpuNow = process.cpuUsage();
  const tickTime = Date.now();
  const intervalMs = tickTime - _lastTickTime;

  // Note that cpuUsage info is in microseconds, while intervalMs is milliseconds.
  const cpuMicros = cpuNow.system + cpuNow.user - _lastCpuUsage.system - _lastCpuUsage.user;
  const cpuAverage = cpuMicros / 1000 / intervalMs;
  _lastCpuUsage = cpuNow;
  _lastTickTime = tickTime;

  // (a) enough time has passed (MONITOR_LOG_PERIOD_MS) since the last report
  const reportIsDue = tickTime > _lastReportTime + MONITOR_LOG_PERIOD_MS;
  // (b) memory usage ticked up or down enough since the last report
  const heapMoved =
    Math.abs(memory.heapUsed - _lastReportedHeapUsed) > _lastReportedHeapUsed * MEMORY_DELTA_FRACTION;
  // (c) average cpu usage ticked up or down enough since the last report
  const cpuMoved = Math.abs(cpuAverage - _lastReportedCpuAverage) > CPU_DELTA_FRACTION;

  if (!(reportIsDue || heapMoved || cpuMoved)) {
    return;
  }

  telemetryManager.logEvent('processMonitor', {
    heapUsedMB: Math.round(memory.heapUsed/1024/1024),
    heapTotalMB: Math.round(memory.heapTotal/1024/1024),
    cpuAverage: Math.round(cpuAverage * 100) / 100,
    intervalMs,
  });
  // Remember what was reported, as the baseline for the next comparison.
  _lastReportedHeapUsed = memory.heapUsed;
  _lastReportedCpuAverage = cpuAverage;
  _lastReportTime = tickTime;
}

View File

@ -36,7 +36,8 @@ const buildJsonErrorHandler = (options: JsonErrorHandlerOptions = {}): express.E
body: shouldLogBody !== false ? req.body : undefined,
params: shouldLogParams !== false ? req.params : undefined,
};
log.rawWarn(`Error during api call to ${meta.path}: ${err.message}`, meta);
const headersNote = res.headersSent ? " (headersSent)" : "";
log.rawWarn(`Error during api call to ${meta.path}${headersNote}: ${err.message}`, meta);
let details = err.details && {...err.details};
const status = details?.status || err.status || 500;
if (details) {
@ -45,7 +46,14 @@ const buildJsonErrorHandler = (options: JsonErrorHandlerOptions = {}): express.E
delete details.status; // TODO: reconcile err.status and details.status, no need for both.
if (Object.keys(details).length === 0) { details = undefined; }
}
res.status(status).json({error: err.message || 'internal error', details});
if (res.headersSent) {
// If we've already sent headers, attempting to set them to something else will fail. E.g. this
// can happen with downloads if a request gets aborted. If so, just close the response; we
// already reported the error above.
res.end();
} else {
res.status(status).json({error: err.message || 'internal error', details});
}
};
};

View File

@ -0,0 +1,72 @@
import {PassThrough} from 'stream';
import {ActiveDocSource} from 'app/server/lib/Export';
import * as ExportXLSX from 'app/server/lib/ExportXLSX';
import * as log from 'app/server/lib/log';
import {Rpc} from 'grain-rpc';
import {Stream} from 'stream';
import {MessagePort, threadId} from 'worker_threads';
// Exported export tasks. Each one is the corresponding ExportXLSX function wrapped by
// handleExport(), so that it takes a single {port, testDates, args} argument, talks to the
// ActiveDocSource over `port`, and posts the generated output back in buffered chunks.
export const makeXLSX = handleExport(ExportXLSX.makeXLSX);
export const makeXLSXFromTable = handleExport(ExportXLSX.makeXLSXFromTable);
export const makeXLSXFromViewSection = handleExport(ExportXLSX.makeXLSXFromViewSection);
/**
 * Wraps one of the ExportXLSX `make*` functions into a task that takes a single
 * `{port, testDates, args}` argument.
 *
 * The returned task:
 *  - sets up an Rpc object that sends its messages via `port.postMessage()` and is fed every
 *    'message' event received on `port`;
 *  - obtains an "activeDocSource" stub from that Rpc and passes it to `make`;
 *  - gives `make` a PassThrough stream as its output, whose data is forwarded in buffered
 *    chunks using `rpc.postMessage()`;
 *  - closes `port` once `make` resolves (the port is NOT closed on the error path).
 *
 * If anything throws, the error is logged and re-thrown as a plain object (see the comment in
 * the catch clause for why).
 */
function handleExport<T extends any[]>(
  make: (a: ActiveDocSource, testDates: boolean, output: Stream, ...args: T) => Promise<void>
) {
  return async function(task: {port: MessagePort, testDates: boolean, args: T}) {
    const {port, testDates, args} = task;
    try {
      const startedAt = Date.now();
      log.debug("workerExporter %s %s: started", threadId, make.name);

      // Two-way channel over the message port: outgoing messages are posted to the port,
      // incoming ones are handed to the Rpc object.
      const channel = new Rpc({
        sendMessage: async (m) => port.postMessage(m),
        logger: {
          info: () => {},
          warn: (msg) => log.warn(msg),
        },
      });
      const docSource = channel.getStub<ActiveDocSource>("activeDocSource");
      port.on('message', (msg) => channel.receiveMessage(msg));

      // Whatever `make` writes to the sink gets batched and posted through the Rpc channel.
      const sink = new PassThrough();
      bufferedPipe(sink, (data) => channel.postMessage(data));

      await make(docSource, testDates, sink, ...args);
      port.close();

      const elapsedMs = Date.now() - startedAt;
      log.debug("workerExporter %s %s: done in %s ms", threadId, make.name, elapsedMs);
    } catch (err) {
      log.debug("workerExporter %s %s: error %s", threadId, make.name, String(err));
      // Error objects keep only their 'message' property when they cross thread boundaries.
      // Throwing a plain object instead lets other properties (such as 'status') survive.
      // (No good reference was found for this; https://github.com/nodejs/node/issues/35506 is
      // vaguely related.)
      const plainError = {message: err.message, ...err};
      throw plainError;
    }
  };
}
/**
 * Batches the 'data' chunks emitted by `stream` and passes them to `callback` as larger buffers.
 *
 * ExcelJS's WorkbookWriter produces many tiny writes (even though they pass through zipping).
 * Buffering them and passing them on in chunks reduces overhead and context switching. (In
 * practice, this helps performance only slightly.)
 *
 * @param stream - Source of 'data' and 'end' events; each data chunk is expected to be a Buffer.
 * @param callback - Called with the concatenation of the chunks accumulated so far.
 * @param threshold - Number of accumulated bytes at which the batch is passed on.
 */
function bufferedPipe(stream: Stream, callback: (chunk: Buffer) => void, threshold = 64*1024) {
  let pending: Buffer[] = [];
  let pendingBytes = 0;
  let deliveredBytes = 0;

  // Hands everything accumulated so far to the callback; a no-op when nothing is pending.
  const deliver = (): void => {
    if (pendingBytes > 0) {
      const combined = Buffer.concat(pending);
      deliveredBytes += combined.length;
      callback(combined);
      pending = [];
      pendingBytes = 0;
    }
  };

  // Every chunk written to the stream is added to the pending batch.
  stream.on('data', (chunk: Buffer) => {
    pending.push(chunk);
    pendingBytes += chunk.length;

    // The very first chunk is passed on right away: since this becomes an HTTP response, a quick
    // first chunk lets the browser prompt the user sooner about what to do with the download.
    // After that, wait until the batch reaches the threshold.
    const nothingDeliveredYet = (deliveredBytes === 0);
    if (nothingDeliveredYet || pendingBytes >= threshold) {
      deliver();
    }
  });

  // Pass on whatever is left once the stream ends.
  stream.on('end', deliver);
}

View File

@ -165,6 +165,7 @@
"node-abort-controller": "3.0.1",
"node-fetch": "2.6.7",
"pg": "8.6.0",
"piscina": "3.2.0",
"plotly.js-basic-dist": "2.13.2",
"popper-max-size-modifier": "0.2.0",
"popweasel": "0.1.18",

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -10,6 +10,11 @@
"@jridgewell/gen-mapping" "^0.1.0"
"@jridgewell/trace-mapping" "^0.3.9"
"@assemblyscript/loader@^0.10.1":
version "0.10.1"
resolved "https://registry.yarnpkg.com/@assemblyscript/loader/-/loader-0.10.1.tgz#70e45678f06c72fa2e350e8553ec4a4d72b92e06"
integrity sha512-H71nDOOL8Y7kWRLqf6Sums+01Q5msqBW2KhDUTemh1tvY04eSkSXrK0uj/4mmY0Xr16/3zyZmsrxN7CKuRbNRg==
"@babel/code-frame@^7.16.7", "@babel/code-frame@^7.18.6":
version "7.18.6"
resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.18.6.tgz#3b25d38c89600baa2dcc219edfa88a74eb2c427a"
@ -1672,7 +1677,7 @@ balanced-match@^1.0.0:
resolved "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz"
integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c=
base64-js@^1.3.0, base64-js@^1.3.1:
base64-js@^1.2.0, base64-js@^1.3.0, base64-js@^1.3.1:
version "1.5.1"
resolved "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz"
integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==
@ -3559,6 +3564,11 @@ event-target-shim@^5.0.0:
resolved "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz"
integrity sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==
eventemitter-asyncresource@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/eventemitter-asyncresource/-/eventemitter-asyncresource-1.0.0.tgz#734ff2e44bf448e627f7748f905d6bdd57bdb65b"
integrity sha512-39F7TBIV0G7gTelxwbEqnwhp90eqCPON1k0NwNfwhgKn4Co4ybUbj2pECcXT0B3ztRKZ7Pw1JujUUgmQJHcVAQ==
eventemitter3@^4.0.0:
version "4.0.7"
resolved "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz"
@ -4450,6 +4460,20 @@ hash.js@^1.0.0, hash.js@^1.0.3:
inherits "^2.0.3"
minimalistic-assert "^1.0.1"
hdr-histogram-js@^2.0.1:
version "2.0.3"
resolved "https://registry.yarnpkg.com/hdr-histogram-js/-/hdr-histogram-js-2.0.3.tgz#0b860534655722b6e3f3e7dca7b78867cf43dcb5"
integrity sha512-Hkn78wwzWHNCp2uarhzQ2SGFLU3JY8SBDDd3TAABK4fc30wm+MuPOrg5QVFVfkKOQd6Bfz3ukJEI+q9sXEkK1g==
dependencies:
"@assemblyscript/loader" "^0.10.1"
base64-js "^1.2.0"
pako "^1.0.3"
hdr-histogram-percentiles-obj@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/hdr-histogram-percentiles-obj/-/hdr-histogram-percentiles-obj-3.0.0.tgz#9409f4de0c2dda78e61de2d9d78b1e9f3cba283c"
integrity sha512-7kIufnBqdsBGcSZLPJwqHT3yhk1QTsSlFsVD3kx5ixH/AlgBs9yM1q6DPhXZ8f8gtdqgh7N7/5btRLpQsS2gHw==
he@1.1.1:
version "1.1.1"
resolved "https://registry.npmjs.org/he/-/he-1.1.1.tgz"
@ -6134,6 +6158,14 @@ neo-async@^2.6.0, neo-async@^2.6.2:
resolved "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz"
integrity sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==
nice-napi@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/nice-napi/-/nice-napi-1.0.2.tgz#dc0ab5a1eac20ce548802fc5686eaa6bc654927b"
integrity sha512-px/KnJAJZf5RuBGcfD+Sp2pAKq0ytz8j+1NehvgIGFkvtvFrDM3T8E4x/JJODXK9WZow8RRGrbA9QQ3hs+pDhA==
dependencies:
node-addon-api "^3.0.0"
node-gyp-build "^4.2.2"
nise@^1.4.6:
version "1.5.3"
resolved "https://registry.npmjs.org/nise/-/nise-1.5.3.tgz"
@ -6150,6 +6182,11 @@ node-abort-controller@3.0.1:
resolved "https://registry.yarnpkg.com/node-abort-controller/-/node-abort-controller-3.0.1.tgz#f91fa50b1dee3f909afabb7e261b1e1d6b0cb74e"
integrity sha512-/ujIVxthRs+7q6hsdjHMaj8hRG9NuWmwrz+JdRwZ14jdFoKSkm+vDsCbF9PLpnSqjaWQJuTmVtcWHNLr+vrOFw==
node-addon-api@^3.0.0:
version "3.2.1"
resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-3.2.1.tgz#81325e0a2117789c0128dab65e7e38f07ceba161"
integrity sha512-mmcei9JghVNDYydghQmeDX8KoAm0FAiYyIcUt/N4nhyAipB17pllZQDOJD2fotxABnt4Mdz+dKTO7eftLg4d0A==
node-addon-api@^4.2.0:
version "4.3.0"
resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-4.3.0.tgz#52a1a0b475193e0928e98e0426a0d1254782b77f"
@ -6192,6 +6229,11 @@ node-forge@^0.7.0:
resolved "https://registry.npmjs.org/node-forge/-/node-forge-0.7.6.tgz"
integrity sha512-sol30LUpz1jQFBjOKwbjxijiE3b6pjd74YwfD0fJOKPjF+fONKb2Yg8rYgS6+bK6VDl+/wfr4IYpC7jDzLUIfw==
node-gyp-build@^4.2.2:
version "4.6.0"
resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.6.0.tgz#0c52e4cbf54bbd28b709820ef7b6a3c2d6209055"
integrity sha512-NTZVKn9IylLwUzaKjkas1e4u2DLNcV4rdYagA4PWdPwW87Bi7z+BznyKSRwS/761tV/lzCGXplWsiaMjLqP2zQ==
node-gyp@8.x:
version "8.4.1"
resolved "https://registry.yarnpkg.com/node-gyp/-/node-gyp-8.4.1.tgz#3d49308fc31f768180957d6b5746845fbd429937"
@ -6495,7 +6537,7 @@ packet-reader@1.0.0:
resolved "https://registry.npmjs.org/packet-reader/-/packet-reader-1.0.0.tgz"
integrity sha512-HAKu/fG3HpHFO0AA8WE8q2g+gBJaZ9MG7fcKk+IJPLTGAD6Psw4443l+9DGRbOIh3/aXr7Phy0TjilYivJo5XQ==
pako@~1.0.2, pako@~1.0.5:
pako@^1.0.3, pako@~1.0.2, pako@~1.0.5:
version "1.0.11"
resolved "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz"
integrity sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==
@ -6743,6 +6785,17 @@ pinkie@^2.0.0:
resolved "https://registry.npmjs.org/pinkie/-/pinkie-2.0.4.tgz"
integrity sha1-clVrgM+g1IqXToDnckjoDtT3+HA=
piscina@3.2.0:
version "3.2.0"
resolved "https://registry.yarnpkg.com/piscina/-/piscina-3.2.0.tgz#f5a1dde0c05567775690cccefe59d9223924d154"
integrity sha512-yn/jMdHRw+q2ZJhFhyqsmANcbF6V2QwmD84c6xRau+QpQOmtrBCoRGdvTfeuFDYXB5W2m6MfLkjkvQa9lUSmIA==
dependencies:
eventemitter-asyncresource "^1.0.0"
hdr-histogram-js "^2.0.1"
hdr-histogram-percentiles-obj "^3.0.0"
optionalDependencies:
nice-napi "^1.0.2"
pkg-dir@^4.2.0:
version "4.2.0"
resolved "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz"