mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
d191859be7
Summary: - Excel exports were awfully memory-inefficient, causing occasional docWorker crashes. The fix is to use the "streaming writer" option of ExcelJS https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (Empirically on one example, max memory went down from 3G to 100M) - It's also CPU intensive and synchronous, and can block node for tens of seconds. The fix is to use a worker-thread. This diff uses "piscina" library for a pool of threads. - Additionally, adds ProcessMonitor that logs memory and cpu usage, particularly when those change significantly. - Also introduces request cancellation, so that a long download cancelled by the user will cancel the work being done in the worker thread. Test Plan: Updated previous export tests; memory and CPU performance tested manually by watching output of ProcessMonitor. Difference visible in these log excerpts: Before (total time to serve request 22 sec): ``` Telemetry processMonitor heapUsedMB=2187, heapTotalMB=2234, cpuAverage=1.13, intervalMs=17911 Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0.66, intervalMs=5005 Telemetry processMonitor heapUsedMB=2188, heapTotalMB=2234, cpuAverage=0, intervalMs=5005 Telemetry processMonitor heapUsedMB=71, heapTotalMB=75, cpuAverage=0.13, intervalMs=5002 ``` After (total time to serve request 18 sec): ``` Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=0.5, intervalMs=5001 Telemetry processMonitor heapUsedMB=109, heapTotalMB=144, cpuAverage=1.39, intervalMs=5002 Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.13, intervalMs=5000 Telemetry processMonitor heapUsedMB=94, heapTotalMB=131, cpuAverage=1.35, intervalMs=5001 ``` Note in "Before" that heapTotalMB goes up to 2GB in the first case, and "intervalMs" of 17 seconds indicates that node was unresponsive for that long. In the second case, heapTotalMB stays low, and the main thread remains responsive the whole time. 
Reviewers: jarek Reviewed By: jarek Differential Revision: https://phab.getgrist.com/D3906
382 lines
14 KiB
TypeScript
382 lines
14 KiB
TypeScript
import {ApiError} from 'app/common/ApiError';
|
|
import {buildColFilter} from 'app/common/ColumnFilterFunc';
|
|
import {TableDataAction, TableDataActionSet} from 'app/common/DocActions';
|
|
import {DocData} from 'app/common/DocData';
|
|
import {DocumentSettings} from 'app/common/DocumentSettings';
|
|
import * as gristTypes from 'app/common/gristTypes';
|
|
import * as gutil from 'app/common/gutil';
|
|
import {nativeCompare} from 'app/common/gutil';
|
|
import {isTableCensored} from 'app/common/isHiddenTable';
|
|
import {buildRowFilter} from 'app/common/RowFilterFunc';
|
|
import {schema, SchemaTypes} from 'app/common/schema';
|
|
import {SortFunc} from 'app/common/SortFunc';
|
|
import {Sort} from 'app/common/SortSpec';
|
|
import {MetaRowRecord, MetaTableData} from 'app/common/TableData';
|
|
import {BaseFormatter, createFullFormatterFromDocData} from 'app/common/ValueFormatter';
|
|
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
|
|
import {RequestWithLogin} from 'app/server/lib/Authorizer';
|
|
import {docSessionFromRequest} from 'app/server/lib/DocSession';
|
|
import {optIntegerParam, optJsonParam, stringParam} from 'app/server/lib/requestUtils';
|
|
import {ServerColumnGetters} from 'app/server/lib/ServerColumnGetters';
|
|
import * as express from 'express';
|
|
import * as _ from 'underscore';
|
|
|
|
// Accessor for the cells of a single column: maps a row (given as a number) to its value.
type Access = (row: number) => any;
|
|
|
|
/**
 * Access to document data as needed by an exporter. This is the interface used from an exporter
 * worker thread (workerExporter.ts), so parameters and returned values are all plain data that
 * can be passed over a MessagePort.
 */
export interface ActiveDocSource {
  /** Resolves to the name of the document. */
  getDocName(): Promise<string>;

  /** Resolves to the set of metadata tables of the document. */
  fetchMetaTables(): Promise<TableDataActionSet>;

  /** Resolves to the data of the table with the given tableId. */
  fetchTable(tableId: string): Promise<TableDataAction>;
}
|
|
|
|
/**
 * ActiveDocSource that talks to an ActiveDoc directly. Each fetch is made with a doc session
 * derived from the request given to the constructor.
 */
export class ActiveDocSourceDirect implements ActiveDocSource {
  private _req: RequestWithLogin;

  constructor(private _activeDoc: ActiveDoc, req: express.Request) {
    this._req = req as RequestWithLogin;
  }

  public async getDocName() {
    return this._activeDoc.docName;
  }

  public fetchMetaTables() {
    const docSession = docSessionFromRequest(this._req);
    return this._activeDoc.fetchMetaTables(docSession);
  }

  // Only the tableData part of what ActiveDoc.fetchTable() returns is passed on.
  public async fetchTable(tableId: string) {
    const docSession = docSessionFromRequest(this._req);
    const fetched = await this._activeDoc.fetchTable(docSession, tableId, true);
    return fetched.tableData;
  }
}
|
|
|
|
/**
 * Description of one exported column.
 */
export interface ExportColumn {
  /** Row id of the column record holding the values to export (the display column, if any). */
  id: number;
  /** colId of that same column record. */
  colId: string;
  /** Label of the column. */
  label: string;
  /** Type of the column. */
  type: string;
  /** Formatter created for the column's values. */
  formatter: BaseFormatter;
  /** parentPos of the column record. */
  parentPos: number;
  /** Description of the column. */
  description: string;
}
|
|
|
|
/**
 * Bare data that is exported - used to convert to various formats.
 */
export interface ExportData {
  /** Table name or table id. */
  tableName: string;

  /** Document name. */
  docName: string;

  /** Row ids (filtered and sorted). */
  rowIds: number[];

  /** Accessors for cell values, one per entry of `columns`. */
  access: Access[];

  /** Columns information (primarily used for formatting). */
  columns: ExportColumn[];

  /** Document settings. */
  docSettings: DocumentSettings;
}
|
|
|
|
/**
 * Export parameters: they identify a section, along with the filters and sort order to apply.
 */
export interface ExportParameters {
  /** Id of the table to export. A value of '' is an instruction to export all tables. */
  tableId: string;
  viewSectionId?: number;
  sortOrder?: number[];
  filters?: Filter[];
}
|
|
|
|
/**
 * Options for the CSV and XLSX export functions: the export parameters plus a filename.
 */
export interface DownloadOptions extends ExportParameters {
  filename: string;
}
|
|
|
|
/**
 * Extracts export parameters from the query of a request. The query parameters read are
 * `tableId`, `viewSection`, `activeSortSpec` and `filters`; the last two default to [].
 */
export function parseExportParameters(req: express.Request): ExportParameters {
  const {query} = req;
  return {
    tableId: stringParam(query.tableId, 'tableId'),
    viewSectionId: optIntegerParam(query.viewSection),
    sortOrder: optJsonParam(query.activeSortSpec, []) as number[],
    filters: optJsonParam(query.filters, []) as Filter[],
  };
}
|
|
|
|
// Fetches the metadata tables from the given source, throwing if nothing is returned.
async function getMetaTables(activeDocSource: ActiveDocSource): Promise<TableDataActionSet> {
  const metaTables = await activeDocSource.fetchMetaTables();
  return safe(metaTables, "No metadata available in active document");
}
|
|
|
|
// Returns value as is when it is truthy, and otherwise throws an ApiError with the given message
// and status 404. Note that every falsy value (including 0 and '') is treated as missing.
function safe<T>(value: T, msg: string) {
  if (value) {
    return value as NonNullable<T>;
  }
  throw new ApiError(msg, 404);
}
|
|
|
|
// Wraps one of the metadata tables into a MetaTableData. Throws if the table is absent from
// metaTables, or if the schema has no entry for it.
function safeTable<TableId extends keyof SchemaTypes>(metaTables: TableDataActionSet, tableId: TableId) {
  const tableAction = safe(metaTables[tableId], `No table '${tableId}' in document`);
  const columnTypes = safe(schema[tableId], `No table '${tableId}' in document schema`);
  return new MetaTableData<TableId>(tableId, tableAction, columnTypes);
}
|
|
|
|
// Looks up the record with the given id in a metadata table, throwing if there is none.
function safeRecord<TableId extends keyof SchemaTypes>(table: MetaTableData<TableId>, id: number) {
  const record = table.getRecord(id);
  return safe(record, `No record ${id} in table ${table.tableId}`);
}
|
|
|
|
// Does nothing when tableRef points to an uncensored table; throws an ApiError (404) otherwise.
function checkTableAccess(tables: MetaTableData<"_grist_Tables">, tableRef: number): void {
  const censored = isTableCensored(tables, tableRef);
  if (!censored) { return; }
  throw new ApiError(`Cannot find or access table`, 404);
}
|
|
|
|
/**
 * Exports every raw table of the document, one at a time: the export data of each table is built
 * and handed to handleTable, which is awaited before moving on to the next table.
 */
export async function doExportDoc(
  activeDocSource: ActiveDocSource,
  handleTable: (data: ExportData) => Promise<void>,
): Promise<void> {
  const metaTables = await getMetaTables(activeDocSource);
  const tables = safeTable(metaTables, '_grist_Tables');
  // Raw tables are selected as those whose summarySourceTable is 0.
  const rawTableRefs = tables.filterRowIds({ summarySourceTable: 0 });
  for (const tableRef of rawTableRefs) {
    // Omit censored tables.
    if (isTableCensored(tables, tableRef)) { continue; }
    const data = await doExportTable(activeDocSource, {metaTables, tableRef});
    await handleTable(data);
  }
}
|
|
|
|
/**
 * Builds export data for a table that can be used to produce files in various formats (csv, xlsx).
 * The data is read from activeDoc directly, on behalf of the given request.
 */
export async function exportTable(
  activeDoc: ActiveDoc,
  tableRef: number,
  req: express.Request,
  {metaTables}: {metaTables?: TableDataActionSet} = {},
): Promise<ExportData> {
  const source = new ActiveDocSourceDirect(activeDoc, req);
  return doExportTable(source, {metaTables, tableRef});
}
|
|
|
|
/**
 * Builds export data for a whole table, with rows in the order they were fetched.
 *
 * The table is identified by options.tableRef or, when that is not set, by options.tableId.
 * Metadata is taken from options.metaTables when given, and fetched from activeDocSource otherwise.
 *
 * Throws a plain Error when neither tableRef nor tableId is given, and an ApiError (404) when the
 * table cannot be found or accessed, or when needed metadata is missing.
 */
export async function doExportTable(
  activeDocSource: ActiveDocSource,
  options: {metaTables?: TableDataActionSet, tableRef?: number, tableId?: string},
) {
  const metaTables = options.metaTables || await getMetaTables(activeDocSource);
  // All the metadata is supplied up front, so DocData is not expected to fetch anything.
  const docData = new DocData(() => { throw new Error("Unexpected DocData fetch"); }, metaTables);
  const tables = safeTable(metaTables, '_grist_Tables');
  const metaColumns = safeTable(metaTables, '_grist_Tables_column');

  // Prefer an explicit tableRef; otherwise look the table up by its tableId.
  const resolveTableRef = (): number => {
    if (options.tableRef) { return options.tableRef; }
    if (!options.tableId) { throw new Error('doExportTable: tableRef or tableId must be given'); }
    const found = tables.findRow('tableId', options.tableId);
    if (found === 0) {
      throw new ApiError(`Table ${options.tableId} not found.`, 404);
    }
    return found;
  };
  const tableRef = resolveTableRef();

  checkTableAccess(tables, tableRef);
  const table = safeRecord(tables, tableRef);

  // Take only the columns of this table, ordered by parentPos and then id, which should be the
  // same order as in raw data.
  const tableColumns = metaColumns.filterRecords({parentId: tableRef});
  tableColumns.sort((c1, c2) => nativeCompare(c1.parentPos, c2.parentPos) || nativeCompare(c1.id, c2.id));

  // Describe the columns the way the user sees them / expects them to be exported.
  const columns: ExportColumn[] = [];
  for (const tc of tableColumns) {
    // Helper columns are not exported.
    if (gristTypes.isHiddenCol(tc.colId)) { continue; }
    // For reference columns, values come from the display column, while the remaining settings
    // are those of the visible column.
    const displayCol = metaColumns.getRecord(tc.displayCol) || tc;
    columns.push({
      id: displayCol.id,
      colId: displayCol.colId,
      label: tc.label,
      type: tc.type,
      formatter: createFullFormatterFromDocData(docData, tc.id),
      parentPos: tc.parentPos,
      description: tc.description,
    });
  }

  // Fetch the actual data, and prepare an accessor for each exported column.
  const tableData = await activeDocSource.fetchTable(table.tableId);
  const rowIds = tableData[2];
  const dataByColId = tableData[3];
  const getters = new ServerColumnGetters(rowIds, dataByColId, columns);
  const access = columns.map(col => getters.getColGetter(col.id)!);

  // Table ids are not very friendly, so borrow the name of the primary view when there is one.
  const primaryViewId = table.primaryViewId;
  const tableName = primaryViewId
    ? safeRecord(safeTable(metaTables, '_grist_Views'), primaryViewId).name
    : table.tableId;

  const docInfo = safeRecord(safeTable(metaTables, '_grist_DocInfo'), 1);
  const docSettings = gutil.safeJsonParse(docInfo.documentSettings, {});
  const docName = await activeDocSource.getDocName();
  const exportData: ExportData = {
    tableName,
    docName,
    rowIds,
    access,
    columns,
    docSettings
  };
  return exportData;
}
|
|
|
|
/**
 * Builds export data for a view section that can be used to produce files in various formats
 * (csv, xlsx). The data is read from activeDoc directly, on behalf of the given request.
 */
export async function exportSection(
  activeDoc: ActiveDoc,
  viewSectionId: number,
  sortSpec: Sort.SortSpec | null,
  filters: Filter[] | null,
  req: express.Request,
  {metaTables}: {metaTables?: TableDataActionSet} = {},
): Promise<ExportData> {
  const source = new ActiveDocSourceDirect(activeDoc, req);
  return doExportSection(source, viewSectionId, sortSpec, filters, {metaTables});
}
|
|
|
|
/**
 * Builds export data for a view section: the columns are the section's fields (ordered by
 * parentPos), and the rows of the section's table are sorted and then filtered.
 *
 * - sortSpec: sort order to apply; when null, the sortColRefs saved with the section are used.
 * - filters: column filters passed in by the caller. For each column, such a filter is used in
 *   preference to the filter saved with the section.
 * - metaTables: metadata to use; fetched from activeDocSource when omitted.
 *
 * Throws an ApiError (404) when the section or its table cannot be found or accessed, or when
 * needed metadata is missing.
 */
export async function doExportSection(
  activeDocSource: ActiveDocSource,
  viewSectionId: number,
  sortSpec: Sort.SortSpec | null,
  filters: Filter[] | null,
  {metaTables}: {metaTables?: TableDataActionSet} = {},
): Promise<ExportData> {
  const meta = metaTables || await getMetaTables(activeDocSource);
  // All the metadata is supplied up front, so DocData is not expected to fetch anything.
  const docData = new DocData(() => { throw new Error("Unexpected DocData fetch"); }, meta);
  const viewSection = safeRecord(safeTable(meta, '_grist_Views_section'), viewSectionId);
  safe(viewSection.tableRef, `Cannot find or access table`);
  const tables = safeTable(meta, '_grist_Tables');
  checkTableAccess(tables, viewSection.tableRef);
  const table = safeRecord(tables, viewSection.tableRef);
  const metaColumns = safeTable(meta, '_grist_Tables_column');
  const columns = metaColumns.filterRecords({parentId: table.id});
  const fields = safeTable(meta, '_grist_Views_section_field')
    .filterRecords({parentId: viewSection.id});
  const savedFilters = safeTable(meta, '_grist_Filters')
    .filterRecords({viewSectionRef: viewSection.id});

  const fieldsByColRef = _.indexBy(fields, 'colRef');
  const savedFiltersByColRef = _.indexBy(savedFilters, 'colRef');
  const unsavedFiltersByColRef = _.indexBy(filters ?? [], 'colRef');

  // Describes a column the way the user sees it / expects it to be exported. The display column
  // of the field, when set, is used in preference to the display column of the column itself.
  function viewify(col: GristTablesColumn, field?: GristViewsSectionField): ExportColumn {
    const displayCol = metaColumns.getRecord(field?.displayCol || col.displayCol) || col;
    return {
      id: displayCol.id,
      colId: displayCol.colId,
      label: col.label,
      type: col.type,
      formatter: createFullFormatterFromDocData(docData, col.id, field?.id),
      parentPos: col.parentPos,
      description: col.description,
    };
  }

  // Every non-helper column of the table takes part in filtering, whether or not it is shown in
  // the section.
  const columnsForFilters = columns
    .filter(col => !gristTypes.isHiddenCol(col.colId))
    .map(col => {
      const unsaved = unsavedFiltersByColRef[col.id];
      const saved = savedFiltersByColRef[col.id];
      return {
        filterFunc: buildColFilter(unsaved?.filter || saved?.filter, col.type),
        id: col.id,
        colId: col.colId,
        type: col.type,
      };
    });
  const viewColumns: ExportColumn[] = _.sortBy(fields, 'parentPos')
    .map(field => viewify(metaColumns.getRecord(field.colRef)!, field));

  // The columns named in sort order need to now become display columns.
  const requestedSortSpec: Sort.SortSpec = sortSpec || gutil.safeJsonParse(viewSection.sortColRefs, []);
  const displaySortSpec: Sort.SortSpec = requestedSortSpec.map(colSpec => {
    const colRef = Sort.getColRef(colSpec);
    const col = metaColumns.getRecord(colRef);
    // An entry that names an unknown column is replaced with 0.
    return col ? Sort.swapColRef(colSpec, viewify(col, fieldsByColRef[colRef]).id) : 0;
  });

  // Fetch the actual data.
  const tableData = await activeDocSource.fetchTable(table.tableId);
  const [, , fetchedRowIds, dataByColId] = tableData;

  // Sort the rows (in place).
  const getters = new ServerColumnGetters(fetchedRowIds, dataByColId, columns);
  const sorter = new SortFunc(getters);
  sorter.updateSpec(displaySortSpec);
  fetchedRowIds.sort((a, b) => sorter.compare(a, b));

  // Then keep only the rows that are accepted by the filter of every column.
  const columnRowFilters = columnsForFilters
    .map(col => buildRowFilter(getters.getColGetter(col.id)!, col.filterFunc));
  const rowIds = fetchedRowIds.filter(rowId => columnRowFilters.every(accepts => accepts(rowId)));

  const docInfo = safeRecord(safeTable(meta, '_grist_DocInfo'), 1);
  const docSettings = gutil.safeJsonParse(docInfo.documentSettings, {});
  const docName = await activeDocSource.getDocName();
  // Cell accessors, one per exported column.
  const access = viewColumns.map(col => getters.getColGetter(col.id)!);

  return {
    rowIds,
    docSettings,
    tableName: table.tableId,
    docName,
    access,
    columns: viewColumns
  };
}
|
|
|
|
// Shorthands for the record types of the two metadata tables used when describing columns.
type GristViewsSectionField = MetaRowRecord<'_grist_Views_section_field'>;
type GristTablesColumn = MetaRowRecord<'_grist_Tables_column'>;
|
|
|
|
// Type for filters passed from the client
|
|
export interface Filter {
  /** Id of the record in _grist_Tables_column for the column being filtered. */
  colRef: number;
  /** Filter definition, in the string form from which the column filter is built. */
  filter: string;
}
|