(core) Rearrange ExportXLSX code and fix ExportsAccessRules test that became flaky

Summary:
- Move makeXLSX* methods to workerExporter file to avoid the risk of creating a piscina worker pool from a thread.
- Increase request timeout in ExportsAccessRules test that started failing occasionally

Test Plan: Test should succeed more reliably

Reviewers: jarek

Reviewed By: jarek

Differential Revision: https://phab.getgrist.com/D3910
This commit is contained in:
Dmitry S 2023-06-02 09:30:19 -04:00
parent d191859be7
commit e10067ff78
2 changed files with 177 additions and 167 deletions

View File

@ -5,26 +5,29 @@
* Google Drive export).
* 2. It uses the 'piscina' library to call a makeXLSX* method in a worker thread, registered in
* workerExporter.ts, to export full doc, a table, or a section.
* 3. Each of those methods calls a same-named method that's defined in this file. I.e.
* downloadXLSX() is called in the main thread, but makeXLSX() is called in the worker thread.
* 4. makeXLSX* methods here get data using an ActiveDocSource, which uses Rpc (from grain-rpc
* 3. Each of those methods calls a doMakeXLSX* method defined in that file. I.e. downloadXLSX()
* is called in the main thread, but makeXLSX() and doMakeXLSX() are called in the worker thread.
* 4. doMakeXLSX* methods get data using an ActiveDocSource, which uses Rpc (from grain-rpc
* module) to request data over a message port from the ActiveDoc in the main thread.
* 5. The resulting stream of Excel data is streamed back to the main thread using Rpc too.
*/
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
import {createExcelFormatter} from 'app/server/lib/ExcelFormatter';
import {ActiveDocSource, ActiveDocSourceDirect, DownloadOptions, ExportParameters} from 'app/server/lib/Export';
import {doExportDoc, doExportSection, doExportTable, ExportData, Filter} from 'app/server/lib/Export';
import log from 'app/server/lib/log';
import {Alignment, Border, stream as ExcelWriteStream, Fill} from 'exceljs';
import * as express from 'express';
import contentDisposition from 'content-disposition';
import {Rpc} from 'grain-rpc';
import {AbortController} from 'node-abort-controller';
import {Stream, Writable} from 'stream';
import {Writable} from 'stream';
import {MessageChannel} from 'worker_threads';
import Piscina from 'piscina';
// If this file is imported from within a worker thread, we'll create more thread pools from each
// thread, with a potential for an infinite loop of doom. Better to catch that early.
if (Piscina.isWorkerThread) {
throw new Error("ExportXLSX must not be imported from within a worker thread");
}
// Configure the thread-pool to use for exporting XLSX files.
const exportPool = new Piscina({
filename: __dirname + '/workerExporter.js',
@ -64,13 +67,18 @@ export async function streamXLSX(activeDoc: ActiveDoc, req: express.Request,
rpc.on('message', (chunk) => { outputStream.write(chunk); });
port1.on('message', (m) => rpc.receiveMessage(m));
// When the worker thread is done, it closes the port on its side, and we listen to that to
// end the original request (the incoming HTTP request, in case of a download).
port1.on('close', () => { outputStream.end(); });
// For request cancelling to work, remember that such requests are forwarded via DocApiForwarder.
const abortController = new AbortController();
req.on('close', () => abortController.abort());
const cancelWorker = () => abortController.abort();
// When the worker thread is done, it closes the port on its side, and we listen to that to
// end the original request (the incoming HTTP request, in case of a download).
port1.on('close', () => {
outputStream.end();
req.off('close', cancelWorker);
});
req.on('close', cancelWorker);
const run = (method: string, ...args: any[]) => exportPool.run({port: port2, testDates, args}, {
name: method,
@ -99,153 +107,3 @@ export async function streamXLSX(activeDoc: ActiveDoc, req: express.Request,
}
}
/**
* Returns a XLSX stream of a view section that can be transformed or parsed.
*
* @param {Object} activeDoc - the activeDoc that the table being converted belongs to.
* @param {Integer} viewSectionId - id of the viewsection to export.
* @param {Integer[]} activeSortOrder (optional) - overriding sort order.
* @param {Filter[]} filters (optional) - filters defined from ui.
*/
export async function makeXLSXFromViewSection(
activeDocSource: ActiveDocSource,
testDates: boolean,
stream: Stream,
viewSectionId: number,
sortOrder: number[],
filters: Filter[],
) {
const data = await doExportSection(activeDocSource, viewSectionId, sortOrder, filters);
const {exportTable, end} = convertToExcel(stream, testDates);
exportTable(data);
await end();
}
/**
* Returns a XLSX stream of a table that can be transformed or parsed.
*
* @param {Object} activeDoc - the activeDoc that the table being converted belongs to.
* @param {Integer} tableId - id of the table to export.
*/
export async function makeXLSXFromTable(
activeDocSource: ActiveDocSource,
testDates: boolean,
stream: Stream,
tableId: string,
) {
const data = await doExportTable(activeDocSource, {tableId});
const {exportTable, end} = convertToExcel(stream, testDates);
exportTable(data);
await end();
}
/**
* Creates excel document with all tables from an active Grist document.
*/
export async function makeXLSX(
activeDocSource: ActiveDocSource,
testDates: boolean,
stream: Stream,
): Promise<void> {
const {exportTable, end} = convertToExcel(stream, testDates);
await doExportDoc(activeDocSource, async (table: ExportData) => exportTable(table));
await end();
}
/**
* Converts export data to an excel file.
*/
function convertToExcel(stream: Stream, testDates: boolean): {
exportTable: (table: ExportData) => void,
end: () => Promise<void>,
} {
// Create workbook and add single sheet to it. Using the WorkbookWriter interface avoids
// creating the entire Excel file in memory, which can be very memory-heavy. See
// https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (The options useStyles and
// useSharedStrings replicate more closely what was used previously.)
const wb = new ExcelWriteStream.xlsx.WorkbookWriter({useStyles: true, useSharedStrings: true, stream});
if (testDates) {
// HACK: for testing, we will keep static dates
const date = new Date(Date.UTC(2018, 11, 1, 0, 0, 0));
wb.modified = date;
wb.created = date;
wb.lastPrinted = date;
wb.creator = 'test';
wb.lastModifiedBy = 'test';
}
// Prepare border - some of the cells can have background colors, in that case border will
// not be visible
const borderStyle: Border = {
color: { argb: 'FFE2E2E3' }, // dark gray - default border color for gdrive
style: 'thin'
};
const borders = {
left: borderStyle,
right: borderStyle,
top: borderStyle,
bottom: borderStyle
};
const headerBackground: Fill = {
type: 'pattern',
pattern: 'solid',
fgColor: { argb: 'FFEEEEEE' } // gray
};
const headerFontColor = {
color: {
argb: 'FF000000' // black
}
};
const centerAlignment: Partial<Alignment> = {
horizontal: 'center'
};
function exportTable(table: ExportData) {
const { columns, rowIds, access, tableName } = table;
const ws = wb.addWorksheet(sanitizeWorksheetName(tableName));
// Build excel formatters.
const formatters = columns.map(col => createExcelFormatter(col.formatter.type, col.formatter.widgetOpts));
// Generate headers for all columns with correct styles for whole column.
// Actual header style for a first row will be overwritten later.
ws.columns = columns.map((col, c) => ({ header: col.label, style: formatters[c].style() }));
// style up the header row
for (let i = 1; i <= columns.length; i++) {
// apply to all rows (including header)
ws.getColumn(i).border = borders;
// apply only to header
const header = ws.getCell(1, i);
header.fill = headerBackground;
header.font = headerFontColor;
header.alignment = centerAlignment;
}
// Make each column a little wider.
ws.columns.forEach(column => {
if (!column.header) {
return;
}
// 14 points is about 100 pixels in a default font (point is around 7.5 pixels)
column.width = column.header.length < 14 ? 14 : column.header.length;
});
// Populate excel file with data
for (const row of rowIds) {
ws.addRow(access.map((getter, c) => formatters[c].formatAny(getter(row)))).commit();
}
ws.commit();
}
function end() { return wb.commit(); }
return {exportTable, end};
}
/**
* Removes invalid characters, see https://github.com/exceljs/exceljs/pull/1484
*/
export function sanitizeWorksheetName(tableName: string): string {
return tableName
// Convert invalid characters to spaces
.replace(/[*?:/\\[\]]/g, ' ')
// Collapse multiple spaces into one
.replace(/\s+/g, ' ')
// Trim spaces and single quotes from the ends
.replace(/^['\s]+/, '')
.replace(/['\s]+$/, '');
}

View File

@ -1,14 +1,15 @@
import {PassThrough} from 'stream';
import {ActiveDocSource} from 'app/server/lib/Export';
import * as ExportXLSX from 'app/server/lib/ExportXLSX';
import {ActiveDocSource, doExportDoc, doExportSection, doExportTable, ExportData, Filter} from 'app/server/lib/Export';
import {createExcelFormatter} from 'app/server/lib/ExcelFormatter';
import * as log from 'app/server/lib/log';
import {Alignment, Border, stream as ExcelWriteStream, Fill} from 'exceljs';
import {Rpc} from 'grain-rpc';
import {Stream} from 'stream';
import {MessagePort, threadId} from 'worker_threads';
export const makeXLSX = handleExport(ExportXLSX.makeXLSX);
export const makeXLSXFromTable = handleExport(ExportXLSX.makeXLSXFromTable);
export const makeXLSXFromViewSection = handleExport(ExportXLSX.makeXLSXFromViewSection);
export const makeXLSX = handleExport(doMakeXLSX);
export const makeXLSXFromTable = handleExport(doMakeXLSXFromTable);
export const makeXLSXFromViewSection = handleExport(doMakeXLSXFromViewSection);
function handleExport<T extends any[]>(
make: (a: ActiveDocSource, testDates: boolean, output: Stream, ...args: T) => Promise<void>
@ -70,3 +71,154 @@ function bufferedPipe(stream: Stream, callback: (chunk: Buffer) => void, thresho
stream.on('end', flush);
}
/**
* Returns a XLSX stream of a view section that can be transformed or parsed.
*
* @param {Object} activeDoc - the activeDoc that the table being converted belongs to.
* @param {Integer} viewSectionId - id of the viewsection to export.
* @param {Integer[]} activeSortOrder (optional) - overriding sort order.
* @param {Filter[]} filters (optional) - filters defined from ui.
*/
async function doMakeXLSXFromViewSection(
activeDocSource: ActiveDocSource,
testDates: boolean,
stream: Stream,
viewSectionId: number,
sortOrder: number[],
filters: Filter[],
) {
const data = await doExportSection(activeDocSource, viewSectionId, sortOrder, filters);
const {exportTable, end} = convertToExcel(stream, testDates);
exportTable(data);
await end();
}
/**
* Returns a XLSX stream of a table that can be transformed or parsed.
*
* @param {Object} activeDoc - the activeDoc that the table being converted belongs to.
* @param {Integer} tableId - id of the table to export.
*/
async function doMakeXLSXFromTable(
activeDocSource: ActiveDocSource,
testDates: boolean,
stream: Stream,
tableId: string,
) {
const data = await doExportTable(activeDocSource, {tableId});
const {exportTable, end} = convertToExcel(stream, testDates);
exportTable(data);
await end();
}
/**
* Creates excel document with all tables from an active Grist document.
*/
async function doMakeXLSX(
activeDocSource: ActiveDocSource,
testDates: boolean,
stream: Stream,
): Promise<void> {
const {exportTable, end} = convertToExcel(stream, testDates);
await doExportDoc(activeDocSource, async (table: ExportData) => exportTable(table));
await end();
}
/**
* Converts export data to an excel file.
*/
function convertToExcel(stream: Stream, testDates: boolean): {
exportTable: (table: ExportData) => void,
end: () => Promise<void>,
} {
// Create workbook and add single sheet to it. Using the WorkbookWriter interface avoids
// creating the entire Excel file in memory, which can be very memory-heavy. See
// https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (The options useStyles and
// useSharedStrings replicate more closely what was used previously.)
const wb = new ExcelWriteStream.xlsx.WorkbookWriter({useStyles: true, useSharedStrings: true, stream});
if (testDates) {
// HACK: for testing, we will keep static dates
const date = new Date(Date.UTC(2018, 11, 1, 0, 0, 0));
wb.modified = date;
wb.created = date;
wb.lastPrinted = date;
wb.creator = 'test';
wb.lastModifiedBy = 'test';
}
// Prepare border - some of the cells can have background colors, in that case border will
// not be visible
const borderStyle: Border = {
color: { argb: 'FFE2E2E3' }, // dark gray - default border color for gdrive
style: 'thin'
};
const borders = {
left: borderStyle,
right: borderStyle,
top: borderStyle,
bottom: borderStyle
};
const headerBackground: Fill = {
type: 'pattern',
pattern: 'solid',
fgColor: { argb: 'FFEEEEEE' } // gray
};
const headerFontColor = {
color: {
argb: 'FF000000' // black
}
};
const centerAlignment: Partial<Alignment> = {
horizontal: 'center'
};
function exportTable(table: ExportData) {
const { columns, rowIds, access, tableName } = table;
const ws = wb.addWorksheet(sanitizeWorksheetName(tableName));
// Build excel formatters.
const formatters = columns.map(col => createExcelFormatter(col.formatter.type, col.formatter.widgetOpts));
// Generate headers for all columns with correct styles for whole column.
// Actual header style for a first row will be overwritten later.
ws.columns = columns.map((col, c) => ({ header: col.label, style: formatters[c].style() }));
// style up the header row
for (let i = 1; i <= columns.length; i++) {
// apply to all rows (including header)
ws.getColumn(i).border = borders;
// apply only to header
const header = ws.getCell(1, i);
header.fill = headerBackground;
header.font = headerFontColor;
header.alignment = centerAlignment;
}
// Make each column a little wider.
ws.columns.forEach(column => {
if (!column.header) {
return;
}
// 14 points is about 100 pixels in a default font (point is around 7.5 pixels)
column.width = column.header.length < 14 ? 14 : column.header.length;
});
// Populate excel file with data
for (const row of rowIds) {
ws.addRow(access.map((getter, c) => formatters[c].formatAny(getter(row)))).commit();
}
ws.commit();
}
function end() { return wb.commit(); }
return {exportTable, end};
}
/**
* Removes invalid characters, see https://github.com/exceljs/exceljs/pull/1484
*/
export function sanitizeWorksheetName(tableName: string): string {
return tableName
// Convert invalid characters to spaces
.replace(/[*?:/\\[\]]/g, ' ')
// Collapse multiple spaces into one
.replace(/\s+/g, ' ')
// Trim spaces and single quotes from the ends
.replace(/^['\s]+/, '')
.replace(/['\s]+$/, '');
}