From e10067ff78a605c19523f794ffd15374be26df14 Mon Sep 17 00:00:00 2001 From: Dmitry S Date: Fri, 2 Jun 2023 09:30:19 -0400 Subject: [PATCH] (core) Rearrange ExportXLSX code and fix ExportsAccessRules test that became flaky Summary: - Move makeXLSX* methods to workerExporter file to avoid the risk of creating a piscina worker pool from a thread. - Increase request timeout in ExportsAccessRules test that started failing occasionally Test Plan: Test should succeed more reliably Reviewers: jarek Reviewed By: jarek Differential Revision: https://phab.getgrist.com/D3910 --- app/server/lib/ExportXLSX.ts | 182 ++++--------------------------- app/server/lib/workerExporter.ts | 162 ++++++++++++++++++++++++++- 2 files changed, 177 insertions(+), 167 deletions(-) diff --git a/app/server/lib/ExportXLSX.ts b/app/server/lib/ExportXLSX.ts index 2159d199..1beb2dc3 100644 --- a/app/server/lib/ExportXLSX.ts +++ b/app/server/lib/ExportXLSX.ts @@ -5,26 +5,29 @@ * Google Drive export). * 2. It uses the 'piscina' library to call a makeXLSX* method in a worker thread, registered in * workerExporter.ts, to export full doc, a table, or a section. - * 3. Each of those methods calls a same-named method that's defined in this file. I.e. - * downloadXLSX() is called in the main thread, but makeXLSX() is called in the worker thread. - * 4. makeXLSX* methods here get data using an ActiveDocSource, which uses Rpc (from grain-rpc + * 3. Each of those methods calls a doMakeXLSX* method defined in that file. I.e. downloadXLSX() + * is called in the main thread, but makeXLSX() and doMakeXLSX() are called in the worker thread. + * 4. doMakeXLSX* methods get data using an ActiveDocSource, which uses Rpc (from grain-rpc * module) to request data over a message port from the ActiveDoc in the main thread. * 5. The resulting stream of Excel data is streamed back to the main thread using Rpc too. */ import {ActiveDoc} from 'app/server/lib/ActiveDoc'; -import {createExcelFormatter} from 'app/server/lib/ExcelFormatter'; import {ActiveDocSource, ActiveDocSourceDirect, DownloadOptions, ExportParameters} from 'app/server/lib/Export'; -import {doExportDoc, doExportSection, doExportTable, ExportData, Filter} from 'app/server/lib/Export'; import log from 'app/server/lib/log'; -import {Alignment, Border, stream as ExcelWriteStream, Fill} from 'exceljs'; import * as express from 'express'; import contentDisposition from 'content-disposition'; import {Rpc} from 'grain-rpc'; import {AbortController} from 'node-abort-controller'; -import {Stream, Writable} from 'stream'; +import {Writable} from 'stream'; import {MessageChannel} from 'worker_threads'; import Piscina from 'piscina'; +// If this file is imported from within a worker thread, we'll create more thread pools from each +// thread, with a potential for an infinite loop of doom. Better to catch that early. +if (Piscina.isWorkerThread) { + throw new Error("ExportXLSX must not be imported from within a worker thread"); +} + // Configure the thread-pool to use for exporting XLSX files. const exportPool = new Piscina({ filename: __dirname + '/workerExporter.js', @@ -64,13 +67,18 @@ export async function streamXLSX(activeDoc: ActiveDoc, req: express.Request, rpc.on('message', (chunk) => { outputStream.write(chunk); }); port1.on('message', (m) => rpc.receiveMessage(m)); - // When the worker thread is done, it closes the port on its side, and we listen to that to - // end the original request (the incoming HTTP request, in case of a download). - port1.on('close', () => { outputStream.end(); }); - // For request cancelling to work, remember that such requests are forwarded via DocApiForwarder. const abortController = new AbortController(); - req.on('close', () => abortController.abort()); + const cancelWorker = () => abortController.abort(); + + // When the worker thread is done, it closes the port on its side, and we listen to that to + // end the original request (the incoming HTTP request, in case of a download). + port1.on('close', () => { + outputStream.end(); + req.off('close', cancelWorker); + }); + + req.on('close', cancelWorker); const run = (method: string, ...args: any[]) => exportPool.run({port: port2, testDates, args}, { name: method, @@ -99,153 +107,3 @@ export async function streamXLSX(activeDoc: ActiveDoc, req: express.Request, } } -/** - * Returns a XLSX stream of a view section that can be transformed or parsed. - * - * @param {Object} activeDoc - the activeDoc that the table being converted belongs to. - * @param {Integer} viewSectionId - id of the viewsection to export. - * @param {Integer[]} activeSortOrder (optional) - overriding sort order. - * @param {Filter[]} filters (optional) - filters defined from ui. - */ -export async function makeXLSXFromViewSection( - activeDocSource: ActiveDocSource, - testDates: boolean, - stream: Stream, - viewSectionId: number, - sortOrder: number[], - filters: Filter[], -) { - const data = await doExportSection(activeDocSource, viewSectionId, sortOrder, filters); - const {exportTable, end} = convertToExcel(stream, testDates); - exportTable(data); - await end(); -} - -/** - * Returns a XLSX stream of a table that can be transformed or parsed. - * - * @param {Object} activeDoc - the activeDoc that the table being converted belongs to. - * @param {Integer} tableId - id of the table to export. - */ -export async function makeXLSXFromTable( - activeDocSource: ActiveDocSource, - testDates: boolean, - stream: Stream, - tableId: string, -) { - const data = await doExportTable(activeDocSource, {tableId}); - const {exportTable, end} = convertToExcel(stream, testDates); - exportTable(data); - await end(); -} - -/** - * Creates excel document with all tables from an active Grist document. - */ -export async function makeXLSX( - activeDocSource: ActiveDocSource, - testDates: boolean, - stream: Stream, -): Promise { - const {exportTable, end} = convertToExcel(stream, testDates); - await doExportDoc(activeDocSource, async (table: ExportData) => exportTable(table)); - await end(); -} - -/** - * Converts export data to an excel file. - */ -function convertToExcel(stream: Stream, testDates: boolean): { - exportTable: (table: ExportData) => void, - end: () => Promise, -} { - // Create workbook and add single sheet to it. Using the WorkbookWriter interface avoids - // creating the entire Excel file in memory, which can be very memory-heavy. See - // https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (The options useStyles and - // useSharedStrings replicate more closely what was used previously.) - const wb = new ExcelWriteStream.xlsx.WorkbookWriter({useStyles: true, useSharedStrings: true, stream}); - if (testDates) { - // HACK: for testing, we will keep static dates - const date = new Date(Date.UTC(2018, 11, 1, 0, 0, 0)); - wb.modified = date; - wb.created = date; - wb.lastPrinted = date; - wb.creator = 'test'; - wb.lastModifiedBy = 'test'; - } - // Prepare border - some of the cells can have background colors, in that case border will - // not be visible - const borderStyle: Border = { - color: { argb: 'FFE2E2E3' }, // dark gray - default border color for gdrive - style: 'thin' - }; - const borders = { - left: borderStyle, - right: borderStyle, - top: borderStyle, - bottom: borderStyle - }; - const headerBackground: Fill = { - type: 'pattern', - pattern: 'solid', - fgColor: { argb: 'FFEEEEEE' } // gray - }; - const headerFontColor = { - color: { - argb: 'FF000000' // black - } - }; - const centerAlignment: Partial = { - horizontal: 'center' - }; - function exportTable(table: ExportData) { - const { columns, rowIds, access, tableName } = table; - const ws = wb.addWorksheet(sanitizeWorksheetName(tableName)); - // Build excel formatters. - const formatters = columns.map(col => createExcelFormatter(col.formatter.type, col.formatter.widgetOpts)); - // Generate headers for all columns with correct styles for whole column. - // Actual header style for a first row will be overwritten later. - ws.columns = columns.map((col, c) => ({ header: col.label, style: formatters[c].style() })); - // style up the header row - for (let i = 1; i <= columns.length; i++) { - // apply to all rows (including header) - ws.getColumn(i).border = borders; - // apply only to header - const header = ws.getCell(1, i); - header.fill = headerBackground; - header.font = headerFontColor; - header.alignment = centerAlignment; - } - // Make each column a little wider. - ws.columns.forEach(column => { - if (!column.header) { - return; - } - // 14 points is about 100 pixels in a default font (point is around 7.5 pixels) - column.width = column.header.length < 14 ? 14 : column.header.length; - }); - // Populate excel file with data - for (const row of rowIds) { - ws.addRow(access.map((getter, c) => formatters[c].formatAny(getter(row)))).commit(); - } - ws.commit(); - } - function end() { return wb.commit(); } - return {exportTable, end}; -} - -/** - * Removes invalid characters, see https://github.com/exceljs/exceljs/pull/1484 - */ -export function sanitizeWorksheetName(tableName: string): string { - return tableName - // Convert invalid characters to spaces - .replace(/[*?:/\\[\]]/g, ' ') - - // Collapse multiple spaces into one - .replace(/\s+/g, ' ') - - // Trim spaces and single quotes from the ends - .replace(/^['\s]+/, '') - .replace(/['\s]+$/, ''); -} diff --git a/app/server/lib/workerExporter.ts b/app/server/lib/workerExporter.ts index ac8dfac1..004050b0 100644 --- a/app/server/lib/workerExporter.ts +++ b/app/server/lib/workerExporter.ts @@ -1,14 +1,15 @@ import {PassThrough} from 'stream'; -import {ActiveDocSource} from 'app/server/lib/Export'; -import * as ExportXLSX from 'app/server/lib/ExportXLSX'; +import {ActiveDocSource, doExportDoc, doExportSection, doExportTable, ExportData, Filter} from 'app/server/lib/Export'; +import {createExcelFormatter} from 'app/server/lib/ExcelFormatter'; import * as log from 'app/server/lib/log'; +import {Alignment, Border, stream as ExcelWriteStream, Fill} from 'exceljs'; import {Rpc} from 'grain-rpc'; import {Stream} from 'stream'; import {MessagePort, threadId} from 'worker_threads'; -export const makeXLSX = handleExport(ExportXLSX.makeXLSX); -export const makeXLSXFromTable = handleExport(ExportXLSX.makeXLSXFromTable); -export const makeXLSXFromViewSection = handleExport(ExportXLSX.makeXLSXFromViewSection); +export const makeXLSX = handleExport(doMakeXLSX); +export const makeXLSXFromTable = handleExport(doMakeXLSXFromTable); +export const makeXLSXFromViewSection = handleExport(doMakeXLSXFromViewSection); function handleExport( make: (a: ActiveDocSource, testDates: boolean, output: Stream, ...args: T) => Promise @@ -70,3 +71,154 @@ function bufferedPipe(stream: Stream, callback: (chunk: Buffer) => void, thresho stream.on('end', flush); } + +/** + * Returns a XLSX stream of a view section that can be transformed or parsed. + * + * @param {Object} activeDoc - the activeDoc that the table being converted belongs to. + * @param {Integer} viewSectionId - id of the viewsection to export. + * @param {Integer[]} activeSortOrder (optional) - overriding sort order. + * @param {Filter[]} filters (optional) - filters defined from ui. + */ +async function doMakeXLSXFromViewSection( + activeDocSource: ActiveDocSource, + testDates: boolean, + stream: Stream, + viewSectionId: number, + sortOrder: number[], + filters: Filter[], +) { + const data = await doExportSection(activeDocSource, viewSectionId, sortOrder, filters); + const {exportTable, end} = convertToExcel(stream, testDates); + exportTable(data); + await end(); +} + +/** + * Returns a XLSX stream of a table that can be transformed or parsed. + * + * @param {Object} activeDoc - the activeDoc that the table being converted belongs to. + * @param {Integer} tableId - id of the table to export. + */ +async function doMakeXLSXFromTable( + activeDocSource: ActiveDocSource, + testDates: boolean, + stream: Stream, + tableId: string, +) { + const data = await doExportTable(activeDocSource, {tableId}); + const {exportTable, end} = convertToExcel(stream, testDates); + exportTable(data); + await end(); +} + +/** + * Creates excel document with all tables from an active Grist document. + */ +async function doMakeXLSX( + activeDocSource: ActiveDocSource, + testDates: boolean, + stream: Stream, +): Promise { + const {exportTable, end} = convertToExcel(stream, testDates); + await doExportDoc(activeDocSource, async (table: ExportData) => exportTable(table)); + await end(); +} + +/** + * Converts export data to an excel file. + */ +function convertToExcel(stream: Stream, testDates: boolean): { + exportTable: (table: ExportData) => void, + end: () => Promise, +} { + // Create workbook and add single sheet to it. Using the WorkbookWriter interface avoids + // creating the entire Excel file in memory, which can be very memory-heavy. See + // https://github.com/exceljs/exceljs#streaming-xlsx-writercontents. (The options useStyles and + // useSharedStrings replicate more closely what was used previously.) + const wb = new ExcelWriteStream.xlsx.WorkbookWriter({useStyles: true, useSharedStrings: true, stream}); + if (testDates) { + // HACK: for testing, we will keep static dates + const date = new Date(Date.UTC(2018, 11, 1, 0, 0, 0)); + wb.modified = date; + wb.created = date; + wb.lastPrinted = date; + wb.creator = 'test'; + wb.lastModifiedBy = 'test'; + } + // Prepare border - some of the cells can have background colors, in that case border will + // not be visible + const borderStyle: Border = { + color: { argb: 'FFE2E2E3' }, // dark gray - default border color for gdrive + style: 'thin' + }; + const borders = { + left: borderStyle, + right: borderStyle, + top: borderStyle, + bottom: borderStyle + }; + const headerBackground: Fill = { + type: 'pattern', + pattern: 'solid', + fgColor: { argb: 'FFEEEEEE' } // gray + }; + const headerFontColor = { + color: { + argb: 'FF000000' // black + } + }; + const centerAlignment: Partial = { + horizontal: 'center' + }; + function exportTable(table: ExportData) { + const { columns, rowIds, access, tableName } = table; + const ws = wb.addWorksheet(sanitizeWorksheetName(tableName)); + // Build excel formatters. + const formatters = columns.map(col => createExcelFormatter(col.formatter.type, col.formatter.widgetOpts)); + // Generate headers for all columns with correct styles for whole column. + // Actual header style for a first row will be overwritten later. + ws.columns = columns.map((col, c) => ({ header: col.label, style: formatters[c].style() })); + // style up the header row + for (let i = 1; i <= columns.length; i++) { + // apply to all rows (including header) + ws.getColumn(i).border = borders; + // apply only to header + const header = ws.getCell(1, i); + header.fill = headerBackground; + header.font = headerFontColor; + header.alignment = centerAlignment; + } + // Make each column a little wider. + ws.columns.forEach(column => { + if (!column.header) { + return; + } + // 14 points is about 100 pixels in a default font (point is around 7.5 pixels) + column.width = column.header.length < 14 ? 14 : column.header.length; + }); + // Populate excel file with data + for (const row of rowIds) { + ws.addRow(access.map((getter, c) => formatters[c].formatAny(getter(row)))).commit(); + } + ws.commit(); + } + function end() { return wb.commit(); } + return {exportTable, end}; +} + +/** + * Removes invalid characters, see https://github.com/exceljs/exceljs/pull/1484 + */ +export function sanitizeWorksheetName(tableName: string): string { + return tableName + // Convert invalid characters to spaces + .replace(/[*?:/\\[\]]/g, ' ') + + // Collapse multiple spaces into one + .replace(/\s+/g, ' ') + + // Trim spaces and single quotes from the ends + .replace(/^['\s]+/, '') + .replace(/['\s]+$/, ''); +}