// Source: gristlabs_grist-core/app/server/lib/Export.ts (TypeScript, 336 lines, 12 KiB)

import {ApiError} from 'app/common/ApiError';
import {buildColFilter} from 'app/common/ColumnFilterFunc';
import {TableDataAction} from 'app/common/DocActions';
import {DocData} from 'app/common/DocData';
import {DocumentSettings} from 'app/common/DocumentSettings';
import * as gristTypes from 'app/common/gristTypes';
import * as gutil from 'app/common/gutil';
import {nativeCompare} from 'app/common/gutil';
import {isTableCensored} from 'app/common/isHiddenTable';
import {buildRowFilter} from 'app/common/RowFilterFunc';
import {schema, SchemaTypes} from 'app/common/schema';
import {SortFunc} from 'app/common/SortFunc';
import {Sort} from 'app/common/SortSpec';
import {MetaRowRecord, MetaTableData} from 'app/common/TableData';
import {BaseFormatter, createFullFormatterFromDocData} from 'app/common/ValueFormatter';
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
import {RequestWithLogin} from 'app/server/lib/Authorizer';
import {docSessionFromRequest} from 'app/server/lib/DocSession';
import {optIntegerParam, optJsonParam, stringParam} from 'app/server/lib/requestUtils';
import {ServerColumnGetters} from 'app/server/lib/ServerColumnGetters';
import * as express from 'express';
import * as _ from 'underscore';
// Helper type for a cell accessor: a function that takes a row identifier (a number) and
// returns the value of that row in one particular column. One accessor is created per
// exported column (see the `access` arrays built by exportTable and exportSection).
type Access = (row: number) => any;
// Helper interface with information about the column.
// One instance describes one exported column; they are built by exportTable and exportSection.
export interface ExportColumn {
// Id of the column record whose data is read: the display column when one is set,
// otherwise the column itself.
id: number;
// Column id (colId) of that same record.
colId: string;
// Label taken from the column's own record (not from the display column).
label: string;
// Type taken from the column's own record.
type: string;
// Formatter created by createFullFormatterFromDocData for this column.
formatter: BaseFormatter;
// parentPos of the column's own record.
parentPos: number;
// Description taken from the column's own record.
description: string;
}
/**
* Bare data that is exported - used to convert to various formats.
* Returned by both exportTable and exportSection.
*/
export interface ExportData {
/**
* Table name or table id: exportTable uses the name of the table's primary view when it has one,
* otherwise the table id; exportSection always uses the table id.
*/
tableName: string;
/**
* Document name (taken from `activeDoc.docName`).
*/
docName: string;
/**
* Row ids. exportSection returns them sorted and filtered; exportTable returns them
* as they come back from the table fetch.
*/
rowIds: number[];
/**
* Accessors for cell values: one per entry of `columns`, in the same order.
*/
access: Access[];
/**
* Column information (primarily used for formatting).
*/
columns: ExportColumn[];
/**
* Document settings, parsed from the `documentSettings` field of record 1 of `_grist_DocInfo`.
*/
docSettings: DocumentSettings;
}
/**
 * Export parameters that identify a section, its filters, and its sort order.
 * Produced by parseExportParameters from the request's query string.
 */
export interface ExportParameters {
  /** Id of the table to export (query parameter `tableId`). */
  tableId: string;
  /** Id of the view section to export, if one was given (query parameter `viewSection`). */
  viewSectionId: number | undefined;
  /** Sort specification (query parameter `activeSortSpec`); empty when none was given. */
  sortOrder: number[];
  /** Column filters sent with the request (query parameter `filters`); empty when none was given. */
  filters: Filter[];
}
/**
* Options parameters for CSV and XLSX export functions.
* Carries the same selection fields as ExportParameters plus a filename.
*/
export interface DownloadOptions {
// NOTE(review): presumably the name given to the downloaded file - confirm in the callers.
filename: string;
// Id of the table to export.
tableId: string;
// Id of the view section to export, or undefined when no section is selected.
viewSectionId: number | undefined;
// Column filters to apply (see Filter).
filters: Filter[];
// Sort specification to apply.
sortOrder: number[];
}
// Metadata tables keyed by table id, each in TableDataAction form. This is the shape returned by
// getMetaTables (i.e. by activeDoc.fetchMetaTables for the request's doc session).
// NOTE(review): "filtered" presumably refers to access-control filtering for that session - confirm.
interface FilteredMetaTables {
[tableId: string]: TableDataAction;
}
/**
 * Gets export parameters from a request.
 *
 * Reads the following query parameters:
 *  - `tableId`: read with stringParam (NOTE(review): presumably rejects a missing or
 *    non-string value - confirm in requestUtils).
 *  - `viewSection`: optional integer, returned as `viewSectionId`.
 *  - `activeSortSpec`: optional JSON, returned as `sortOrder`; defaults to `[]`.
 *  - `filters`: optional JSON, returned as `filters`; defaults to `[]`.
 *
 * The parsed JSON values are not validated here beyond what optJsonParam does.
 *
 * @param req Incoming request whose query string carries the parameters.
 * @returns The parsed export parameters.
 */
export function parseExportParameters(req: express.Request): ExportParameters {
  const tableId = stringParam(req.query.tableId, 'tableId');
  const viewSectionId = optIntegerParam(req.query.viewSection);
  const sortOrder = optJsonParam(req.query.activeSortSpec, []) as number[];
  const filters: Filter[] = optJsonParam(req.query.filters, []);
  return {
    tableId,
    viewSectionId,
    sortOrder,
    filters,
  };
}
// Fetches the metadata tables of the document for the session derived from the request.
// Throws (via safe) when the document returns no metadata.
async function getMetaTables(activeDoc: ActiveDoc, req: express.Request): Promise<FilteredMetaTables> {
  const session = docSessionFromRequest(req as RequestWithLogin);
  const fetched = await activeDoc.fetchMetaTables(session);
  return safe(fetched, "No metadata available in active document");
}
// Returns the value unchanged when it is truthy; otherwise throws a 404 ApiError with `msg`.
// Note that every falsy value (0, '', false, null, undefined) is rejected, not only null/undefined.
function safe<T>(value: T, msg: string) {
  if (value) {
    return value as NonNullable<T>;
  }
  throw new ApiError(msg, 404);
}
// Wraps one metadata table in a MetaTableData. Throws (via safe) when the table is absent from
// the fetched metadata, or when the schema has no column types for it.
function safeTable<TableId extends keyof SchemaTypes>(metaTables: FilteredMetaTables, tableId: TableId) {
  return new MetaTableData<TableId>(
    tableId,
    safe(metaTables[tableId], `No table '${tableId}' in document`),
    safe(schema[tableId], `No table '${tableId}' in document schema`),
  );
}
// Looks up a record by id in a metadata table; throws (via safe) when there is no such record.
function safeRecord<TableId extends keyof SchemaTypes>(table: MetaTableData<TableId>, id: number) {
  const record = table.getRecord(id);
  return safe(record, `No record ${id} in table ${table.tableId}`);
}
// Guard: returns normally when tableRef refers to an uncensored table, and throws a 404 ApiError
// when isTableCensored reports the table as censored.
function checkTableAccess(tables: MetaTableData<"_grist_Tables">, tableRef: number): void {
  const censored = isTableCensored(tables, tableRef);
  if (!censored) {
    return;
  }
  throw new ApiError(`Cannot find or access table`, 404);
}
/**
 * Builds export data for every raw (non-summary) table of the document that is not censored.
 * Metadata is fetched once and shared with each per-table export.
 * @param activeDoc Active document
 * @param req Request
 */
export async function exportDoc(
  activeDoc: ActiveDoc,
  req: express.Request) {
  const metaTables = await getMetaTables(activeDoc, req);
  const tables = safeTable(metaTables, '_grist_Tables');

  // Raw tables are those that are not a summary of another table.
  const rawTableRefs = tables.filterRowIds({ summarySourceTable: 0 });

  // Leave out censored tables before starting any export.
  const visibleTableRefs: number[] = [];
  for (const ref of rawTableRefs) {
    if (!isTableCensored(tables, ref)) {
      visibleTableRefs.push(ref);
    }
  }

  // Start all table exports, then wait for all of them.
  const pendingExports = visibleTableRefs.map(ref => exportTable(activeDoc, ref, req, {metaTables}));
  return await Promise.all(pendingExports);
}
/**
 * Builds export data for a whole table, usable to produce files in various formats (csv, xlsx).
 *
 * All non-hidden columns of the table are exported, ordered by parentPos and then by id. Rows are
 * returned as fetched; no sorting or filtering is applied here.
 *
 * @param activeDoc Document to read metadata and table data from.
 * @param tableRef Row id of the table in `_grist_Tables`.
 * @param req Request, used to derive the doc session for fetching.
 * @param metaTables Optional pre-fetched metadata tables; fetched via getMetaTables when omitted.
 */
export async function exportTable(
  activeDoc: ActiveDoc,
  tableRef: number,
  req: express.Request,
  {metaTables}: {metaTables?: FilteredMetaTables} = {},
): Promise<ExportData> {
  // Reuse metadata handed in by the caller; fetch it only when absent.
  if (!metaTables) {
    metaTables = await getMetaTables(activeDoc, req);
  }
  // DocData is backed only by the metadata in hand; its fetch callback throws if ever invoked.
  const docData = new DocData((tableId) => { throw new Error("Unexpected DocData fetch"); }, metaTables);
  const tables = safeTable(metaTables, '_grist_Tables');
  const metaColumns = safeTable(metaTables, '_grist_Tables_column');
  checkTableAccess(tables, tableRef);
  const table = safeRecord(tables, tableRef);

  // Columns of this table, by parentPos and then id, which should match the order in raw data.
  const ownColumns = metaColumns.filterRecords({parentId: tableRef});
  ownColumns.sort((left, right) =>
    nativeCompare(left.parentPos, right.parentPos) || nativeCompare(left.id, right.id));

  // Drop helper columns, then describe each remaining column the way the user expects to see it.
  const visibleColumns = ownColumns.filter(col => !gristTypes.isHiddenCol(col.colId));
  const columns: ExportColumn[] = visibleColumns.map(col => {
    // For reference columns, data comes from the display column while settings stay with the column.
    const source = metaColumns.getRecord(col.displayCol) || col;
    return {
      id: source.id,
      colId: source.colId,
      label: col.label,
      type: col.type,
      formatter: createFullFormatterFromDocData(docData, col.id),
      parentPos: col.parentPos,
      description: col.description,
    };
  });

  // Fetch the actual data.
  const docSession = docSessionFromRequest(req as RequestWithLogin);
  const fetched = await activeDoc.fetchTable(docSession, table.tableId, true);
  const rowIds = fetched.tableData[2];
  const dataByColId = fetched.tableData[3];

  // One cell accessor per exported column.
  const getters = new ServerColumnGetters(rowIds, dataByColId, columns);
  const access = columns.map(col => getters.getColGetter(col.id)!);

  // Table ids are not very friendly, so borrow the name of the primary view when there is one.
  const tableName = table.primaryViewId
    ? safeRecord(safeTable(metaTables, '_grist_Views'), table.primaryViewId).name
    : table.tableId;

  const docInfoTable = safeTable(metaTables, '_grist_DocInfo');
  const docSettings = gutil.safeJsonParse(safeRecord(docInfoTable, 1).documentSettings, {});
  return {
    tableName,
    docName: activeDoc.docName,
    rowIds,
    access,
    columns,
    docSettings,
  };
}
/**
* Builds export data for a single view section, usable to produce files in various formats (csv, xlsx).
*
* Exported columns are the section's fields ordered by `parentPos`. Rows are sorted by `sortSpec`
* (falling back to the section's saved `sortColRefs`) and then filtered; for each column a filter
* passed in `filters` is used in preference to the one saved in `_grist_Filters`.
*
* @param activeDoc Document to read metadata and table data from.
* @param viewSectionId Row id of the section in `_grist_Views_section`.
* @param sortSpec Sort specification to apply; when null, the section's saved one is used.
* @param filters Filters supplied by the caller; null means none were supplied.
* @param req Request, used to derive the doc session for fetching.
* @param metaTables Optional pre-fetched metadata tables; fetched via getMetaTables when omitted.
* @throws ApiError (404, via safe/checkTableAccess) when a needed metadata table or record is
* missing, the section has no table, or the table is censored.
*/
export async function exportSection(
activeDoc: ActiveDoc,
viewSectionId: number,
sortSpec: Sort.SortSpec | null,
filters: Filter[] | null,
req: express.Request,
{metaTables}: {metaTables?: FilteredMetaTables} = {},
): Promise<ExportData> {
metaTables = metaTables || await getMetaTables(activeDoc, req);
// DocData is backed only by the metadata in hand; its fetch callback throws if it is ever invoked.
const docData = new DocData((tableId) => { throw new Error("Unexpected DocData fetch"); }, metaTables);
const viewSections = safeTable(metaTables, '_grist_Views_section');
const viewSection = safeRecord(viewSections, viewSectionId);
// A falsy tableRef (e.g. 0) means the section has no table to export.
safe(viewSection.tableRef, `Cannot find or access table`);
const tables = safeTable(metaTables, '_grist_Tables');
checkTableAccess(tables, viewSection.tableRef);
const table = safeRecord(tables, viewSection.tableRef);
const metaColumns = safeTable(metaTables, '_grist_Tables_column');
// All column records of the table (hidden ones included at this point).
const columns = metaColumns.filterRecords({parentId: table.id});
const viewSectionFields = safeTable(metaTables, '_grist_Views_section_field');
const fields = viewSectionFields.filterRecords({parentId: viewSection.id});
const savedFilters = safeTable(metaTables, '_grist_Filters')
.filterRecords({viewSectionRef: viewSection.id});
// Index fields and filters by column ref for per-column lookup below.
const fieldsByColRef = _.indexBy(fields, 'colRef');
const savedFiltersByColRef = _.indexBy(savedFilters, 'colRef');
const unsavedFiltersByColRef = _.indexBy(filters ?? [], 'colRef');
// Produce a column description matching what user will see / expect to export.
// The exported id/colId come from the display column (the field's, else the column's) when one
// exists, otherwise from the column itself; the formatter is built from the column and the field.
const viewify = (col: GristTablesColumn, field?: GristViewsSectionField): ExportColumn => {
const displayCol = metaColumns.getRecord(field?.displayCol || col.displayCol) || col;
return {
id: displayCol.id,
colId: displayCol.colId,
label: col.label,
type: col.type,
formatter: createFullFormatterFromDocData(docData, col.id, field?.id),
parentPos: col.parentPos,
description: col.description,
};
};
// Builds the filter function for one column; a filter passed by the caller wins over a saved one.
// NOTE(review): the `field` parameter is not used in the body.
const buildFilters = (col: GristTablesColumn, field?: GristViewsSectionField) => {
const filterString = unsavedFiltersByColRef[col.id]?.filter || savedFiltersByColRef[col.id]?.filter;
const filterFunc = buildColFilter(filterString, col.type);
return {
filterFunc,
id: col.id,
colId: col.colId,
type: col.type,
};
};
// Filters are built for every non-hidden column of the table, not only those shown in the section.
const columnsForFilters = columns
.filter(column => !gristTypes.isHiddenCol(column.colId))
.map(column => buildFilters(column, fieldsByColRef[column.id]));
// Exported columns follow the section's fields in parentPos order. The non-null assertion assumes
// every field's colRef resolves to a column record.
const viewColumns: ExportColumn[] = _.sortBy(fields, 'parentPos')
.map((field) => viewify(metaColumns.getRecord(field.colRef)!, field));
// The columns named in sort order need to now become display columns
sortSpec = sortSpec || gutil.safeJsonParse(viewSection.sortColRefs, []);
sortSpec = sortSpec!.map((colSpec) => {
const colRef = Sort.getColRef(colSpec);
const col = metaColumns.getRecord(colRef);
if (!col) {
// No such column record: the spec entry is replaced by 0.
// NOTE(review): presumably SortFunc ignores such entries - confirm.
return 0;
}
const effectiveColRef = viewify(col, fieldsByColRef[colRef]).id;
return Sort.swapColRef(colSpec, effectiveColRef);
});
// fetch actual data
const {tableData} = await activeDoc.fetchTable(docSessionFromRequest(req as RequestWithLogin), table.tableId, true);
let rowIds = tableData[2];
const dataByColId = tableData[3];
// sort rows (the getters are created before the fetched row id array is sorted in place)
const getters = new ServerColumnGetters(rowIds, dataByColId, columns);
const sorter = new SortFunc(getters);
sorter.updateSpec(sortSpec);
rowIds.sort((a, b) => sorter.compare(a, b));
// create cell accessors
const tableAccess = columnsForFilters.map(col => getters.getColGetter(col.id)!);
// create row filter based on all columns filter: a row is kept only if every column filter accepts it
const rowFilter = columnsForFilters
.map((col, c) => buildRowFilter(tableAccess[c], col.filterFunc))
.reduce((prevFilter, curFilter) => (id) => prevFilter(id) && curFilter(id), () => true);
// filter rows numbers
rowIds = rowIds.filter(rowFilter);
const docInfo = safeRecord(safeTable(metaTables, '_grist_DocInfo'), 1);
const docSettings = gutil.safeJsonParse(docInfo.documentSettings, {});
// Note: tableName is the table id here (exportTable prefers the primary view's name).
return {
rowIds,
docSettings,
tableName: table.tableId,
docName: activeDoc.docName,
access: viewColumns.map(col => getters.getColGetter(col.id)!),
columns: viewColumns
};
}
// Shorthand record types for rows of the `_grist_Views_section_field` and
// `_grist_Tables_column` metadata tables.
type GristViewsSectionField = MetaRowRecord<'_grist_Views_section_field'>
type GristTablesColumn = MetaRowRecord<'_grist_Tables_column'>
// Type for filters passed from the client.
// `colRef` is matched against column record ids (filters are indexed by it and looked up by
// column id); `filter` is the filter specification string handed to buildColFilter.
export interface Filter { colRef: number, filter: string }