2020-07-21 13:20:51 +00:00
|
|
|
/* Helper file to separate ActiveDoc import functions and convert them to TypeScript. */
|
|
|
|
|
|
|
|
import * as path from 'path';
|
|
|
|
import * as _ from 'underscore';
|
|
|
|
|
2021-10-08 06:32:59 +00:00
|
|
|
import {ColumnDelta, createEmptyActionSummary} from 'app/common/ActionSummary';
|
2021-10-04 16:14:14 +00:00
|
|
|
import {ApplyUAResult, DataSourceTransformed, ImportOptions, ImportResult, ImportTableResult,
|
|
|
|
MergeOptions, MergeOptionsMap, MergeStrategy, TransformColumn, TransformRule,
|
2021-09-15 06:12:34 +00:00
|
|
|
TransformRuleMap} from 'app/common/ActiveDocAPI';
|
2020-07-21 13:20:51 +00:00
|
|
|
import {ApiError} from 'app/common/ApiError';
|
2021-10-04 16:14:14 +00:00
|
|
|
import {BulkColValues, CellValue, fromTableDataAction, TableRecordValue} from 'app/common/DocActions';
|
2020-07-21 13:20:51 +00:00
|
|
|
import * as gutil from 'app/common/gutil';
|
2021-10-08 06:32:59 +00:00
|
|
|
import {DocStateComparison} from 'app/common/UserAPI';
|
2020-07-21 13:20:51 +00:00
|
|
|
import {ParseFileResult, ParseOptions} from 'app/plugin/FileParserAPI';
|
|
|
|
import {GristTable} from 'app/plugin/GristTable';
|
|
|
|
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
|
|
|
|
import {DocSession, OptDocSession} from 'app/server/lib/DocSession';
|
|
|
|
import * as log from 'app/server/lib/log';
|
|
|
|
import {globalUploadSet, moveUpload, UploadInfo} from 'app/server/lib/uploads';
|
2021-10-04 16:14:14 +00:00
|
|
|
import {buildComparisonQuery} from 'app/server/lib/ExpandedQuery';
|
2021-11-09 20:03:12 +00:00
|
|
|
import flatten = require('lodash/flatten');
|
2020-07-21 13:20:51 +00:00
|
|
|
|
2021-10-04 16:14:14 +00:00
|
|
|
// Prefix of the helper column ids used during an import. In this file it is prepended to a
// destination column id to find the column of the hidden import table holding the transformed
// values, and sliced off the front of client-supplied column ids to recover the destination id.
const IMPORT_TRANSFORM_COLUMN_PREFIX = 'gristHelper_Import_';
|
2020-07-21 13:20:51 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* AddTableRetValue contains return value of user actions 'AddTable'
|
|
|
|
*/
|
|
|
|
interface AddTableRetValue {
  /** Id of the created table; the import code treats it as the sanitized version of the requested name. */
  table_id: string;

  /** Numeric id returned for the table (presumably its metadata row id — confirm against 'AddTable'). */
  id: number;

  /** Sanitized ids of the created columns, in the order the column specs were passed. */
  columns: string[];

  /** Views reported by 'AddTable'; not read by the import code in this file. */
  views: object[];
}
|
|
|
|
|
|
|
|
interface ReferenceDescription {
  /** Index of the table that holds the reference column. */
  tableIndex: number;

  /** Index of the reference column within that table. */
  colIndex: number;

  /** Id of the table the reference column points to. */
  refTableId: string;
}
|
|
|
|
|
2021-09-15 06:12:34 +00:00
|
|
|
interface FileImportOptions {
  /** Suggested name of the imported file; may double as a suggested table name (e.g. for csv imports). */
  originalFilename: string;

  /** Parse options handed to the import plugin when the file is parsed. */
  parseOptions: ParseOptions;

  /** Merge options, keyed by table name. */
  mergeOptionsMap: MergeOptionsMap;

  /** True when the import should produce temporary hidden tables rather than regular ones. */
  isHidden: boolean;

  /** Position, within the original dataSource, of the file being imported. */
  uploadFileIndex: number;

  /** Transform rules, keyed by table name. */
  transformRuleMap: TransformRuleMap;
}
|
|
|
|
|
2020-07-21 13:20:51 +00:00
|
|
|
export class ActiveDocImport {
|
|
|
|
// Parameter property: stores the ActiveDoc that every import method of this class operates on.
constructor(private _activeDoc: ActiveDoc) {}
|
|
|
|
/**
|
|
|
|
* Imports files, removes previously created temporary hidden tables and creates the new ones
|
|
|
|
*/
|
|
|
|
public async importFiles(docSession: DocSession, dataSource: DataSourceTransformed,
                         parseOptions: ParseOptions, prevTableIds: string[]): Promise<ImportResult> {
  // A user-action bundle is started here and not stopped by this method; within this class,
  // finishImportFiles() and cancelImportFiles() are the callers of stopBundleUserActions().
  this._activeDoc.startBundleUserActions(docSession);

  // Drop the hidden tables left over from a previous preview before building new ones.
  await this._removeHiddenTables(docSession, prevTableIds);

  // Look the upload up under the access id derived from the session's user.
  const accessId = this._activeDoc.makeAccessId(docSession.authorizer.getUserId());
  const upload: UploadInfo = globalUploadSet.getUploadInfo(dataSource.uploadId, accessId);

  // Preview import: only parse options are supplied, and tables are created hidden.
  const previewOptions = {parseOptions};
  return this._importFiles(docSession, upload, dataSource.transforms, previewOptions, true);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Finishes import files, removes temporary hidden tables, temporary uploaded files and creates
|
|
|
|
* the new tables
|
|
|
|
*/
|
|
|
|
public async finishImportFiles(docSession: DocSession, dataSource: DataSourceTransformed,
                               prevTableIds: string[], importOptions: ImportOptions): Promise<ImportResult> {
  this._activeDoc.startBundleUserActions(docSession);
  try {
    // Remove the preview tables, then run the import again as a final (non-hidden) import.
    await this._removeHiddenTables(docSession, prevTableIds);

    const accessId = this._activeDoc.makeAccessId(docSession.authorizer.getUserId());
    const upload: UploadInfo = globalUploadSet.getUploadInfo(dataSource.uploadId, accessId);
    const finalResult = await this._importFiles(docSession, upload, dataSource.transforms,
                                                importOptions, false);

    // The uploaded files are only cleaned up once the import has succeeded.
    await globalUploadSet.cleanup(dataSource.uploadId);
    return finalResult;
  } finally {
    // The bundle is closed whether or not the import succeeded.
    this._activeDoc.stopBundleUserActions(docSession);
  }
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Cancels import files, removes temporary hidden tables and temporary uploaded files
|
|
|
|
*
|
|
|
|
* @param {DocSession} docSession: Session used to remove the hidden tables and stop the action bundle.
|
|
|
|
* @param {DataSourceTransformed} dataSource: an array of DataSource
|
|
|
|
* @param {Array<String>} prevTableIds: Array of tableIds as received from previous `importFiles`
|
|
|
|
* call when re-importing with changed `parseOptions`.
|
|
|
|
* @returns {Promise} Promise that's resolved when all actions are applied successfully.
|
|
|
|
*/
|
|
|
|
public async cancelImportFiles(docSession: DocSession,
                               dataSource: DataSourceTransformed,
                               prevTableIds: string[]): Promise<void> {
  try {
    // Drop the temporary hidden tables created by earlier importFiles() calls.
    await this._removeHiddenTables(docSession, prevTableIds);
  } finally {
    // Stop the action bundle even if removing the tables failed, mirroring the try/finally in
    // finishImportFiles(); otherwise a failed cancel would leave the bundle open.
    this._activeDoc.stopBundleUserActions(docSession);
  }

  // As before, the upload is only cleaned up when the hidden tables were removed successfully
  // (a failure above propagates before reaching this line).
  await globalUploadSet.cleanup(dataSource.uploadId);
}
|
|
|
|
|
2021-10-08 06:32:59 +00:00
|
|
|
/**
|
|
|
|
* Returns a diff of changes that will be applied to the destination table from `transformRule`
|
|
|
|
* if the data from `hiddenTableId` is imported with the specified `mergeOptions`.
|
|
|
|
*
|
|
|
|
* The diff is returned as a `DocStateComparison` of the same doc, with the `rightChanges`
|
|
|
|
* containing the updated cell values. Old values are pulled from the destination record (if
|
|
|
|
* a match was found), and new values are the result of merging in the new cell values with
|
|
|
|
* the merge strategy from `mergeOptions`.
|
|
|
|
*
|
|
|
|
* No distinction is currently made for added records vs. updated existing records; instead,
|
|
|
|
* we treat added records as an updated record in `hiddenTableId` where all the column
|
|
|
|
* values changed from blank to the original column values from `hiddenTableId`.
|
|
|
|
*
|
|
|
|
* @param {string} hiddenTableId Source table.
|
|
|
|
* @param {TransformRule} transformRule Transform rule for the original source columns.
|
|
|
|
* The destination table id is populated in the rule.
|
|
|
|
* @param {MergeOptions} mergeOptions Merge options for how to match source rows
|
|
|
|
* with destination records, and how to merge their column values.
|
|
|
|
* @returns {Promise<DocStateComparison>} Comparison data for the changes that will occur if
|
|
|
|
* `hiddenTableId` is merged into the destination table from `transformRule`.
|
|
|
|
*/
|
|
|
|
public async generateImportDiff(hiddenTableId: string, {destCols, destTableId}: TransformRule,
                                {mergeCols, mergeStrategy}: MergeOptions): Promise<DocStateComparison> {
  // Merge column ids coming from the client carry a prefix; remove it before matching.
  const strippedMergeCols = stripPrefixes(mergeCols);

  // Each (prefixed) source column id is paired with the destination id obtained by slicing
  // the import prefix off its front.
  const srcAndDestColIds: [string, string[]][] = destCols.map(col => {
    const srcColId = col.colId!;
    return [srcColId, [srcColId.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length)]];
  });
  const srcToDestColIds = new Map(srcAndDestColIds);
  const comparisonResult =
    await this._getTableComparison(hiddenTableId, destTableId!, srcToDestColIds, strippedMergeCols);

  // One (initially empty) ColumnDelta per source column, plus the list of affected row ids.
  const srcColIds = srcAndDestColIds.map(pair => pair[0]);
  const updatedRecords: {[colId: string]: ColumnDelta} = {};
  for (const srcColId of srcColIds) {
    updatedRecords[srcColId] = {};
  }
  const updatedRecordIds: number[] = [];

  // Function reconciling a source value with a destination value for the chosen strategy.
  const merge = getMergeFunction(mergeStrategy);

  const srcRowIds = comparisonResult[hiddenTableId + '.id'];
  const destRowIds = comparisonResult[destTableId + '.id'];
  const numResultRows = srcRowIds.length;

  for (let row = 0; row < numResultRows; row++) {
    const srcRowId = srcRowIds[row] as number;
    // A null destination id means no destination record matched this source row.
    const isNewRecord = destRowIds[row] === null;

    for (const srcColId of srcColIds) {
      const srcVal = comparisonResult[`${hiddenTableId}.${srcColId}`][row];

      if (isNewRecord) {
        // New records are reported as a change from blank to the source value.
        updatedRecords[srcColId][srcRowId] = [[''], [srcVal]];
        continue;
      }

      const matchingDestColId = srcToDestColIds.get(srcColId)![0];
      const destVal = comparisonResult[`${destTableId}.${matchingDestColId}`][row];

      // Cells whose value is unchanged are left out of the diff.
      if (srcVal === destVal) { continue; }

      updatedRecords[srcColId][srcRowId] = [[destVal], [merge(srcVal, destVal)]];
    }

    updatedRecordIds.push(srcRowId);
  }

  return {
    left: {n: 0, h: ''},  // NOTE: left, right, parent, and summary are not used by Importer.
    right: {n: 0, h: ''},
    parent: null,
    summary: 'right',
    details: {
      leftChanges: createEmptyActionSummary(),
      rightChanges: {
        tableRenames: [],
        tableDeltas: {
          [hiddenTableId]: {
            removeRows: [],
            updateRows: updatedRecordIds,
            addRows: [],  // Since deltas are relative to the source table, we can't (yet) use this.
            columnRenames: [],
            columnDeltas: updatedRecords,
          }
        }
      }
    }
  };
}
|
|
|
|
|
2020-07-21 13:20:51 +00:00
|
|
|
/**
|
|
|
|
* Import the given upload as new tables in one step. This does not give the user a chance to
|
|
|
|
* modify parse options or transforms. The caller is responsible for cleaning up the upload.
|
|
|
|
*/
|
|
|
|
public async oneStepImport(docSession: OptDocSession, uploadInfo: UploadInfo): Promise<ImportResult> {
  this._activeDoc.startBundleUserActions(docSession);
  try {
    // `await` is required: returning the bare promise from inside try/finally would run the
    // `finally` block (and stop the bundle) as soon as _importFiles() returned its pending
    // promise, i.e. before any of the import's user actions had been applied.
    // No transforms and no options are passed, and tables are created as final (not hidden).
    return await this._importFiles(docSession, uploadInfo, [], {}, false);
  } finally {
    this._activeDoc.stopBundleUserActions(docSession);
  }
}
|
|
|
|
|
|
|
|
/**
|
2021-09-15 06:12:34 +00:00
|
|
|
* Imports all files as new tables, using the given transform rules and import options.
|
2020-07-21 13:20:51 +00:00
|
|
|
* The isHidden flag indicates whether to create temporary hidden tables, or final ones.
|
|
|
|
*/
|
|
|
|
private async _importFiles(docSession: OptDocSession, upload: UploadInfo, transforms: TransformRuleMap[],
                           {parseOptions = {}, mergeOptionMaps = []}: ImportOptions,
                           isHidden: boolean): Promise<ImportResult> {

  // Reject uploads above the configured limit. An unset, non-numeric or zero
  // GRIST_MAX_UPLOAD_IMPORT_MB yields a falsy product, which means "no limit".
  const maxBytes = (Number(process.env.GRIST_MAX_UPLOAD_IMPORT_MB) * 1024 * 1024) || Infinity;
  let uploadBytes = 0;
  for (const uploaded of upload.files) {
    uploadBytes += uploaded.size;
  }
  if (uploadBytes > maxBytes) {
    throw new ApiError(`Imported files must not exceed ${gutil.byteString(maxBytes)}`, 413);
  }

  // The upload must live in the plugin-accessible directory. Once moved, later calls to
  // moveUpload() return without doing anything.
  await moveUpload(upload, this._activeDoc.docPluginManager.tmpDir());

  const importResult: ImportResult = {options: parseOptions, tables: []};
  for (let index = 0; index < upload.files.length; index++) {
    const file = upload.files[index];

    // When a better guess of the extension is available, substitute it into the original
    // name so that DocPluginManager can use it to pick the parser type.
    const origName: string = file.ext ?
      path.basename(file.origName, path.extname(file.origName)) + file.ext :
      file.origName;

    const fileOptions: FileImportOptions = {
      parseOptions,
      mergeOptionsMap: mergeOptionMaps[index] || {},
      isHidden,
      originalFilename: origName,
      uploadFileIndex: index,
      transformRuleMap: transforms[index] || {}
    };
    const res = await this._importFileAsNewTable(docSession, file.absPath, fileOptions);

    if (index === 0) {
      // Parse options returned for the first file are reused for all other files of the upload.
      parseOptions = res.options;
      importResult.options = parseOptions;
    }
    importResult.tables.push(...res.tables);
  }
  return importResult;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Imports the data stored at tmpPath.
|
|
|
|
*
|
|
|
|
* Currently it starts a python parser (that relies on the messytables library) as a child process
|
|
|
|
* outside the sandbox, and supports xls(x), csv, txt, and perhaps some other formats. It may
|
|
|
|
* result in the import of multiple tables, in case of e.g. Excel formats.
|
2021-09-15 06:12:34 +00:00
|
|
|
* @param {OptDocSession} docSession: Session instance to use for importing.
|
2020-07-21 13:20:51 +00:00
|
|
|
* @param {String} tmpPath: The path of the file to import.
|
2021-09-15 06:12:34 +00:00
|
|
|
* @param {FileImportOptions} importOptions: File import options.
|
2020-07-21 13:20:51 +00:00
|
|
|
* @returns {Promise<ImportResult>} with `options` property containing parseOptions as serialized JSON as adjusted
|
|
|
|
* or guessed by the plugin, and `tables`, which is a list of objects with information about
|
2021-09-15 06:12:34 +00:00
|
|
|
* tables, such as `hiddenTableId`, `uploadFileIndex`, `origTableName`, `transformSectionRef`, `destTableId`.
|
2020-07-21 13:20:51 +00:00
|
|
|
*/
|
2021-09-15 06:12:34 +00:00
|
|
|
private async _importFileAsNewTable(docSession: OptDocSession, tmpPath: string,
                                    importOptions: FileImportOptions): Promise<ImportResult> {
  const {originalFilename, parseOptions, mergeOptionsMap, isHidden, uploadFileIndex,
         transformRuleMap} = importOptions;
  log.info("ActiveDoc._importFileAsNewTable(%s, %s)", tmpPath, originalFilename);

  const optionsAndData: ParseFileResult =
    await this._activeDoc.docPluginManager.parseFile(tmpPath, originalFilename, parseOptions);
  const options = optionsAndData.parseOptions;
  const parsedTables = optionsAndData.tables;

  // Must run before the tables are added: it rewrites reference columns in `parsedTables`
  // and returns the descriptors later handed to _fixReferences().
  const references = this._encodeReferenceAsInt(parsedTables);

  const tables: ImportTableResult[] = [];
  const fixedColumnIdsByTable: { [tableId: string]: string[]; } = {};

  // File name without extension: last-resort name for a newly created destination table.
  const basename = path.basename(originalFilename, path.extname(originalFilename)).trim();
  const hiddenTableName = 'GristHidden_import';

  for (const table of parsedTables) {
    const origTableName = table.table_name || '';
    const hasRule = transformRuleMap && transformRuleMap.hasOwnProperty(origTableName);
    const transformRule = hasRule ? transformRuleMap[origTableName] : null;

    // Create the hidden table; AddTable reports the sanitized table and column ids.
    const addResult: ApplyUAResult = await this._activeDoc.applyUserActions(docSession,
      [["AddTable", hiddenTableName, table.column_metadata]]);
    const retValue: AddTableRetValue = addResult.retValues[0];
    const hiddenTableId = retValue.table_id;
    const hiddenTableColIds = retValue.columns;

    // table_data is an array of columns rather than a dictionary, so that it does not depend
    // on column names; the dictionary is built here from the sanitized ids.
    const firstColumn = table.table_data[0];
    const dataLength = firstColumn ? firstColumn.length : 0;
    log.info("Importing table %s, %s rows, from %s", hiddenTableId, dataLength, table.table_name);

    const rowIdColumn = _.range(1, dataLength + 1);
    const columnValues = _.object(hiddenTableColIds, table.table_data);
    const destTableId = transformRule ? transformRule.destTableId : null;

    // A rule is only usable when every source column it names exists in the hidden table.
    const ruleCanBeApplied = (transformRule != null) &&
      _.difference(transformRule.sourceCols, hiddenTableColIds).length === 0;
    const applicableRule = ruleCanBeApplied ? transformRule : null;

    await this._activeDoc.applyUserActions(docSession,
      [["ReplaceTableData", hiddenTableId, rowIdColumn, columnValues]]);

    // The parsed data now sits in hiddenTableId.
    // - Preview (isHidden): GenImporterView creates the views, formulas and columns.
    // - Final import: _transformAndFinishImport applies the transform rule (or a default one).
    let createdTableId: string;
    let transformSectionRef: number = -1;    // TODO: we only have this if we genImporterView, is it necessary?

    if (isHidden) {
      const viewResult: ApplyUAResult = await this._activeDoc.applyUserActions(docSession,
        [['GenImporterView', hiddenTableId, destTableId, applicableRule]]);
      transformSectionRef = viewResult.retValues[0];
      createdTableId = hiddenTableId;
    } else {
      const mergeOptions = mergeOptionsMap[origTableName] ?? null;
      const intoNewTable: boolean = !destTableId;
      const destTable = destTableId || table.table_name || basename;
      createdTableId = await this._transformAndFinishImport(docSession, hiddenTableId, destTable,
        intoNewTable, applicableRule, mergeOptions);
    }

    fixedColumnIdsByTable[createdTableId] = hiddenTableColIds;

    tables.push({
      hiddenTableId: createdTableId,      // TODO: rename thing?
      uploadFileIndex,
      origTableName,
      transformSectionRef,                // TODO: this shouldnt always be needed, and we only get it if genimporttransform
      destTableId
    });
  }

  await this._fixReferences(docSession, tables, fixedColumnIdsByTable, references, isHidden);

  return ({options, tables});
}
|
|
|
|
|
2021-10-04 16:14:14 +00:00
|
|
|
/**
|
|
|
|
* Imports records from `hiddenTableId` into `destTableId`, transforming the column
|
|
|
|
* values from `hiddenTableId` according to the `transformRule`. Finalizes import when done.
|
|
|
|
*
|
|
|
|
* If `mergeOptions` is present, records from `hiddenTableId` will be "merged" into `destTableId`
|
|
|
|
* according to a set of merge columns. Records from both tables that have equal values for all
|
|
|
|
* merge columns are treated as the same record, and will be updated in `destTableId` according
|
|
|
|
* to the strategy specified in `mergeOptions`.
|
|
|
|
*
|
|
|
|
* @param {string} hiddenTableId Source table containing records to be imported.
|
|
|
|
* @param {string} destTableId Destination table that will be updated.
|
|
|
|
* @param {boolean} intoNewTable True if import destination is a new table.
|
|
|
|
* @param {TransformRule|null} transformRule Rules for transforming source columns using formulas
|
|
|
|
* before merging/importing takes place.
|
|
|
|
* @param {MergeOptions|null} mergeOptions Options for how to merge matching records between
|
|
|
|
* the source and destination table.
|
|
|
|
* @returns {string} The table id of the new or updated destination table.
|
|
|
|
*/
|
|
|
|
private async _transformAndFinishImport(docSession: OptDocSession,
                                        hiddenTableId: string, destTableId: string,
                                        intoNewTable: boolean, transformRule: TransformRule|null,
                                        mergeOptions: MergeOptions|null): Promise<string> {
  log.info("ActiveDocImport._transformAndFinishImport(%s, %s, %s, %s, %s)",
    hiddenTableId, destTableId, intoNewTable, transformRule, mergeOptions);
  const srcCols = await this._activeDoc.getTableCols(docSession, hiddenTableId);

  // Fall back to a default rule when the client did not send one.
  let rule: TransformRule = transformRule ? transformRule :
    await this._makeDefaultTransformRule(docSession, srcCols, intoNewTable ? null : destTableId);

  // Rules sent by the client may carry prefixed column ids; strip them (in place).
  stripRulePrefixes(rule);

  if (intoNewTable) {
    // Rules for new tables arrive without destination column ids; have them filled in.
    const filled = await this._activeDoc.applyUserActions(docSession, [['FillTransformRuleColIds', rule]]);
    rule = filled.retValues[0] as TransformRule;

    // Encode Refs as Ints, to avoid table dependency issues. We'll convert back to Ref at the end.
    encodeRuleReferences(rule);
  } else if (rule.destCols.some(c => c.colId === null)) {
    throw new Error('Column ids in transform rule must be filled when importing into an existing table');
  }

  await this._activeDoc.applyUserActions(docSession,
    [['MakeImportTransformColumns', hiddenTableId, rule, false]]);

  // Merging only applies to existing tables with at least one merge column.
  if (!intoNewTable && mergeOptions && mergeOptions.mergeCols.length > 0) {
    await this._mergeAndFinishImport(docSession, hiddenTableId, destTableId, rule, mergeOptions);
    return destTableId;
  }

  const hiddenTableData = fromTableDataAction(await this._activeDoc.fetchTable(docSession, hiddenTableId, true));
  const srcColIds = srcCols.map(c => c.id as string);
  const destCols = rule.destCols;

  // Collect, per destination column, the data to copy out of the hidden table.
  const columnData: BulkColValues = {};
  for (const destCol of destCols) {
    const formula = destCol.formula.trim();
    if (!formula) { continue; }

    // A formula of the form `$<source column id>` reads that source column directly; any
    // other formula is read from the prefixed transform column of the hidden table.
    const referencedId = formula.slice(1);
    const readsSourceColumn = formula.startsWith('$') && srcColIds.includes(referencedId);
    const srcColId = readsSourceColumn ? referencedId : IMPORT_TRANSFORM_COLUMN_PREFIX + destCol.colId;

    columnData[destCol.colId!] = hiddenTableData[srcColId];
  }

  // We no longer need the temporary import table, so remove it.
  await this._activeDoc.applyUserActions(docSession, [['RemoveTable', hiddenTableId]]);

  // A new destination table has to be created first; AddTable returns its final id.
  let finalTableId = destTableId;
  if (intoNewTable) {
    const colSpecs = destCols.map(({type, colId: id, label}) => ({type, id, label}));
    const newTable = await this._activeDoc.applyUserActions(docSession, [['AddTable', destTableId, colSpecs]]);
    finalTableId = newTable.retValues[0].table_id;
  }

  // One null row id per hidden-table row is passed alongside the column data.
  await this._activeDoc.applyUserActions(docSession,
    [['BulkAddRecord', finalTableId, gutil.arrayRepeat(hiddenTableData.id.length, null), columnData]]);

  return finalTableId;
}
|
|
|
|
|
2021-10-08 06:32:59 +00:00
|
|
|
/**
|
|
|
|
* Merges matching records from `hiddenTableId` into `destTableId`, and finalizes import.
|
|
|
|
*
|
|
|
|
* @param {string} hiddenTableId Source table containing records to be imported.
|
|
|
|
* @param {string} destTableId Destination table that will be updated.
|
|
|
|
* @param {TransformRule} transformRule Rules for transforming source columns using formulas
|
|
|
|
* before merging/importing takes place.
|
|
|
|
* @param {MergeOptions} mergeOptions Options for how to merge matching records between
|
|
|
|
* the source and destination table.
|
|
|
|
*/
|
|
|
|
private async _mergeAndFinishImport(docSession: OptDocSession, hiddenTableId: string, destTableId: string,
                                    {destCols, sourceCols}: TransformRule,
                                    {mergeCols, mergeStrategy}: MergeOptions): Promise<void> {
  // Merge column ids coming from the client carry a prefix; remove it before matching.
  const strippedMergeCols = stripPrefixes(mergeCols);

  // Map each source column to the destination column(s) it feeds. A formula of the form
  // `$<source column id>` reads that source column directly; anything else is read from the
  // prefixed transform column. Several destination columns may share one source column.
  const srcToDestColIds: Map<string, string[]> = new Map();
  for (const destCol of destCols) {
    const formula = destCol.formula.trim();
    const referencedId = formula.slice(1);
    const readsSourceColumn = formula.startsWith('$') && sourceCols.includes(referencedId);
    const srcColId = readsSourceColumn ? referencedId : IMPORT_TRANSFORM_COLUMN_PREFIX + destCol.colId;
    const destColId = destCol.colId!;

    const knownDestColIds = srcToDestColIds.get(srcColId);
    if (knownDestColIds === undefined) {
      srcToDestColIds.set(srcColId, [destColId]);
    } else {
      knownDestColIds.push(destColId);
    }
  }

  const comparisonResult =
    await this._getTableComparison(hiddenTableId, destTableId, srcToDestColIds, strippedMergeCols);

  // Containers for new and updated records, with one value list per destination column.
  const newRecords: BulkColValues = {};
  const updatedRecords: BulkColValues = {};
  for (const destColId of flatten([...srcToDestColIds.values()])) {
    newRecords[destColId] = [];
    updatedRecords[destColId] = [];
  }
  let numNewRecords = 0;
  const updatedRecordIds: number[] = [];

  // Function reconciling a source value with a destination value for the chosen strategy.
  const merge = getMergeFunction(mergeStrategy);

  const srcRowIds = comparisonResult[hiddenTableId + '.id'];
  const destRowIds = comparisonResult[destTableId + '.id'];
  const numResultRows = srcRowIds.length;

  for (let row = 0; row < numResultRows; row++) {
    const destRowId = destRowIds[row];
    // A null destination id means no destination record matched this source row.
    const isNewRecord = destRowId === null;

    for (const [srcColId, matchingDestColIds] of srcToDestColIds) {
      const srcVal = comparisonResult[`${hiddenTableId}.${srcColId}`][row];
      for (const destColId of matchingDestColIds) {
        if (isNewRecord) {
          newRecords[destColId].push(srcVal);
        } else {
          const destVal = comparisonResult[`${destTableId}.${destColId}`][row];
          updatedRecords[destColId].push(merge(srcVal, destVal));
        }
      }
    }

    if (isNewRecord) {
      numNewRecords++;
    } else {
      updatedRecordIds.push(destRowId as number);
    }
  }

  // We no longer need the temporary import table, so remove it.
  await this._activeDoc.applyUserActions(docSession, [['RemoveTable', hiddenTableId]]);

  if (updatedRecordIds.length > 0) {
    await this._activeDoc.applyUserActions(docSession,
      [['BulkUpdateRecord', destTableId, updatedRecordIds, updatedRecords]]);
  }

  if (numNewRecords > 0) {
    await this._activeDoc.applyUserActions(docSession,
      [['BulkAddRecord', destTableId, gutil.arrayRepeat(numNewRecords, null), newRecords]]);
  }
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Builds and executes a SQL query that compares common columns from `hiddenTableId`
|
|
|
|
* and `destTableId`, returning matched rows that contain differences between both tables.
|
|
|
|
*
|
|
|
|
* The `mergeCols` parameter defines how rows from both tables are matched; we consider
|
|
|
|
* rows whose columns values for all columns in `mergeCols` to be the same record in both
|
|
|
|
* tables.
|
|
|
|
*
|
|
|
|
* @param {string} hiddenTableId Source table.
|
|
|
|
* @param {string} destTableId Destination table.
|
2021-11-09 20:03:12 +00:00
|
|
|
* @param {Map<string, string[]>} srcToDestColIds Map of source to one or more destination column ids
|
|
|
|
* to include in the comparison results.
|
2021-10-08 06:32:59 +00:00
|
|
|
* @param {string[]} mergeCols List of (destination) column ids to use for matching.
|
|
|
|
* @returns {Promise<BulkColValues>} Decoded column values from both tables that were matched, and had differences.
|
|
|
|
*/
|
2021-11-09 20:03:12 +00:00
|
|
|
private async _getTableComparison(hiddenTableId: string, destTableId: string, srcToDestColIds: Map<string, string[]>,
                                  mergeCols: string[]): Promise<BulkColValues> {
  const mergeColIds = new Set(mergeCols);

  // For every source column, pick the first of its destination columns that is a merge
  // column (if any) and record the destination -> source pairing.
  const destToSrcMergeColIds = new Map();
  for (const [srcColId, destColIds] of srcToDestColIds) {
    const mergeColId = destColIds.find(colId => mergeColIds.has(colId));
    if (mergeColId === undefined) { continue; }
    destToSrcMergeColIds.set(mergeColId, srcColId);
  }

  // Build the comparison query, run it against the doc storage and decode the result.
  const query = buildComparisonQuery(hiddenTableId, destTableId, srcToDestColIds, destToSrcMergeColIds);
  const fetched = await this._activeDoc.docStorage.fetchQuery(query);
  return this._activeDoc.docStorage.decodeMarshalledDataFromTables(fetched);
}
|
|
|
|
|
2021-10-04 16:14:14 +00:00
|
|
|
/**
 * Builds the default TransformRule for an import.
 *
 * Column definitions are read from `destTableId` when it is set; when it is null (the import
 * destination is a new table), `srcCols` are used instead. Formula columns (those with
 * `isFormula === true` or a non-empty `formula`) are left out of the rule.
 *
 * @param {OptDocSession} docSession Session used to fetch the destination table's columns.
 * @param {TableRecordValue[]} srcCols Source column definitions.
 * @param {string|null} destTableId The destination table id, or null for a new table.
 * @returns {Promise<TransformRule>} The constructed transform rule.
 */
private async _makeDefaultTransformRule(docSession: OptDocSession, srcCols: TableRecordValue[],
                                        destTableId: string|null): Promise<TransformRule> {
  const targetCols = destTableId ? await this._activeDoc.getTableCols(docSession, destTableId) : srcCols;
  const sourceCols = srcCols.map(col => col.id as string);

  const destCols: TransformColumn[] = targetCols
    // Keep data columns only.
    .filter(({fields}) => fields.isFormula !== true && fields.formula === '')
    .map(({id, fields}) => ({
      // A new table has no column ids yet, so the id is only filled in for existing tables.
      colId: destTableId ? id as string : null,
      label: fields.label as string,
      type: fields.type as string,
      // Pull the value from the source column with the same id, if there is one.
      formula: sourceCols.includes(id as string) ? `$${id}` : ''
    }));

  return {destTableId, destCols, sourceCols};
}
|
|
|
|
|
2020-07-21 13:20:51 +00:00
|
|
|
/**
 * Removes the temporary hidden tables that were created during the import process.
 *
 * @param {DocSession} docSession Session the removal actions are applied with.
 * @param {string[]} hiddenTableIds Ids of the hidden tables to remove.
 * @returns {Promise<void>} Promise that's resolved when all actions are applied successfully.
 */
private async _removeHiddenTables(docSession: DocSession, hiddenTableIds: string[]) {
  // Nothing to remove: do not send an empty batch of actions.
  if (hiddenTableIds.length === 0) { return; }

  const removeActions = hiddenTableIds.map(tableId => ['RemoveTable', tableId]);
  await this._activeDoc.applyUserActions(docSession, removeActions);
}
|
|
|
|
|
|
|
|
/**
 * Rewrites, in place, the type of every reference column in `parsedTables` to 'Int', and
 * returns a descriptor (referenced table id, column index, table index) for each column
 * that was rewritten.
 */
private _encodeReferenceAsInt(parsedTables: GristTable[]): ReferenceDescription[] {
  const found: ReferenceDescription[] = [];
  parsedTables.forEach((parsedTable, tableIndex) => {
    parsedTable.column_metadata.forEach((col, colIndex) => {
      const refTableId = gutil.removePrefix(col.type, "Ref:");
      if (!refTableId) { return; }
      // Remember where the reference was before overwriting its type.
      found.push({refTableId, colIndex, tableIndex});
      col.type = 'Int';
    });
  });
  return found;
}
|
|
|
|
|
|
|
|
/**
 * Fixes references that were broken by the change of table ids during import.
 *
 * For every descriptor in `references`, a 'ModifyColumn' action sets the column's type to
 * `Ref:<hidden table id of the referenced table>`. When `isHidden` is true, the same change
 * is also applied to the matching import helper column (same column id with the import
 * transform prefix).
 */
private async _fixReferences(docSession: OptDocSession,
                             tables: ImportTableResult[],
                             fixedColumnIds: { [tableId: string]: string[]; },
                             references: ReferenceDescription[],
                             isHidden: boolean) {
  // Look up imported tables by the name they had in the source.
  const importedByOrigName = _.indexBy(tables, 'origTableName');

  // One 'ModifyColumn' action per reference column.
  const actions: any[] = [];
  for (const {tableIndex, colIndex, refTableId} of references) {
    const tableId = tables[tableIndex].hiddenTableId;
    const columnId = fixedColumnIds[tableId][colIndex];
    const colInfo = { type: `Ref:${importedByOrigName[refTableId].hiddenTableId}` };
    actions.push(['ModifyColumn', tableId, columnId, colInfo]);
  }

  if (isHidden) {
    // Mirror each action onto the prefixed helper column of the same table.
    const helperActions = actions.map(([, tableId, columnId, colInfo]) => [
      'ModifyColumn', tableId, IMPORT_TRANSFORM_COLUMN_PREFIX + columnId, colInfo ]);
    actions.push(...helperActions);
  }

  // Apply everything in a single batch, unless there is nothing to apply.
  if (actions.length > 0) {
    await this._activeDoc.applyUserActions(docSession, actions);
  }
}
|
2021-10-04 16:14:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns true when a cell holds no content: it is null, or a string made up only of
// whitespace (including the empty string). Any other value is not blank.
function isBlank(value: CellValue): boolean {
  if (value === null) { return true; }
  if (typeof value !== 'string') { return false; }
  return value.trim() === '';
}
|
|
|
|
|
2021-11-18 22:29:33 +00:00
|
|
|
/**
 * Rewrites, in place, the type of every Ref column in `destCols` to 'Int'.
 *
 * Encoding references as ints can be useful when finishing imports to avoid
 * issues such as importing linked tables in the wrong order. When encoding references,
 * ActiveDocImport._fixReferences should be called at the end of importing to
 * decode Ints back to Refs.
 */
function encodeRuleReferences({destCols}: TransformRule): void {
  destCols.forEach(col => {
    const refTableId = gutil.removePrefix(col.type, "Ref:");
    if (!refTableId) { return; }
    col.type = 'Int';
  });
}
|
|
|
|
|
2021-10-04 16:14:14 +00:00
|
|
|
// Removes, in place, the import prefix from the id of every column in the transform rule.
// Columns without an id, or whose id does not carry the prefix, are left untouched.
function stripRulePrefixes({destCols}: TransformRule): void {
  destCols.forEach(col => {
    const {colId} = col;
    if (!colId || !colId.startsWith(IMPORT_TRANSFORM_COLUMN_PREFIX)) { return; }
    col.colId = colId.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length);
  });
}
|
|
|
|
|
2021-11-09 20:03:12 +00:00
|
|
|
// Returns a new array with the import prefix removed from each id that starts with it.
// Ids without the prefix are passed through unchanged; the input array is not modified.
function stripPrefixes(colIds: string[]): string[] {
  return colIds.map(colId => {
    if (colId.startsWith(IMPORT_TRANSFORM_COLUMN_PREFIX)) {
      return colId.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length);
    }
    return colId;
  });
}
|
|
|
|
|
2021-10-04 16:14:14 +00:00
|
|
|
// Reconciles a source cell value with the matching destination cell value.
type MergeFunction = (srcVal: CellValue, destVal: CellValue) => CellValue;

/**
 * Picks the function used to reconcile matching source and destination column values.
 *
 * - 'replace-with-nonblank-source': the source value wins unless it is blank.
 * - 'replace-all-fields': the source value always wins.
 * - 'replace-blank-fields-only': the destination value is kept unless it is blank.
 *
 * @param {MergeStrategy} mergeStrategy Determines how matching source and destination column values
 *  should be reconciled when merging.
 * @returns {MergeFunction} Function that maps column value pairs to a single output value.
 * @throws {Error} If the strategy type is not one of the known types.
 */
function getMergeFunction({type}: MergeStrategy): MergeFunction {
  if (type === 'replace-with-nonblank-source') {
    return (srcVal, destVal) => (isBlank(srcVal) ? destVal : srcVal);
  }
  if (type === 'replace-all-fields') {
    return (srcVal, _destVal) => srcVal;
  }
  if (type === 'replace-blank-fields-only') {
    return (srcVal, destVal) => (isBlank(destVal) ? srcVal : destVal);
  }

  // Normally, we should never arrive here. The `never` assignment makes the compiler flag
  // any strategy type that is not handled above; at runtime an unknown type is an error.
  const unknownStrategyType: never = type;
  throw new Error(`Unknown merge strategy: ${unknownStrategyType}`);
}
|