(core) Migrate import code from data engine to Node

Summary:
Finishing imports now occurs in Node instead of the
data engine, which makes it possible to import into
on-demand tables. Merging code was also refactored
and now uses a SQL query to diff source and destination
tables in order to determine what to update or add.

Also fixes a bug where incremental imports involving
Excel files with multiple sheets would fail due to the UI
not serializing merge options correctly.

Test Plan: Browser tests.

Reviewers: jarek

Reviewed By: jarek

Differential Revision: https://phab.getgrist.com/D3046
This commit is contained in:
George Gevoian 2021-10-04 09:14:14 -07:00
parent 7e07f0ce56
commit e1780e4f58
11 changed files with 473 additions and 885 deletions

View File

@ -18,6 +18,7 @@ import {icon} from 'app/client/ui2018/icons';
import {IOptionFull, linkSelect, multiSelect} from 'app/client/ui2018/menus'; import {IOptionFull, linkSelect, multiSelect} from 'app/client/ui2018/menus';
import {cssModalButtons, cssModalTitle} from 'app/client/ui2018/modals'; import {cssModalButtons, cssModalTitle} from 'app/client/ui2018/modals';
import {DataSourceTransformed, ImportResult, ImportTableResult, MergeOptions, import {DataSourceTransformed, ImportResult, ImportTableResult, MergeOptions,
MergeOptionsMap,
MergeStrategy, TransformColumn, TransformRule, TransformRuleMap} from "app/common/ActiveDocAPI"; MergeStrategy, TransformColumn, TransformRule, TransformRuleMap} from "app/common/ActiveDocAPI";
import {byteString} from "app/common/gutil"; import {byteString} from "app/common/gutil";
import {FetchUrlOptions, UploadResult} from 'app/common/uploads'; import {FetchUrlOptions, UploadResult} from 'app/common/uploads';
@ -236,20 +237,8 @@ export class Importer extends Disposable {
return {uploadId: upload.uploadId, transforms}; return {uploadId: upload.uploadId, transforms};
} }
private _getMergeOptions(upload: UploadResult): Array<MergeOptions|null> { private _getMergeOptionMaps(upload: UploadResult): MergeOptionsMap[] {
return upload.files.map((_file, i) => { return upload.files.map((_file, i) => this._createMergeOptionsMap(i));
const sourceInfo = this._sourceInfoArray.get().find(info => info.uploadFileIndex === i);
if (!sourceInfo) { return null; }
const mergeOptions = this._mergeOptions[sourceInfo.hiddenTableId];
if (!mergeOptions) { return null; }
const {updateExistingRecords, mergeCols, mergeStrategy} = mergeOptions;
return {
mergeCols: updateExistingRecords.get() ? mergeCols.get() : [],
mergeStrategy: mergeStrategy.get()
};
});
} }
private _createTransformRuleMap(uploadFileIndex: number): TransformRuleMap { private _createTransformRuleMap(uploadFileIndex: number): TransformRuleMap {
@ -262,6 +251,16 @@ export class Importer extends Disposable {
return result; return result;
} }
/**
 * Builds the merge options map for a single uploaded file: every source table whose
 * `uploadFileIndex` equals the given index contributes one entry, keyed by the table's
 * original name. Entries may be undefined when a table has no merge options.
 */
private _createMergeOptionsMap(uploadFileIndex: number): MergeOptionsMap {
  const optionsByTableName: MergeOptionsMap = {};
  this._sourceInfoArray.get()
    .filter(info => info.uploadFileIndex === uploadFileIndex)
    .forEach(info => {
      optionsByTableName[info.origTableName] = this._getMergeOptionsForSource(info);
    });
  return optionsByTableName;
}
private _createTransformRule(sourceInfo: SourceInfo): TransformRule { private _createTransformRule(sourceInfo: SourceInfo): TransformRule {
const transformFields = sourceInfo.transformSection.get().viewFields().peek(); const transformFields = sourceInfo.transformSection.get().viewFields().peek();
const sourceFields = sourceInfo.sourceSection.viewFields().peek(); const sourceFields = sourceInfo.sourceSection.viewFields().peek();
@ -279,6 +278,17 @@ export class Importer extends Disposable {
}; };
} }
/**
 * Reads the current merge settings stored in `this._mergeOptions` for the hidden table of
 * `sourceInfo` and converts them to plain `MergeOptions`. Returns undefined when nothing is
 * stored for that table.
 */
private _getMergeOptionsForSource(sourceInfo: SourceInfo): MergeOptions|undefined {
  const stored = this._mergeOptions[sourceInfo.hiddenTableId];
  if (stored) {
    // Merge columns are only reported while "update existing records" is switched on.
    const mergeCols = stored.updateExistingRecords.get() ? stored.mergeCols.get() : [];
    return {mergeCols, mergeStrategy: stored.mergeStrategy.get()};
  }
  return undefined;
}
private _getHiddenTableIds(): string[] { private _getHiddenTableIds(): string[] {
return this._sourceInfoArray.get().map((t: SourceInfo) => t.hiddenTableId); return this._sourceInfoArray.get().map((t: SourceInfo) => t.hiddenTableId);
} }
@ -332,10 +342,10 @@ export class Importer extends Disposable {
this._screen.renderSpinner(); this._screen.renderSpinner();
const parseOptions = {...this._parseOptions.get(), NUM_ROWS: 0}; const parseOptions = {...this._parseOptions.get(), NUM_ROWS: 0};
const mergeOptions = this._getMergeOptions(upload); const mergeOptionMaps = this._getMergeOptionMaps(upload);
const importResult: ImportResult = await this._docComm.finishImportFiles( const importResult: ImportResult = await this._docComm.finishImportFiles(
this._getTransformedDataSource(upload), this._getHiddenTableIds(), {mergeOptions, parseOptions}); this._getTransformedDataSource(upload), this._getHiddenTableIds(), {mergeOptionMaps, parseOptions});
if (importResult.tables[0].hiddenTableId) { if (importResult.tables[0].hiddenTableId) {
const tableRowModel = this._gristDoc.docModel.dataTables[importResult.tables[0].hiddenTableId].tableMetaRow; const tableRowModel = this._gristDoc.docModel.dataTables[importResult.tables[0].hiddenTableId].tableMetaRow;

View File

@ -56,8 +56,14 @@ export interface ImportTableResult {
destTableId: string|null; destTableId: string|null;
} }
export interface MergeStrategy { export interface ImportOptions {
type: 'replace-with-nonblank-source' | 'replace-all-fields' | 'replace-blank-fields-only'; parseOptions?: ParseOptions; // Options for parsing the source file.
mergeOptionMaps?: MergeOptionsMap[]; // Options for merging fields, indexed by uploadFileIndex.
}
export interface MergeOptionsMap {
// Map of original GristTable name of imported table to its merge options, if any.
[origTableName: string]: MergeOptions|undefined;
} }
export interface MergeOptions { export interface MergeOptions {
@ -65,9 +71,8 @@ export interface MergeOptions {
mergeStrategy: MergeStrategy; // Determines how matched records should be merged between 2 tables. mergeStrategy: MergeStrategy; // Determines how matched records should be merged between 2 tables.
} }
export interface ImportOptions { export interface MergeStrategy {
parseOptions?: ParseOptions; // Options for parsing the source file. type: 'replace-with-nonblank-source' | 'replace-all-fields' | 'replace-blank-fields-only';
mergeOptions?: Array<MergeOptions|null>; // Options for merging fields, indexed by uploadFileIndex.
} }
/** /**

View File

@ -33,6 +33,7 @@ import {
DocAction, DocAction,
RowRecord, RowRecord,
TableDataAction, TableDataAction,
TableRecordValue,
toTableDataAction, toTableDataAction,
UserAction UserAction
} from 'app/common/DocActions'; } from 'app/common/DocActions';
@ -82,6 +83,7 @@ import cloneDeep = require('lodash/cloneDeep');
import flatten = require('lodash/flatten'); import flatten = require('lodash/flatten');
import remove = require('lodash/remove'); import remove = require('lodash/remove');
import zipObject = require('lodash/zipObject'); import zipObject = require('lodash/zipObject');
import without = require('lodash/without');
bluebird.promisifyAll(tmp); bluebird.promisifyAll(tmp);
@ -805,6 +807,40 @@ export class ActiveDoc extends EventEmitter {
return this._pyCall('find_col_from_values', values, n, optTableId); return this._pyCall('find_col_from_values', values, n, optTableId);
} }
/**
 * Returns column metadata for all visible columns from `tableId`.
 *
 * Columns with an empty id, the `manualSort` column, and columns whose id starts with
 * `gristHelper_` are left out of the result.
 *
 * @param {OptDocSession} docSession Session used to fetch the metadata tables.
 * @param {string} tableId Table to retrieve column metadata for.
 * @returns {Promise<TableRecordValue[]>} Records containing metadata about the visible columns
 * from `tableId`. Each record's `id` is the column id, and `fields` holds `colRef` plus every
 * other field of the column's row in `_grist_Tables_column`.
 * @throws {ApiError} 404 error (raised by `tableIdToRef`) if `tableId` is not found.
 */
public async getTableCols(docSession: OptDocSession, tableId: string): Promise<TableRecordValue[]> {
  const metaTables = await this.fetchMetaTables(docSession);
  const tableRef = tableIdToRef(metaTables, tableId);
  const [, , colRefs, columnData] = metaTables._grist_Tables_column;

  // colId is pulled out of fields and used as the root id
  const fieldNames = without(Object.keys(columnData), "colId");
  const columns: TableRecordValue[] = [];
  (columnData.colId as string[]).forEach((id, index) => {
    if (
      // Reject missing ids first, so the string checks below never run on an empty value.
      !id ||
      // TODO param to include hidden columns
      id === "manualSort" || id.startsWith("gristHelper_") ||
      // Filter columns from the requested table
      columnData.parentId[index] !== tableRef
    ) {
      return;
    }
    const column: TableRecordValue = { id, fields: { colRef: colRefs[index] } };
    for (const key of fieldNames) {
      column.fields[key] = columnData[key][index];
    }
    columns.push(column);
  });
  return columns;
}
/** /**
* Returns error message (traceback) for one invalid formula cell. * Returns error message (traceback) for one invalid formula cell.
* @param {String} tableId - Table name * @param {String} tableId - Table name
@ -1595,3 +1631,13 @@ function createEmptySandboxActionBundle(): SandboxActionBundle {
retValues: [] retValues: []
}; };
} }
/**
 * Converts a Grist table id to the row id (ref) of its record in `_grist_Tables`.
 * Throws a 404 `ApiError` when no record has the given table id.
 */
export function tableIdToRef(metaTables: { [p: string]: TableDataAction }, tableId: string) {
  const [, , refs, tablesData] = metaTables._grist_Tables;
  const position = tablesData.tableId.indexOf(tableId);
  if (position !== -1) {
    return refs[position];
  }
  throw new ApiError(`Table not found "${tableId}"`, 404);
}

View File

@ -3,10 +3,11 @@
import * as path from 'path'; import * as path from 'path';
import * as _ from 'underscore'; import * as _ from 'underscore';
import {DataSourceTransformed, ImportOptions, ImportResult, ImportTableResult, MergeOptions, import {ApplyUAResult, DataSourceTransformed, ImportOptions, ImportResult, ImportTableResult,
MergeOptions, MergeOptionsMap, MergeStrategy, TransformColumn, TransformRule,
TransformRuleMap} from 'app/common/ActiveDocAPI'; TransformRuleMap} from 'app/common/ActiveDocAPI';
import {ApplyUAResult} from 'app/common/ActiveDocAPI';
import {ApiError} from 'app/common/ApiError'; import {ApiError} from 'app/common/ApiError';
import {BulkColValues, CellValue, fromTableDataAction, TableRecordValue} from 'app/common/DocActions';
import * as gutil from 'app/common/gutil'; import * as gutil from 'app/common/gutil';
import {ParseFileResult, ParseOptions} from 'app/plugin/FileParserAPI'; import {ParseFileResult, ParseOptions} from 'app/plugin/FileParserAPI';
import {GristTable} from 'app/plugin/GristTable'; import {GristTable} from 'app/plugin/GristTable';
@ -14,7 +15,9 @@ import {ActiveDoc} from 'app/server/lib/ActiveDoc';
import {DocSession, OptDocSession} from 'app/server/lib/DocSession'; import {DocSession, OptDocSession} from 'app/server/lib/DocSession';
import * as log from 'app/server/lib/log'; import * as log from 'app/server/lib/log';
import {globalUploadSet, moveUpload, UploadInfo} from 'app/server/lib/uploads'; import {globalUploadSet, moveUpload, UploadInfo} from 'app/server/lib/uploads';
import {buildComparisonQuery} from 'app/server/lib/ExpandedQuery';
const IMPORT_TRANSFORM_COLUMN_PREFIX = 'gristHelper_Import_';
/* /*
* AddTableRetValue contains return value of user actions 'AddTable' * AddTableRetValue contains return value of user actions 'AddTable'
@ -40,8 +43,8 @@ interface FileImportOptions {
originalFilename: string; originalFilename: string;
// Containing parseOptions as serialized JSON to pass to the import plugin. // Containing parseOptions as serialized JSON to pass to the import plugin.
parseOptions: ParseOptions; parseOptions: ParseOptions;
// Options for determining how matched fields between source and destination tables should be merged. // Map of table names to their merge options.
mergeOptions: MergeOptions|null; mergeOptionsMap: MergeOptionsMap;
// Flag to indicate whether table is temporary and hidden or regular. // Flag to indicate whether table is temporary and hidden or regular.
isHidden: boolean; isHidden: boolean;
// Index of original dataSource corresponding to current imported file. // Index of original dataSource corresponding to current imported file.
@ -121,7 +124,7 @@ export class ActiveDocImport {
* The isHidden flag indicates whether to create temporary hidden tables, or final ones. * The isHidden flag indicates whether to create temporary hidden tables, or final ones.
*/ */
private async _importFiles(docSession: OptDocSession, upload: UploadInfo, transforms: TransformRuleMap[], private async _importFiles(docSession: OptDocSession, upload: UploadInfo, transforms: TransformRuleMap[],
{parseOptions = {}, mergeOptions = []}: ImportOptions, {parseOptions = {}, mergeOptionMaps = []}: ImportOptions,
isHidden: boolean): Promise<ImportResult> { isHidden: boolean): Promise<ImportResult> {
// Check that upload size is within the configured limits. // Check that upload size is within the configured limits.
@ -145,7 +148,7 @@ export class ActiveDocImport {
} }
const res = await this._importFileAsNewTable(docSession, file.absPath, { const res = await this._importFileAsNewTable(docSession, file.absPath, {
parseOptions, parseOptions,
mergeOptions: mergeOptions[index] || null, mergeOptionsMap: mergeOptionMaps[index] || {},
isHidden, isHidden,
originalFilename: origName, originalFilename: origName,
uploadFileIndex: index, uploadFileIndex: index,
@ -175,7 +178,7 @@ export class ActiveDocImport {
*/ */
private async _importFileAsNewTable(docSession: OptDocSession, tmpPath: string, private async _importFileAsNewTable(docSession: OptDocSession, tmpPath: string,
importOptions: FileImportOptions): Promise<ImportResult> { importOptions: FileImportOptions): Promise<ImportResult> {
const {originalFilename, parseOptions, mergeOptions, isHidden, uploadFileIndex, const {originalFilename, parseOptions, mergeOptionsMap, isHidden, uploadFileIndex,
transformRuleMap} = importOptions; transformRuleMap} = importOptions;
log.info("ActiveDoc._importFileAsNewTable(%s, %s)", tmpPath, originalFilename); log.info("ActiveDoc._importFileAsNewTable(%s, %s)", tmpPath, originalFilename);
const optionsAndData: ParseFileResult = const optionsAndData: ParseFileResult =
@ -217,7 +220,7 @@ export class ActiveDocImport {
// data parsed and put into hiddenTableId // data parsed and put into hiddenTableId
// For preview_table (isHidden) do GenImporterView to make views and formulas and cols // For preview_table (isHidden) do GenImporterView to make views and formulas and cols
// For final import, call TransformAndFinishImport, which imports file using a transform rule (or blank) // For final import, call _transformAndFinishImport, which imports file using a transform rule (or blank)
let createdTableId: string; let createdTableId: string;
let transformSectionRef: number = -1; // TODO: we only have this if we genImporterView, is it necessary? let transformSectionRef: number = -1; // TODO: we only have this if we genImporterView, is it necessary?
@ -232,20 +235,15 @@ export class ActiveDocImport {
} else { } else {
// Do final import // Do final import
const mergeOptions = mergeOptionsMap[origTableName] ?? null;
const intoNewTable: boolean = destTableId ? false : true; const intoNewTable: boolean = destTableId ? false : true;
const destTable = destTableId || table.table_name || basename; const destTable = destTableId || table.table_name || basename;
const tableId = await this._activeDoc.applyUserActions(docSession, createdTableId = await this._transformAndFinishImport(docSession, hiddenTableId, destTable,
[['TransformAndFinishImport', intoNewTable, ruleCanBeApplied ? transformRule : null, mergeOptions);
hiddenTableId, destTable, intoNewTable,
ruleCanBeApplied ? transformRule : null, mergeOptions]]);
createdTableId = tableId.retValues[0]; // this is garbage for now I think?
} }
fixedColumnIdsByTable[createdTableId] = hiddenTableColIds; fixedColumnIdsByTable[createdTableId] = hiddenTableColIds;
tables.push({ tables.push({
hiddenTableId: createdTableId, // TODO: rename thing? hiddenTableId: createdTableId, // TODO: rename thing?
uploadFileIndex, uploadFileIndex,
@ -260,6 +258,121 @@ export class ActiveDocImport {
return ({options, tables}); return ({options, tables});
} }
/**
 * Copies the records of `hiddenTableId` into `destTableId`, after transforming the source
 * column values as described by `transformRule`, and then finalizes the import.
 *
 * When importing into an existing table with `mergeOptions` that name at least one merge
 * column, the work is delegated to `_mergeAndFinishImport`: records of both tables that agree
 * on all merge columns are treated as the same record and updated according to the merge
 * strategy. Otherwise every source record is added to the destination as a new record.
 *
 * @param {string} hiddenTableId Source table containing records to be imported.
 * @param {string} destTableId Destination table that will be updated.
 * @param {boolean} intoNewTable True if import destination is a new table.
 * @param {TransformRule|null} transformRule Rules for transforming source columns using formulas
 * before merging/importing takes place. A default rule is built when this is null.
 * @param {MergeOptions|null} mergeOptions Options for how to merge matching records between
 * the source and destination table.
 * @returns {string} The table id of the new or updated destination table.
 */
private async _transformAndFinishImport(docSession: OptDocSession,
                                        hiddenTableId: string, destTableId: string,
                                        intoNewTable: boolean, transformRule: TransformRule|null,
                                        mergeOptions: MergeOptions|null): Promise<string> {
  log.info("ActiveDocImport._transformAndFinishImport(%s, %s, %s, %s, %s)",
    hiddenTableId, destTableId, intoNewTable, transformRule, mergeOptions);
  const sourceColumns = await this._activeDoc.getTableCols(docSession, hiddenTableId);

  // The client may not have supplied a transform rule; build a default one in that case.
  let rule: TransformRule = transformRule ??
    await this._makeDefaultTransformRule(docSession, sourceColumns, intoNewTable ? null : destTableId);

  // Column ids in rules coming from the client may carry the import prefix; remove it.
  stripPrefixes(rule);

  if (!intoNewTable) {
    if (rule.destCols.some(c => c.colId === null)) {
      throw new Error('Column ids in transform rule must be filled when importing into an existing table');
    }
  } else {
    // Rules for new tables arrive without destination column ids, so have them filled in.
    const filled = await this._activeDoc.applyUserActions(docSession, [['FillTransformRuleColIds', rule]]);
    rule = filled.retValues[0] as TransformRule;
  }

  await this._activeDoc.applyUserActions(docSession,
    [['MakeImportTransformColumns', hiddenTableId, rule, false]]);

  if (!intoNewTable && mergeOptions && mergeOptions.mergeCols.length > 0) {
    await this._mergeAndFinishImport(docSession, hiddenTableId, destTableId, rule, mergeOptions);
    return destTableId;
  }

  const hiddenTableData = fromTableDataAction(await this._activeDoc.fetchTable(docSession, hiddenTableId, true));
  const sourceColIds = sourceColumns.map(c => c.id as string);
  const destCols = rule.destCols;

  // Pick, for each destination column with a formula, the hidden-table column holding its values:
  // the referenced source column for a plain `$col` formula, the prefixed transform column otherwise.
  const columnData: BulkColValues = {};
  for (const destCol of destCols) {
    const formula = destCol.formula.trim();
    if (formula) {
      const isPlainCopy = formula.startsWith('$') && sourceColIds.includes(formula.slice(1));
      const valuesColId = isPlainCopy ? formula.slice(1) : IMPORT_TRANSFORM_COLUMN_PREFIX + destCol.colId;
      columnData[destCol.colId!] = hiddenTableData[valuesColId];
    }
  }

  // The temporary import table has served its purpose.
  await this._activeDoc.applyUserActions(docSession, [['RemoveTable', hiddenTableId]]);

  // A new destination table has to be created before records can be added to it.
  let finalTableId = destTableId;
  if (intoNewTable) {
    const colSpecs = destCols.map(col => ({type: col.type, id: col.colId, label: col.label}));
    const added = await this._activeDoc.applyUserActions(docSession, [['AddTable', destTableId, colSpecs]]);
    finalTableId = added.retValues[0].table_id;
  }

  const rowIds = gutil.arrayRepeat(hiddenTableData.id.length, null);
  await this._activeDoc.applyUserActions(docSession, [['BulkAddRecord', finalTableId, rowIds, columnData]]);

  return finalTableId;
}
/**
 * Builds a default TransformRule from the column definitions of `destTableId`, or from
 * `srcCols` when `destTableId` is null (i.e. the import destination is a new table).
 *
 * Formula columns are left out. A destination column gets the copy formula `$<id>` when a
 * source column with the same id exists, and an empty formula otherwise.
 *
 * @param {TableRecordValue[]} srcCols Source column definitions.
 * @param {string|null} destTableId The destination table id. If null, the destination is assumed
 * to be a new table, and `srcCols` are used to build the transform rule.
 * @returns {Promise<TransformRule>} The constructed transform rule.
 */
private async _makeDefaultTransformRule(docSession: OptDocSession, srcCols: TableRecordValue[],
                                        destTableId: string|null): Promise<TransformRule> {
  const templateCols = destTableId ? await this._activeDoc.getTableCols(docSession, destTableId) : srcCols;
  const sourceCols = srcCols.map(col => col.id as string);
  const destCols: TransformColumn[] = templateCols
    .filter(({fields}) => fields.isFormula !== true && fields.formula === '')
    .map(({id, fields}) => ({
      colId: destTableId ? id as string : null,
      label: fields.label as string,
      type: fields.type as string,
      formula: sourceCols.includes(id as string) ? `$${id}` : ''
    }));
  return {destTableId, destCols, sourceCols};
}
/** /**
* This function removes temporary hidden tables which were created during the import process * This function removes temporary hidden tables which were created during the import process
* *
@ -316,7 +429,7 @@ export class ActiveDocImport {
if (isHidden) { if (isHidden) {
userActions = userActions.concat(userActions.map(([, tableId, columnId, colInfo]) => [ userActions = userActions.concat(userActions.map(([, tableId, columnId, colInfo]) => [
'ModifyColumn', tableId, 'gristHelper_Import_' + columnId, colInfo ])); 'ModifyColumn', tableId, IMPORT_TRANSFORM_COLUMN_PREFIX + columnId, colInfo ]));
} }
// apply user actions // apply user actions
@ -325,4 +438,131 @@ export class ActiveDocImport {
} }
} }
/**
 * Merges the records of `hiddenTableId` into `destTableId` and finalizes the import.
 *
 * A comparison query pairs source rows with destination rows on the merge columns. Source rows
 * without a destination match are added as new records; matched rows are updated with values
 * reconciled by the configured merge strategy. The hidden table is removed before the
 * destination table is modified.
 *
 * @param {string} hiddenTableId Source table containing records to be imported.
 * @param {string} destTableId Destination table that will be updated.
 * @param {TransformRule} transformRule Rules for transforming source columns using formulas
 * before merging/importing takes place.
 * @param {MergeOptions} mergeOptions Options for how to merge matching records between
 * the source and destination table.
 */
private async _mergeAndFinishImport(docSession: OptDocSession, hiddenTableId: string, destTableId: string,
                                    transformRule: TransformRule, mergeOptions: MergeOptions): Promise<void> {
  // Source-to-destination column ids: one map for everything selected, one for the join columns.
  const selectColumnsMap = new Map<string, string>();
  const joinColumnsMap = new Map<string, string>();
  for (const destCol of transformRule.destCols) {
    const destColId = destCol.colId as string;
    const formula = destCol.formula.trim();
    // A plain `$col` formula reads the source column directly; anything else reads the
    // prefixed transform column.
    const isPlainCopy = formula.startsWith('$') && transformRule.sourceCols.includes(formula.slice(1));
    const srcColId = isPlainCopy ? formula.slice(1) : IMPORT_TRANSFORM_COLUMN_PREFIX + destCol.colId;
    selectColumnsMap.set(srcColId, destColId);
    if (mergeOptions.mergeCols.includes(destColId)) {
      joinColumnsMap.set(srcColId, destColId);
    }
  }

  // Ask the database for the differences between source and destination.
  const query = buildComparisonQuery(hiddenTableId, destTableId, selectColumnsMap, joinColumnsMap);
  const marshalled = await this._activeDoc.docStorage.fetchQuery(query);
  const comparison = this._activeDoc.docStorage.decodeMarshalledDataFromTables(marshalled);

  // Column-oriented containers for the records to add and the records to update.
  const newRecords: BulkColValues = {};
  const updatedRecords: BulkColValues = {};
  for (const destColId of selectColumnsMap.values()) {
    newRecords[destColId] = [];
    updatedRecords[destColId] = [];
  }
  const updatedRecordIds: number[] = [];
  let numNewRecords = 0;

  // Function that reconciles a source value with the matching destination value.
  const merge = getMergeFunction(mergeOptions.mergeStrategy);

  const rowCount = comparison[hiddenTableId + '.id'].length;
  for (let row = 0; row < rowCount; row++) {
    const destRowId = comparison[destTableId + '.id'][row];
    if (destRowId === null) {
      // The source row has no counterpart in the destination: it becomes a new record.
      for (const [srcColId, destColId] of selectColumnsMap) {
        newRecords[destColId].push(comparison[`${hiddenTableId}.${srcColId}`][row]);
      }
      numNewRecords++;
    } else {
      // The source row matched a destination row: reconcile the two column by column.
      for (const [srcColId, destColId] of selectColumnsMap) {
        const srcVal = comparison[`${hiddenTableId}.${srcColId}`][row];
        const destVal = comparison[`${destTableId}.${destColId}`][row];
        updatedRecords[destColId].push(merge(srcVal, destVal));
      }
      updatedRecordIds.push(destRowId as number);
    }
  }

  // The temporary import table has served its purpose.
  await this._activeDoc.applyUserActions(docSession, [['RemoveTable', hiddenTableId]]);

  if (updatedRecordIds.length > 0) {
    await this._activeDoc.applyUserActions(docSession,
      [['BulkUpdateRecord', destTableId, updatedRecordIds, updatedRecords]]);
  }

  if (numNewRecords > 0) {
    await this._activeDoc.applyUserActions(docSession,
      [['BulkAddRecord', destTableId, gutil.arrayRepeat(numNewRecords, null), newRecords]]);
  }
}
}
// Tells whether a cell is blank: either null, or a string made up of whitespace only
// (the empty string included). Every other value counts as non-blank.
function isBlank(value: CellValue): boolean {
  if (value === null) { return true; }
  return typeof value === 'string' ? value.trim() === '' : false;
}
// Removes the import prefix, in place, from the id of every destination column of a transform
// rule. Columns without an id, or whose id does not start with the prefix, are left untouched.
function stripPrefixes({destCols}: TransformRule): void {
  destCols.forEach(destCol => {
    const currentId = destCol.colId;
    if (!currentId || !currentId.startsWith(IMPORT_TRANSFORM_COLUMN_PREFIX)) { return; }
    destCol.colId = currentId.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length);
  });
}
// Reconciles one source value with the matching destination value into the value to store.
type MergeFunction = (srcVal: CellValue, destVal: CellValue) => CellValue;

/**
 * Returns a function that maps source and destination column values to a single output value.
 *
 * - 'replace-with-nonblank-source': the source value wins unless it is blank.
 * - 'replace-all-fields': the source value always wins.
 * - 'replace-blank-fields-only': the destination value is kept unless it is blank.
 *
 * @param {MergeStrategy} mergeStrategy Determines how matching source and destination column values
 * should be reconciled when merging.
 * @returns {MergeFunction} Function that maps column value pairs to a single output value.
 * @throws {Error} If the strategy type is not one of the known strategies.
 */
function getMergeFunction({type}: MergeStrategy): MergeFunction {
  switch (type) {
    case 'replace-with-nonblank-source':
      return (srcVal, destVal) => isBlank(srcVal) ? destVal : srcVal;
    case 'replace-all-fields':
      return (srcVal, _destVal) => srcVal;
    case 'replace-blank-fields-only':
      return (srcVal, destVal) => isBlank(destVal) ? srcVal : destVal;
    default:
      // Normally, we should never arrive here. If we somehow do, we throw an error.
      throw new Error(`Unknown merge strategy: ${type}`);
  }
}

View File

@ -2,7 +2,7 @@ import { createEmptyActionSummary } from "app/common/ActionSummary";
import { ApiError } from 'app/common/ApiError'; import { ApiError } from 'app/common/ApiError';
import { BrowserSettings } from "app/common/BrowserSettings"; import { BrowserSettings } from "app/common/BrowserSettings";
import { import {
CellValue, fromTableDataAction, TableColValues, TableDataAction, TableRecordValue, CellValue, fromTableDataAction, TableColValues, TableRecordValue,
} from 'app/common/DocActions'; } from 'app/common/DocActions';
import {isRaisedException} from "app/common/gristTypes"; import {isRaisedException} from "app/common/gristTypes";
import { arrayRepeat, isAffirmative } from "app/common/gutil"; import { arrayRepeat, isAffirmative } from "app/common/gutil";
@ -10,7 +10,7 @@ import { SortFunc } from 'app/common/SortFunc';
import { DocReplacementOptions, DocState, DocStateComparison, DocStates, NEW_DOCUMENT_CODE} from 'app/common/UserAPI'; import { DocReplacementOptions, DocState, DocStateComparison, DocStates, NEW_DOCUMENT_CODE} from 'app/common/UserAPI';
import { HomeDBManager, makeDocAuthResult } from 'app/gen-server/lib/HomeDBManager'; import { HomeDBManager, makeDocAuthResult } from 'app/gen-server/lib/HomeDBManager';
import { concatenateSummaries, summarizeAction } from "app/server/lib/ActionSummary"; import { concatenateSummaries, summarizeAction } from "app/server/lib/ActionSummary";
import { ActiveDoc } from "app/server/lib/ActiveDoc"; import { ActiveDoc, tableIdToRef } from "app/server/lib/ActiveDoc";
import { assertAccess, getOrSetDocAuth, getTransitiveHeaders, getUserId, isAnonymousUser, import { assertAccess, getOrSetDocAuth, getTransitiveHeaders, getUserId, isAnonymousUser,
RequestWithLogin } from 'app/server/lib/Authorizer'; RequestWithLogin } from 'app/server/lib/Authorizer';
import { DocManager } from "app/server/lib/DocManager"; import { DocManager } from "app/server/lib/DocManager";
@ -168,42 +168,12 @@ export class DocWorkerApi {
activeDoc.fetchMetaTables(docSessionFromRequest(req))); activeDoc.fetchMetaTables(docSessionFromRequest(req)));
} }
function tableIdToRef(metaTables: { [p: string]: TableDataAction }, tableId: any) {
const [, , tableRefs, tableData] = metaTables._grist_Tables;
const tableRowIndex = tableData.tableId.indexOf(tableId);
if (tableRowIndex === -1) {
throw new ApiError(`Table not found "${tableId}"`, 404);
}
return tableRefs[tableRowIndex];
}
// Get the columns of the specified table in recordish format // Get the columns of the specified table in recordish format
this._app.get('/api/docs/:docId/tables/:tableId/columns', canView, this._app.get('/api/docs/:docId/tables/:tableId/columns', canView,
withDoc(async (activeDoc, req, res) => { withDoc(async (activeDoc, req, res) => {
const metaTables = await getMetaTables(activeDoc, req); const tableId = req.params.tableId;
const tableRef = tableIdToRef(metaTables, req.params.tableId); const columns = await handleSandboxError('', [],
const [, , colRefs, columnData] = metaTables._grist_Tables_column; activeDoc.getTableCols(docSessionFromRequest(req), tableId));
// colId is pulled out of fields and used as the root id
const fieldNames = _.without(Object.keys(columnData), "colId");
const columns: TableRecordValue[] = [];
(columnData.colId as string[]).forEach((id, index) => {
if (
// TODO param to include hidden columns
// By default we want the list of returned colums to match the fields in /records
id === "manualSort" || id.startsWith("gristHelper_") || !id ||
// Filter columns from the requested table
columnData.parentId[index] !== tableRef
) {
return;
}
const column: TableRecordValue = {id, fields: {colRef: colRefs[index]}};
for (const key of fieldNames) {
column.fields[key] = columnData[key][index];
}
columns.push(column);
});
res.json({columns}); res.json({columns});
}) })
); );

View File

@ -9,7 +9,7 @@
import * as sqlite3 from '@gristlabs/sqlite3'; import * as sqlite3 from '@gristlabs/sqlite3';
import {LocalActionBundle} from 'app/common/ActionBundle'; import {LocalActionBundle} from 'app/common/ActionBundle';
import {DocAction, TableColValues, TableDataAction, toTableDataAction} from 'app/common/DocActions'; import {BulkColValues, DocAction, TableColValues, TableDataAction, toTableDataAction} from 'app/common/DocActions';
import * as gristTypes from 'app/common/gristTypes'; import * as gristTypes from 'app/common/gristTypes';
import * as marshal from 'app/common/marshal'; import * as marshal from 'app/common/marshal';
import * as schema from 'app/common/schema'; import * as schema from 'app/common/schema';
@ -843,13 +843,14 @@ export class DocStorage implements ISQLiteDB {
// Convert query to SQL. // Convert query to SQL.
const params: any[] = []; const params: any[] = [];
const whereParts: string[] = []; let whereParts: string[] = [];
for (const colId of Object.keys(query.filters)) { for (const colId of Object.keys(query.filters)) {
const values = query.filters[colId]; const values = query.filters[colId];
// If values is empty, "IN ()" works in SQLite (always false), but wouldn't work in Postgres. // If values is empty, "IN ()" works in SQLite (always false), but wouldn't work in Postgres.
whereParts.push(`${quoteIdent(query.tableId)}.${quoteIdent(colId)} IN (${values.map(() => '?').join(', ')})`); whereParts.push(`${quoteIdent(query.tableId)}.${quoteIdent(colId)} IN (${values.map(() => '?').join(', ')})`);
params.push(...values); params.push(...values);
} }
whereParts = whereParts.concat(query.wheres ?? []);
const sql = this._getSqlForQuery(query, whereParts); const sql = this._getSqlForQuery(query, whereParts);
return this._getDB().allMarshal(sql, params); return this._getDB().allMarshal(sql, params);
} }
@ -883,6 +884,27 @@ export class DocStorage implements ISQLiteDB {
return columnValues; return columnValues;
} }
/**
 * Multi-table counterpart of `decodeMarshalledData`: decodes a marshalled result whose
 * columns may originate from more than one table.
 *
 * Every column name in `marshalledData` is expected to look like `<tableId>.<colId>`. The
 * part before the period and the part after it are passed to `_getGristType` to find the
 * type used for decoding that column's values.
 */
public decodeMarshalledDataFromTables(marshalledData: Buffer | Uint8Array): BulkColValues {
  const decoded: BulkColValues = marshal.loads(marshalledData);
  // Values are overwritten in place, so no additional arrays are allocated.
  Object.keys(decoded).forEach((qualifiedName) => {
    const [tableId, colId] = qualifiedName.split('.');
    const gristType = this._getGristType(tableId, colId);
    const values = decoded[qualifiedName];
    for (const idx of values.keys()) {
      values[idx] = DocStorage._decodeValue(values[idx], gristType, DocStorage._getSqlType(gristType));
    }
  });
  return decoded;
}
/** /**
* Applies stored actions received from data engine to the database by converting them to SQL * Applies stored actions received from data engine to the database by converting them to SQL
* statements and executing a serialized transaction. * statements and executing a serialized transaction.

View File

@ -27,6 +27,9 @@ export interface ExpandedQuery extends ServerQuery {
// A list of selections for regular data and data computed via formulas. // A list of selections for regular data and data computed via formulas.
selects?: string[]; selects?: string[];
// A list of conditions for filtering query results.
wheres?: string[];
} }
/** /**
@ -128,3 +131,78 @@ export function expandQuery(iquery: ServerQuery, docData: DocData, onDemandFormu
query.selects = [...selects]; query.selects = [...selects];
return query; return query;
} }
/**
 * Build a query that relates two homogeneous tables sharing a common set of columns,
 * returning rows that exist in both tables (if they have differences), and rows from
 * `leftTableId` that don't exist in `rightTableId`.
 *
 * In practice, this is currently only used for generating diffs and add/update actions
 * for incremental imports into existing tables. Specifically, `leftTableId` is the
 * source table, and `rightTableId` is the destination table.
 *
 * Columns from the query result are prefixed with the table id and a '.' separator.
 *
 * If every column in `selectColumns` is also a join column, there is nothing to compare,
 * so no difference filter is added and the query returns every row of `leftTableId`
 * (with its match from `rightTableId`, if any).
 *
 * `joinColumns` must contain at least one entry; with an empty map the generated
 * `LEFT JOIN ... ON` clause has no condition.
 *
 * NOTE: Intended for internal use from trusted parts of Grist only.
 *
 * @param {string} leftTableId Name of the left table in the comparison.
 * @param {string} rightTableId Name of the right table in the comparison.
 * @param {Map<string, string>} selectColumns Map of left table column ids to their matching equivalent
 * from the right table. All of these columns will be included in the result, aliased by table id.
 * @param {Map<string, string>} joinColumns Map of left table column ids to their matching equivalent
 * from the right table. These columns are used to join `leftTableId` to `rightTableId`.
 * @returns {ExpandedQuery} The constructed query.
 */
export function buildComparisonQuery(leftTableId: string, rightTableId: string, selectColumns: Map<string, string>,
                                     joinColumns: Map<string, string>): ExpandedQuery {
  const query: ExpandedQuery = { tableId: leftTableId, filters: {} };

  // Start accumulating the JOINS, SELECTS and WHERES needed for the query.
  const joins: string[] = [];
  const selects: string[] = [];
  const wheres: string[] = [];

  // Include the 'id' column from both tables.
  selects.push(
    `${quoteIdent(leftTableId)}.id AS ${quoteIdent(leftTableId + '.id')}`,
    `${quoteIdent(rightTableId)}.id AS ${quoteIdent(rightTableId + '.id')}`
  );

  // Select columns from both tables using the table id as a prefix for each column name.
  selectColumns.forEach((rightTableColumn, leftTableColumn) => {
    const leftColumnAlias = `${leftTableId}.${leftTableColumn}`;
    const rightColumnAlias = `${rightTableId}.${rightTableColumn}`;
    selects.push(
      `${quoteIdent(leftTableId)}.${quoteIdent(leftTableColumn)} AS ${quoteIdent(leftColumnAlias)}`,
      `${quoteIdent(rightTableId)}.${quoteIdent(rightTableColumn)} AS ${quoteIdent(rightColumnAlias)}`
    );
  });

  // Join both tables on `joinColumns`, including unmatched rows from `leftTableId`.
  const joinConditions: string[] = [];
  joinColumns.forEach((rightTableColumn, leftTableColumn) => {
    const leftExpression = `${quoteIdent(leftTableId)}.${quoteIdent(leftTableColumn)}`;
    const rightExpression = `${quoteIdent(rightTableId)}.${quoteIdent(rightTableColumn)}`;
    joinConditions.push(`${leftExpression} = ${rightExpression}`);
  });
  joins.push(`LEFT JOIN ${quoteIdent(rightTableId)} ON ${joinConditions.join(' AND ')}`);

  // Filter out matching rows where all non-join columns from both tables are identical.
  const whereConditions: string[] = [];
  for (const [leftTableColumn, rightTableColumn] of selectColumns.entries()) {
    if (joinColumns.has(leftTableColumn)) { continue; }

    const leftColumnAlias = quoteIdent(`${leftTableId}.${leftTableColumn}`);
    const rightColumnAlias = quoteIdent(`${rightTableId}.${rightTableColumn}`);

    // Only include rows that have differences in column values.
    whereConditions.push(`${leftColumnAlias} IS NOT ${rightColumnAlias}`);
  }
  // With no non-join columns there is nothing to compare; emitting "()" would be invalid SQL.
  if (whereConditions.length > 0) {
    wheres.push(`(${whereConditions.join(' OR ')})`);
  }

  // Copy decisions to the query object, and return.
  query.joins = joins;
  query.selects = selects;
  query.wheres = wheres;
  return query;
}

View File

@ -544,6 +544,6 @@ async function createBackupFile(filePath: string, versionNum: number): Promise<s
* Validate and quote SQL identifiers such as table and column names. * Validate and quote SQL identifiers such as table and column names.
*/ */
export function quoteIdent(ident: string): string { export function quoteIdent(ident: string): string {
assert(/^\w+$/.test(ident), `SQL identifier is not valid: ${ident}`); assert(/^[\w.]+$/.test(ident), `SQL identifier is not valid: ${ident}`);
return `"${ident}"`; return `"${ident}"`;
} }

View File

@ -15,7 +15,8 @@ _import_transform_col_prefix = 'gristHelper_Import_'
def _gen_colids(transform_rule): def _gen_colids(transform_rule):
""" """
For a transform_rule with colIds = None, For a transform_rule with colIds = None,
fills in colIds generated from labels. fills in colIds generated from labels and returns the updated
transform_rule.
""" """
dest_cols = transform_rule["destCols"] dest_cols = transform_rule["destCols"]
@ -29,6 +30,8 @@ def _gen_colids(transform_rule):
for dest_col, col_id in zip(dest_cols, col_ids): for dest_col, col_id in zip(dest_cols, col_ids):
dest_col["colId"] = col_id dest_col["colId"] = col_id
return transform_rule
def _strip_prefixes(transform_rule): def _strip_prefixes(transform_rule):
"If transform_rule has prefixed _col_ids, strips prefix" "If transform_rule has prefixed _col_ids, strips prefix"
@ -40,68 +43,6 @@ def _strip_prefixes(transform_rule):
dest_col["colId"] = colId[len(_import_transform_col_prefix):] dest_col["colId"] = colId[len(_import_transform_col_prefix):]
def _is_blank(value):
  "Returns True if value is blank: either None or a string holding only whitespace."
  if value is None:
    return True
  # Anything that is not a string (and not None) is never considered blank.
  return isinstance(value, six.string_types) and value.strip() == ''
def _build_merge_col_map(column_data, merge_cols):
  """
  Builds a lookup from merge keys to row ids.

  Each key is the tuple of values that one row of column_data holds in the
  columns listed in merge_cols. Each value is the list of 1-based row ids
  (positions in column_data) that produce that key; a list with more than one
  row id means column_data contains duplicates for all columns in merge_cols.

  Rows whose key has a blank component, or whose key is not hashable, are
  left out of the map.

  This gives constant-time lookups when merging into a table: build the map
  from the source table's column_data, then probe it with the destination
  table's values for the merge_cols columns to find rows that are candidates
  for updating.
  """
  merge_col_map = defaultdict(list)
  key_columns = [column_data[col] for col in merge_cols]

  for position, key in enumerate(zip(*key_columns), 1):
    if not any(_is_blank(part) for part in key):
      try:
        merge_col_map[key].append(position)
      except TypeError:
        # The key isn't hashable, so it can't be stored in the map.
        pass

  return merge_col_map
# Dictionary mapping merge strategy types from ActiveDocAPI.ts to functions
# that merge source and destination column values.
#
# NOTE: This dictionary should be kept in sync with the types in that file.
#
# All functions have the same signature: (src, dest) => output,
# where src and dest are column values from a source and destination
# table respectively, and output is either src or destination.
#
# For example, a key of replace-with-nonblank-source will return a merge function
# that returns the src argument if it's not blank. Otherwise it returns the
# dest argument. In the context of incremental imports, this is a function
# that updates destination fields when the source field isn't blank, preserving
# existing values in the destination field that aren't replaced.
_merge_funcs = {
'replace-with-nonblank-source': lambda src, dest: dest if _is_blank(src) else src,
'replace-all-fields': lambda src, _: src,
'replace-blank-fields-only': lambda src, dest: src if _is_blank(dest) else dest
}
class ImportActions(object): class ImportActions(object):
def __init__(self, useractions, docmodel, engine): def __init__(self, useractions, docmodel, engine):
@ -130,11 +71,6 @@ class ImportActions(object):
# if importing into an existing table, and they are sometimes prefixed with # if importing into an existing table, and they are sometimes prefixed with
# _import_transform_col_prefix (if transform_rule comes from client) # _import_transform_col_prefix (if transform_rule comes from client)
# TransformAndFinishImport gets the full hidden_table (reparsed) and a transform_rule,
# (or can use a default one if it's not provided). It fills in colIds if necessary and
# strips colId prefixes. It also skips creating some formula columns
# (ones with trivial copy formulas) as an optimization.
def _MakeDefaultTransformRule(self, hidden_table_id, dest_table_id): def _MakeDefaultTransformRule(self, hidden_table_id, dest_table_id):
""" """
@ -174,9 +110,21 @@ class ImportActions(object):
# doesnt generate other fields of transform_rule, but sandbox only used destCols # doesnt generate other fields of transform_rule, but sandbox only used destCols
def FillTransformRuleColIds(self, transform_rule):
"""
Takes a transform rule with missing dest col ids, and returns it
with sanitized and de-duplicated ids generated from the original
column labels.
# Returns NOTE: This work could be done outside the data engine, but the logic
def _MakeImportTransformColumns(self, hidden_table_id, transform_rule, gen_all): for cleaning column identifiers is quite complex and currently only lives
in the data engine. In the future, it may be worth porting it to
Node to avoid an extra trip to the data engine.
"""
return _gen_colids(transform_rule)
def MakeImportTransformColumns(self, hidden_table_id, transform_rule, gen_all):
""" """
Makes prefixed columns in the grist hidden import table (hidden_table_id) Makes prefixed columns in the grist hidden import table (hidden_table_id)
@ -197,7 +145,7 @@ class ImportActions(object):
#wrap dest_cols as namedtuples, to allow access like 'dest_col.param' #wrap dest_cols as namedtuples, to allow access like 'dest_col.param'
dest_cols = [namedtuple('col', c.keys())(*c.values()) for c in transform_rule['destCols']] dest_cols = [namedtuple('col', c.keys())(*c.values()) for c in transform_rule['destCols']]
log.debug("_MakeImportTransformColumns: {}".format("gen_all" if gen_all else "optimize")) log.debug("MakeImportTransformColumns: {}".format("gen_all" if gen_all else "optimize"))
#create prefixed formula column for each of dest_cols #create prefixed formula column for each of dest_cols
#take formula from transform_rule #take formula from transform_rule
@ -222,69 +170,6 @@ class ImportActions(object):
return new_cols return new_cols
def _MergeColumnData(self, dest_table_id, column_data, merge_options):
  """
  Merges column_data into table dest_table_id. Destination rows whose values
  in the merge columns match a source row are updated using the configured
  merge strategy, and source rows that matched nothing are added to table
  dest_table_id as new records.

  dest_table_id: id of destination table
  column_data: column data from source table to merge into destination table
  merge_options: dict with 'mergeCols' (list of column ids to use as keys for
    merging) and 'mergeStrategy' (dict whose 'type' selects one of the
    functions in _merge_funcs)

  When several source rows share the same key, the last of them supplies the
  values for the update.
  """

  dest_table = self._engine.tables[dest_table_id]
  merge_cols = merge_options['mergeCols']
  # Maps each merge key found in the source data to its (1-based) source row ids.
  merge_col_map = _build_merge_col_map(column_data, merge_cols)

  updated_row_ids = []
  updated_rows = {}
  new_rows = {}
  matched_src_table_rows = set()

  # Initialize column data for new and updated rows.
  for col_id in six.iterkeys(column_data):
    updated_rows[col_id] = []
    new_rows[col_id] = []

  strategy_type = merge_options['mergeStrategy']['type']
  merge = _merge_funcs[strategy_type]

  # Compute which source table rows should update existing records in destination table.
  dest_cols = [dest_table.get_column(col) for col in merge_cols]
  for dest_row_id in dest_table.row_ids:
    lookup_key = tuple(col.raw_get(dest_row_id) for col in dest_cols)
    try:
      src_row_ids = merge_col_map.get(lookup_key)
    except TypeError:
      # We can arrive here if lookup_key isn't hashable. If that's the case, skip
      # this row since we can't efficiently search for a match in the source table.
      continue

    if src_row_ids:
      matched_src_table_rows.update(src_row_ids)
      updated_row_ids.append(dest_row_id)
      for col_id, col_vals in six.iteritems(column_data):
        # Source row ids are 1-based; the last matching source row wins.
        src_val = col_vals[src_row_ids[-1] - 1]
        dest_val = dest_table.get_column(col_id).raw_get(dest_row_id)
        updated_rows[col_id].append(merge(src_val, dest_val))

  num_src_rows = len(column_data[merge_cols[0]])

  # Compute which source table rows should be added to destination table as new records.
  for row_id in xrange(1, num_src_rows + 1):
    # If we've matched against the row before, we shouldn't add it.
    if row_id in matched_src_table_rows:
      continue

    for col_id, col_val in six.iteritems(column_data):
      new_rows[col_id].append(col_val[row_id - 1])

  self._useractions.BulkUpdateRecord(dest_table_id, updated_row_ids, updated_rows)
  # One new record (with an auto-assigned id) per source row that matched nothing.
  self._useractions.BulkAddRecord(dest_table_id,
    [None] * (num_src_rows - len(matched_src_table_rows)), new_rows)
def DoGenImporterView(self, source_table_id, dest_table_id, transform_rule = None): def DoGenImporterView(self, source_table_id, dest_table_id, transform_rule = None):
""" """
Generates viewsections/formula columns for importer Generates viewsections/formula columns for importer
@ -336,7 +221,7 @@ class ImportActions(object):
raise ValueError(errstr + repr(transform_rule)) raise ValueError(errstr + repr(transform_rule))
new_cols = self._MakeImportTransformColumns(source_table_id, transform_rule, gen_all=True) new_cols = self.MakeImportTransformColumns(source_table_id, transform_rule, gen_all=True)
# we want to generate all columns so user can see them and edit # we want to generate all columns so user can see them and edit
#========= Create new transform view section. #========= Create new transform view section.
@ -348,94 +233,3 @@ class ImportActions(object):
self._docmodel.add(new_section.fields, colRef=new_cols) self._docmodel.add(new_section.fields, colRef=new_cols)
return new_section.id return new_section.id
def DoTransformAndFinishImport(self, hidden_table_id, dest_table_id,
                               into_new_table, transform_rule,
                               merge_options):
  """
  Finishes import into new or existing table depending on flag 'into_new_table'
  Returns destination table id. (new or existing)

  hidden_table_id: id of the hidden table holding the parsed source data; it is
    removed as part of finishing the import
  dest_table_id: id of the destination table (the requested id when a new table
    is created)
  into_new_table: if true, a new table is created from the transform rule's destCols
  transform_rule: dict with a "destCols" list; if falsy, a default rule is generated
  merge_options: dict that may contain 'mergeCols' and 'mergeStrategy'. When it is
    None, or has no merge columns, all source rows are added as new records.
  """
  hidden_table = self._engine.tables[hidden_table_id]
  hidden_table_rec = self._docmodel.tables.lookupOne(tableId=hidden_table_id)

  src_cols = {c.colId for c in hidden_table_rec.columns}

  log.debug("Starting TransformAndFinishImport, dest_cols:\n "
            + str(transform_rule["destCols"] if transform_rule else "None"))
  log.debug("hidden_table_id:" + hidden_table_id)
  log.debug("hidden table columns: "
            + str([(a.colId, a.label, a.type) for a in hidden_table_rec.columns]))
  log.debug("dest_table_id: "
            + str(dest_table_id) + ('(NEW)' if into_new_table else '(Existing)'))

  # === fill in blank transform rule
  if not transform_rule:
    transform_dest = None if into_new_table else dest_table_id
    transform_rule = self._MakeDefaultTransformRule(hidden_table_id, transform_dest)
  dest_cols = transform_rule["destCols"]

  # === Normalize transform rule (gen colids)

  _strip_prefixes(transform_rule) #when transform_rule from client, colIds will be prefixed

  if into_new_table: # 'colId's are undefined if making new table
    _gen_colids(transform_rule)
  else:
    if None in (dc["colId"] for dc in dest_cols):
      errstr = "colIds must be defined in transform_rule for importing into existing table: "
      raise ValueError(errstr + repr(transform_rule))

  log.debug("Normalized dest_cols:\n " + str(dest_cols))

  # ======== Make and update formula columns
  # Make columns from transform_rule (now with filled-in colIds),
  # gen_all false skips copy columns (faster)
  # The call is made for its effect on the hidden table; its result isn't needed here.
  self._MakeImportTransformColumns(hidden_table_id, transform_rule, gen_all=False)
  self._engine._bring_all_up_to_date()

  # ========= Fetch Data for each col
  # (either copying, blank, or from formula column)

  row_ids = list(hidden_table.row_ids) #fetch row_ids now, before we remove hidden_table
  log.debug("num rows: " + str(len(row_ids)))

  column_data = {} # { col:[values...], ... }
  for curr_col in dest_cols:
    formula = curr_col["formula"].strip()

    # Columns with a blank formula get no entry in column_data.
    if formula:
      if (formula.startswith("$") and formula[1:] in src_cols): #copy formula
        src_col_id = formula[1:]
      else: #normal formula, fetch from prefix column
        src_col_id = _import_transform_col_prefix + curr_col["colId"]
      log.debug("Copying from: " + src_col_id)

      src_col = hidden_table.get_column(src_col_id)
      column_data[curr_col["colId"]] = [src_col.raw_get(r) for r in row_ids]

  # ========= Cleanup, Prepare new table (if needed), insert data

  self._useractions.RemoveTable(hidden_table_id)

  if into_new_table:
    col_specs = [ {'type': curr_col['type'], 'id': curr_col['colId'], 'label': curr_col['label']}
                  for curr_col in dest_cols]

    log.debug("Making new table. Columns:\n " + str(col_specs))
    new_table = self._useractions.AddTable(dest_table_id, col_specs)
    dest_table_id = new_table['table_id']

  # Treat a missing (None) merge_options the same as an empty one: a plain append.
  if not (merge_options or {}).get('mergeCols'):
    self._useractions.BulkAddRecord(dest_table_id, [None] * len(row_ids), column_data)
  else:
    self._MergeColumnData(dest_table_id, column_data, merge_options)

  log.debug("Finishing TransformAndFinishImport")

  return dest_table_id

View File

@ -1,578 +0,0 @@
# pylint: disable=line-too-long
import logger
import test_engine
log = logger.Logger(__name__, logger.INFO)
#TODO: test naming (basics done, maybe check numbered column renaming)
#TODO: check autoimport into existing table (match up column names)
class TestImportTransform(test_engine.EngineTestCase):
def init_state(self):
# Add source table
self.apply_user_action(['AddTable', 'Hidden_table', [
{'id': 'employee_id', 'type': 'Int'},
{'id': 'fname', 'type': 'Text'},
{'id': 'mname', 'type': 'Text'},
{'id': 'lname', 'type': 'Text'},
{'id': 'email', 'type': 'Text'},
]])
self.apply_user_action(['BulkAddRecord', 'Hidden_table', [1, 2, 3, 4, 5, 6, 7], {
'employee_id': [1, 2, 3, 4, 5, 6, 7],
'fname': ['Bob', 'Carry', 'Don', 'Amir', 'Ken', 'George', 'Barbara'],
'mname': ['F.', None, 'B.', '', 'C.', '', 'D.'],
'lname': ['Nike', 'Jonson', "Yoon", "Greene", "Foster", "Huang", "Kinney"],
'email': [
'bob@example.com', None, "don@example.com", "amir@example.com",
"ken@example.com", "", "barbara@example.com"
]
}])
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[1, "manualSort", "ManualSortPos", False, ""],
[2, "employee_id", "Int", False, ""],
[3, "fname", "Text", False, ""],
[4, "mname", "Text", False, ""],
[5, "lname", "Text", False, ""],
[6, "email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 1)
#Filled in colids for existing table
self.TEMP_transform_rule_colids = {
"destCols": [
{ "colId": "Employee_ID", "label": "Employee ID",
"type": "Int", "formula": "$employee_id" },
{ "colId": "First_Name", "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": "Last_Name", "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": "Middle_Initial", "label": "Middle Initial",
"type": "Text", "formula": "$mname[0] if $mname else ''" },
{ "colId": "Email", "label": "Email",
"type": "Text", "formula": "$email" },
#{ "colId": "Blank", "label": "Blank", // Destination1 has no blank column
# "type": "Text", "formula": "" },
]
}
#Then try it with blank in colIds (for new tables)
self.TEMP_transform_rule_no_colids = {
"destCols": [
{ "colId": None, "label": "Employee ID",
"type": "Int", "formula": "$employee_id" },
{ "colId": None, "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": None, "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": None, "label": "Middle Initial",
"type": "Text", "formula": "$mname[0] if $mname else ''" },
{ "colId": None, "label": "Email",
"type": "Text", "formula": "$email" },
{ "colId": None, "label": "Blank",
"type": "Text", "formula": "" },
]
}
# Add destination table which contains columns corresponding to source table with different names
self.apply_user_action(['AddTable', 'Destination1', [
{'label': 'Employee ID', 'id': 'Employee_ID', 'type': 'Int'},
{'label': 'First Name', 'id': 'First_Name', 'type': 'Text'},
{'label': 'Last Name', 'id': 'Last_Name', 'type': 'Text'},
{'label': 'Middle Initial', 'id': 'Middle_Initial', 'type': 'Text'},
{'label': 'Email', 'id': 'Email', 'type': 'Text'}]])
self.apply_user_action(['BulkAddRecord', 'Destination1', [1, 2, 3], {
'Employee_ID': [1, 2, 3],
'First_Name': ['Bob', 'Carry', 'Don'],
'Last_Name': ['Nike', 'Jonson', "Yoon"],
'Middle_Initial': ['F.', 'M.', None],
'Email': ['', 'carry.m.jonson@example.com', 'don.b.yoon@example.com']
}])
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[7, "manualSort", "ManualSortPos", False, ""],
[8, "Employee_ID", "Int", False, ""],
[9, "First_Name", "Text", False, ""],
[10, "Last_Name", "Text", False, ""],
[11, "Middle_Initial", "Text", False, ""],
[12, "Email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 2)
# Verify created tables
self.assertPartialData("_grist_Tables", ["id", "tableId"], [
[1, "Hidden_table"],
[2, "Destination1"]
])
def test_finish_import_into_new_table(self):
# Add source and destination tables
self.init_state()
#into_new_table = True, transform_rule : no colids (will be generated for new table), merge_options = {}
out_actions = self.apply_user_action(
['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, self.TEMP_transform_rule_no_colids, {}])
self.assertPartialOutActions(out_actions, {
"stored": [
["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
["RemoveRecord", "_grist_Views_section", 1],
["RemoveRecord", "_grist_TabBar", 1],
["RemoveRecord", "_grist_Pages", 1],
["RemoveRecord", "_grist_Views", 1],
["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
["RemoveRecord", "_grist_Tables", 1],
["RemoveTable", "Hidden_table"],
["AddTable", "NewTable", [{"formula": "", "id": "manualSort", "isFormula": False, "type": "ManualSortPos"}, {"formula": "", "id": "Employee_ID", "isFormula": False, "type": "Int"}, {"formula": "", "id": "First_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Last_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Middle_Initial", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Email", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Blank", "isFormula": False, "type": "Text"}]],
["AddRecord", "_grist_Tables", 3, {"primaryViewId": 0, "tableId": "NewTable"}],
["BulkAddRecord", "_grist_Tables_column", [13, 14, 15, 16, 17, 18, 19], {"colId": ["manualSort", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "Blank"], "formula": ["", "", "", "", "", "", ""], "isFormula": [False, False, False, False, False, False, False], "label": ["manualSort", "Employee ID", "First Name", "Last Name", "Middle Initial", "Email", "Blank"], "parentId": [3, 3, 3, 3, 3, 3, 3], "parentPos": [13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0], "type": ["ManualSortPos", "Int", "Text", "Text", "Text", "Text", "Text"], "widgetOptions": ["", "", "", "", "", "", ""]}],
["AddRecord", "_grist_Views", 3, {"name": "NewTable", "type": "raw_data"}],
["AddRecord", "_grist_TabBar", 3, {"tabPos": 3.0, "viewRef": 3}],
["AddRecord", "_grist_Pages", 3, {"indentation": 0, "pagePos": 3.0, "viewRef": 3}],
["AddRecord", "_grist_Views_section", 3, {"borderWidth": 1, "defaultWidth": 100, "parentId": 3, "parentKey": "record", "sortColRefs": "[]", "tableRef": 3, "title": ""}],
["BulkAddRecord", "_grist_Views_section_field", [11, 12, 13, 14, 15, 16], {"colRef": [14, 15, 16, 17, 18, 19], "parentId": [3, 3, 3, 3, 3, 3], "parentPos": [11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}],
["UpdateRecord", "_grist_Tables", 3, {"primaryViewId": 3}],
["BulkAddRecord", "NewTable", [1, 2, 3, 4, 5, 6, 7], {"Email": ["bob@example.com", None, "don@example.com", "amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [1, 2, 3, 4, 5, 6, 7], "First_Name": ["Bob", "Carry", "Don", "Amir", "Ken", "George", "Barbara"], "Last_Name": ["Nike", "Jonson", "Yoon", "Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["F", "", "B", "", "C", "", "D"], "manualSort": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]}],
]
})
#1-6 in hidden table, 7-12 in destTable, 13-19 for new table
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[13, "manualSort", "ManualSortPos", False, ""],
[14, "Employee_ID", "Int", False, ""],
[15, "First_Name", "Text", False, ""],
[16, "Last_Name", "Text", False, ""],
[17, "Middle_Initial", "Text", False, ""],
[18, "Email", "Text", False, ""],
[19, "Blank", "Text", False, ""],
], rows=lambda r: r.parentId.id == 3)
self.assertTableData('NewTable', cols="all", data=[
["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "Blank", "manualSort"],
[1, 1, "Bob", "Nike", "F", "bob@example.com", "", 1.0],
[2, 2, "Carry", "Jonson", "", None, "", 2.0],
[3, 3, "Don", "Yoon", "B", "don@example.com", "", 3.0],
[4, 4, "Amir", "Greene", "", "amir@example.com", "", 4.0],
[5, 5, "Ken", "Foster", "C", "ken@example.com", "", 5.0],
[6, 6, "George", "Huang", "", "", "", 6.0],
[7, 7, "Barbara", "Kinney", "D", "barbara@example.com", "", 7.0],
])
# Verify removed hidden table and add the new one
self.assertPartialData("_grist_Tables", ["id", "tableId"], [
[2, "Destination1"],
[3, "NewTable"]
])
def test_finish_import_into_existing_table(self):
# Add source and destination tables
self.init_state()
#into_new_table = False, transform_rule : colids, merge_options = None
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids, None])
#1-6 in hidden table, 7-12 in destTable
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[7, "manualSort", "ManualSortPos", False, ""],
[8, "Employee_ID", "Int", False, ""],
[9, "First_Name", "Text", False, ""],
[10, "Last_Name", "Text", False, ""],
[11, "Middle_Initial", "Text", False, ""],
[12, "Email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 2)
# First 3 rows were already in Destination1 before import
self.assertTableData('Destination1', cols="all", data=[
["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
[1, 1, "Bob", "Nike", "F.", "", 1.0],
[2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
[3, 3, "Don", "Yoon", None, "don.b.yoon@example.com", 3.0],
[4, 1, "Bob", "Nike", "F", "bob@example.com", 4.0],
[5, 2, "Carry", "Jonson", "", None, 5.0],
[6, 3, "Don", "Yoon", "B", "don@example.com", 6.0],
[7, 4, "Amir", "Greene", "", "amir@example.com", 7.0],
[8, 5, "Ken", "Foster", "C", "ken@example.com", 8.0],
[9, 6, "George", "Huang", "", "", 9.0],
[10, 7, "Barbara", "Kinney", "D", "barbara@example.com", 10.0],
])
# Verify removed hidden table
self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
#does the same thing using a blank transform rule
def test_finish_import_into_new_table_blank(self):
# Add source and destination tables
self.init_state()
#into_new_table = True, transform_rule = None, merge_options = None
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, None, None])
#1-6 in src table, 7-12 in hiddentable
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[13, "manualSort", "ManualSortPos", False, ""],
[14, "employee_id", "Int", False, ""],
[15, "fname", "Text", False, ""],
[16, "mname", "Text", False, ""],
[17, "lname", "Text", False, ""],
[18, "email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 3)
self.assertTableData('NewTable', cols="all", data=[
["id", "employee_id", "fname", "lname", "mname", "email", "manualSort"],
[1, 1, "Bob", "Nike", "F.", "bob@example.com", 1.0],
[2, 2, "Carry", "Jonson", None, None, 2.0],
[3, 3, "Don", "Yoon", "B.", "don@example.com", 3.0],
[4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
[5, 5, "Ken", "Foster", "C.", "ken@example.com", 5.0],
[6, 6, "George", "Huang", "", "", 6.0],
[7, 7, "Barbara", "Kinney", "D.", "barbara@example.com", 7.0],
])
# Verify removed hidden table and add the new one
self.assertPartialData("_grist_Tables", ["id", "tableId"], [
[2, "Destination1"],
[3, "NewTable"]
])
def test_finish_import_into_existing_table_with_single_merge_col(self):
  """
  Finish an import into Destination1, merging on the single column 'Employee_ID' with the
  'replace-with-nonblank-source' strategy. Verifies the stored actions, the destination's
  column records, and the merged table data.
  """
  # Add source and destination tables.
  self.init_state()

  # Use 'Employee_ID' as the merge column, updating existing employees in Destination1
  # with the same employee id.
  transform_rule = self.TEMP_transform_rule_colids
  merge_options = {
    'mergeCols': ['Employee_ID'],
    'mergeStrategy': {'type': 'replace-with-nonblank-source'},
  }
  out_actions = self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    transform_rule, merge_options,
  ])

  # Check that the right actions were created.
  helper_col_id = "gristHelper_Import_Middle_Initial"
  helper_formula = "$mname[0] if $mname else ''"
  expected_stored = [
    ["AddColumn", "Hidden_table", helper_col_id, {
      "formula": helper_formula,
      "isFormula": True,
      "type": "Text",
    }],
    ["AddRecord", "_grist_Tables_column", 13, {
      "colId": helper_col_id,
      "formula": helper_formula,
      "isFormula": True,
      "label": "Middle Initial",
      "parentId": 1,
      "parentPos": 13.0,
      "type": "Text",
      "widgetOptions": "",
    }],
    ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
    ["RemoveRecord", "_grist_Views_section", 1],
    ["RemoveRecord", "_grist_TabBar", 1],
    ["RemoveRecord", "_grist_Pages", 1],
    ["RemoveRecord", "_grist_Views", 1],
    ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
    ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
    ["RemoveRecord", "_grist_Tables", 1],
    ["RemoveTable", "Hidden_table"],
    ["BulkUpdateRecord", "Destination1", [1, 3], {
      "Email": ["bob@example.com", "don@example.com"],
      "Middle_Initial": ["F", "B"],
    }],
    ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {
      "Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"],
      "Employee_ID": [4, 5, 6, 7],
      "First_Name": ["Amir", "Ken", "George", "Barbara"],
      "Last_Name": ["Greene", "Foster", "Huang", "Kinney"],
      "Middle_Initial": ["", "C", "", "D"],
      "manualSort": [4.0, 5.0, 6.0, 7.0],
    }],
  ]
  self.assertPartialOutActions(out_actions, {"stored": expected_stored})

  # The column records of Destination1 (table record 2) should be unchanged.
  expected_columns = [
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ]
  self.assertTableData('_grist_Tables_column', cols="subset", data=expected_columns,
                       rows=lambda col: col.parentId.id == 2)

  # Check that Destination1 has no duplicates and that previous records (1 - 3) are updated.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  # Only Destination1 should remain among the listed tables.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_finish_import_into_existing_table_with_multiple_merge_cols(self):
  """
  Finish an import into Destination1, merging on both 'First_Name' and 'Last_Name' with the
  'replace-with-nonblank-source' strategy. Verifies the stored actions, the destination's
  column records, and the merged table data.
  """
  # Add source and destination tables.
  self.init_state()

  # Use 'First_Name' and 'Last_Name' as the merge columns, updating existing employees in
  # Destination1 with the same name.
  transform_rule = self.TEMP_transform_rule_colids
  merge_options = {
    'mergeCols': ['First_Name', 'Last_Name'],
    'mergeStrategy': {'type': 'replace-with-nonblank-source'},
  }
  out_actions = self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    transform_rule, merge_options,
  ])

  # Check that the right actions were created.
  helper_col_id = "gristHelper_Import_Middle_Initial"
  helper_formula = "$mname[0] if $mname else ''"
  expected_stored = [
    ["AddColumn", "Hidden_table", helper_col_id, {
      "formula": helper_formula,
      "isFormula": True,
      "type": "Text",
    }],
    ["AddRecord", "_grist_Tables_column", 13, {
      "colId": helper_col_id,
      "formula": helper_formula,
      "isFormula": True,
      "label": "Middle Initial",
      "parentId": 1,
      "parentPos": 13.0,
      "type": "Text",
      "widgetOptions": "",
    }],
    ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
    ["RemoveRecord", "_grist_Views_section", 1],
    ["RemoveRecord", "_grist_TabBar", 1],
    ["RemoveRecord", "_grist_Pages", 1],
    ["RemoveRecord", "_grist_Views", 1],
    ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
    ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
    ["RemoveRecord", "_grist_Tables", 1],
    ["RemoveTable", "Hidden_table"],
    ["BulkUpdateRecord", "Destination1", [1, 3], {
      "Email": ["bob@example.com", "don@example.com"],
      "Middle_Initial": ["F", "B"],
    }],
    ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {
      "Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"],
      "Employee_ID": [4, 5, 6, 7],
      "First_Name": ["Amir", "Ken", "George", "Barbara"],
      "Last_Name": ["Greene", "Foster", "Huang", "Kinney"],
      "Middle_Initial": ["", "C", "", "D"],
      "manualSort": [4.0, 5.0, 6.0, 7.0],
    }],
  ]
  self.assertPartialOutActions(out_actions, {"stored": expected_stored})

  # The column records of Destination1 (table record 2) should be unchanged.
  expected_columns = [
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ]
  self.assertTableData('_grist_Tables_column', cols="subset", data=expected_columns,
                       rows=lambda col: col.parentId.id == 2)

  # Check that Destination1 has no duplicates and that previous records (1 - 3) are updated.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  # Only Destination1 should remain among the listed tables.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_finish_import_into_existing_table_with_no_matching_merge_cols(self):
  """
  Finish an import into Destination1, merging on 'Email', where no incoming record matches an
  existing one. Verifies that every source record is added and no existing record is changed.
  """
  # Add source and destination tables.
  self.init_state()

  # Use 'Email' as the merge column: existing employees in Destination1 have different emails,
  # so none should match incoming data.
  transform_rule = self.TEMP_transform_rule_colids
  merge_options = {
    'mergeCols': ['Email'],
    'mergeStrategy': {'type': 'replace-with-nonblank-source'},
  }
  out_actions = self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    transform_rule, merge_options,
  ])

  # Check that the right actions were created: a single bulk add and no updates.
  helper_col_id = "gristHelper_Import_Middle_Initial"
  helper_formula = "$mname[0] if $mname else ''"
  expected_stored = [
    ["AddColumn", "Hidden_table", helper_col_id, {
      "formula": helper_formula,
      "isFormula": True,
      "type": "Text",
    }],
    ["AddRecord", "_grist_Tables_column", 13, {
      "colId": helper_col_id,
      "formula": helper_formula,
      "isFormula": True,
      "label": "Middle Initial",
      "parentId": 1,
      "parentPos": 13.0,
      "type": "Text",
      "widgetOptions": "",
    }],
    ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
    ["RemoveRecord", "_grist_Views_section", 1],
    ["RemoveRecord", "_grist_TabBar", 1],
    ["RemoveRecord", "_grist_Pages", 1],
    ["RemoveRecord", "_grist_Views", 1],
    ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
    ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
    ["RemoveRecord", "_grist_Tables", 1],
    ["RemoveTable", "Hidden_table"],
    ["BulkAddRecord", "Destination1", [4, 5, 6, 7, 8, 9, 10], {
      "Email": ["bob@example.com", None, "don@example.com", "amir@example.com",
                "ken@example.com", "", "barbara@example.com"],
      "Employee_ID": [1, 2, 3, 4, 5, 6, 7],
      "First_Name": ["Bob", "Carry", "Don", "Amir", "Ken", "George", "Barbara"],
      "Last_Name": ["Nike", "Jonson", "Yoon", "Greene", "Foster", "Huang", "Kinney"],
      "Middle_Initial": ["F", "", "B", "", "C", "", "D"],
      "manualSort": [4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
    }],
  ]
  self.assertPartialOutActions(out_actions, {"stored": expected_stored})

  # The column records of Destination1 (table record 2) should be unchanged.
  expected_columns = [
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ]
  self.assertTableData('_grist_Tables_column', cols="subset", data=expected_columns,
                       rows=lambda col: col.parentId.id == 2)

  # Check that no existing records were updated.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F.", "", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", None, "don.b.yoon@example.com", 3.0],
    [4, 1, "Bob", "Nike", "F", "bob@example.com", 4.0],
    [5, 2, "Carry", "Jonson", "", None, 5.0],
    [6, 3, "Don", "Yoon", "B", "don@example.com", 6.0],
    [7, 4, "Amir", "Greene", "", "amir@example.com", 7.0],
    [8, 5, "Ken", "Foster", "C", "ken@example.com", 8.0],
    [9, 6, "George", "Huang", "", "", 9.0],
    [10, 7, "Barbara", "Kinney", "D", "barbara@example.com", 10.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  # Only Destination1 should remain among the listed tables.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_replace_all_fields_merge_strategy(self):
  """
  Finish an import into Destination1, merging on 'Employee_ID' with the 'replace-all-fields'
  strategy. Verifies that matching records take the incoming values, blank ones included.
  """
  # Add source and destination tables.
  self.init_state()

  # Use replace all fields strategy on the 'Employee_ID' column.
  transform_rule = self.TEMP_transform_rule_colids
  merge_options = {
    'mergeCols': ['Employee_ID'],
    'mergeStrategy': {'type': 'replace-all-fields'},
  }
  out_actions = self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    transform_rule, merge_options,
  ])

  # Check that the right actions were created.
  helper_col_id = "gristHelper_Import_Middle_Initial"
  helper_formula = "$mname[0] if $mname else ''"
  expected_stored = [
    ["AddColumn", "Hidden_table", helper_col_id, {
      "formula": helper_formula,
      "isFormula": True,
      "type": "Text",
    }],
    ["AddRecord", "_grist_Tables_column", 13, {
      "colId": helper_col_id,
      "formula": helper_formula,
      "isFormula": True,
      "label": "Middle Initial",
      "parentId": 1,
      "parentPos": 13.0,
      "type": "Text",
      "widgetOptions": "",
    }],
    ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
    ["RemoveRecord", "_grist_Views_section", 1],
    ["RemoveRecord", "_grist_TabBar", 1],
    ["RemoveRecord", "_grist_Pages", 1],
    ["RemoveRecord", "_grist_Views", 1],
    ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
    ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
    ["RemoveRecord", "_grist_Tables", 1],
    ["RemoveTable", "Hidden_table"],
    ["BulkUpdateRecord", "Destination1", [1, 2, 3], {
      "Email": ["bob@example.com", None, "don@example.com"],
      "Middle_Initial": ["F", "", "B"],
    }],
    ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {
      "Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"],
      "Employee_ID": [4, 5, 6, 7],
      "First_Name": ["Amir", "Ken", "George", "Barbara"],
      "Last_Name": ["Greene", "Foster", "Huang", "Kinney"],
      "Middle_Initial": ["", "C", "", "D"],
      "manualSort": [4.0, 5.0, 6.0, 7.0],
    }],
  ]
  self.assertPartialOutActions(out_actions, {"stored": expected_stored})

  # The column records of Destination1 (table record 2) should be unchanged.
  expected_columns = [
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ]
  self.assertTableData('_grist_Tables_column', cols="subset", data=expected_columns,
                       rows=lambda col: col.parentId.id == 2)

  # Check that existing fields were replaced with incoming fields.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "", None, 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  # Only Destination1 should remain among the listed tables.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_replace_blank_fields_only_merge_strategy(self):
  """
  Finish an import into Destination1, merging on 'Employee_ID' with the
  'replace-blank-fields-only' strategy. Verifies that only blank destination fields receive
  incoming values while non-blank ones are kept.
  """
  # Add source and destination tables.
  self.init_state()

  # Use replace blank fields only strategy on the 'Employee_ID' column.
  transform_rule = self.TEMP_transform_rule_colids
  merge_options = {
    'mergeCols': ['Employee_ID'],
    'mergeStrategy': {'type': 'replace-blank-fields-only'},
  }
  out_actions = self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    transform_rule, merge_options,
  ])

  # Check that the right actions were created.
  helper_col_id = "gristHelper_Import_Middle_Initial"
  helper_formula = "$mname[0] if $mname else ''"
  expected_stored = [
    ["AddColumn", "Hidden_table", helper_col_id, {
      "formula": helper_formula,
      "isFormula": True,
      "type": "Text",
    }],
    ["AddRecord", "_grist_Tables_column", 13, {
      "colId": helper_col_id,
      "formula": helper_formula,
      "isFormula": True,
      "label": "Middle Initial",
      "parentId": 1,
      "parentPos": 13.0,
      "type": "Text",
      "widgetOptions": "",
    }],
    ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
    ["RemoveRecord", "_grist_Views_section", 1],
    ["RemoveRecord", "_grist_TabBar", 1],
    ["RemoveRecord", "_grist_Pages", 1],
    ["RemoveRecord", "_grist_Views", 1],
    ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
    ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
    ["RemoveRecord", "_grist_Tables", 1],
    ["RemoveTable", "Hidden_table"],
    ["BulkUpdateRecord", "Destination1", [1, 3], {
      "Email": ["bob@example.com", "don.b.yoon@example.com"],
      "Middle_Initial": ["F.", "B"],
    }],
    ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {
      "Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"],
      "Employee_ID": [4, 5, 6, 7],
      "First_Name": ["Amir", "Ken", "George", "Barbara"],
      "Last_Name": ["Greene", "Foster", "Huang", "Kinney"],
      "Middle_Initial": ["", "C", "", "D"],
      "manualSort": [4.0, 5.0, 6.0, 7.0],
    }],
  ]
  self.assertPartialOutActions(out_actions, {"stored": expected_stored})

  # The column records of Destination1 (table record 2) should be unchanged.
  expected_columns = [
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ]
  self.assertTableData('_grist_Tables_column', cols="subset", data=expected_columns,
                       rows=lambda col: col.parentId.id == 2)

  # Check that only blank existing fields were updated.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F.", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don.b.yoon@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  # Only Destination1 should remain among the listed tables.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_merging_updates_all_duplicates_in_destination_table(self):
  """
  Add two extra Destination1 records sharing Employee_ID 3, then merge on 'Employee_ID' with
  the 'replace-with-nonblank-source' strategy and verify that every duplicate was updated.
  """
  # Add source and destination tables.
  self.init_state()

  # Add duplicates to the destination table with different values than original.
  duplicate_records = {
    'Employee_ID': [3, 3],
    'First_Name': ['Don', 'Don'],
    'Last_Name': ["Yoon", "Yoon"],
    'Middle_Initial': [None, 'B'],
    'Email': ['don.yoon@example.com', 'yoon.don@example.com'],
  }
  self.apply_user_action(['BulkAddRecord', 'Destination1', [4, 5], duplicate_records])

  # Use replace with nonblank source strategy on the 'Employee_ID' column.
  transform_rule = self.TEMP_transform_rule_colids
  merge_options = {
    'mergeCols': ['Employee_ID'],
    'mergeStrategy': {'type': 'replace-with-nonblank-source'},
  }
  self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    transform_rule, merge_options,
  ])

  # Check that all duplicates were updated with new data from the source table.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 3, "Don", "Yoon", "B", "don@example.com", 4.0],
    [5, 3, "Don", "Yoon", "B", "don@example.com", 5.0],
    [6, 4, "Amir", "Greene", "", "amir@example.com", 6.0],
    [7, 5, "Ken", "Foster", "C", "ken@example.com", 7.0],
    [8, 6, "George", "Huang", "", "", 8.0],
    [9, 7, "Barbara", "Kinney", "D", "barbara@example.com", 9.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  # Only Destination1 should remain among the listed tables.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_merging_uses_latest_duplicate_in_source_table_for_matching(self):
  """
  Add two extra Hidden_table records sharing employee_id 3, then merge on 'Employee_ID' with
  the 'replace-with-nonblank-source' strategy and verify that the destination record was
  updated from the last of the duplicate source records.
  """
  # Add source and destination tables.
  self.init_state()

  # Add duplicates to the source table with different values than the original.
  duplicate_records = {
    'employee_id': [3, 3],
    'fname': ['Don', 'Don'],
    'lname': ["Yoon", "yoon"],
    'mname': [None, None],
    'email': ['d.yoon@example.com', 'yoon.don@example.com'],
  }
  self.apply_user_action(['BulkAddRecord', 'Hidden_table', [8, 9], duplicate_records])

  # Use replace with nonblank source strategy on the 'Employee_ID' column.
  transform_rule = self.TEMP_transform_rule_colids
  merge_options = {
    'mergeCols': ['Employee_ID'],
    'mergeStrategy': {'type': 'replace-with-nonblank-source'},
  }
  self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    transform_rule, merge_options,
  ])

  # Check that the last record for Don Yoon in the source table was used for updating the
  # destination table.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "yoon", None, "yoon.don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  # Only Destination1 should remain among the listed tables.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])

View File

@ -1550,8 +1550,9 @@ class UserActions(object):
return self._import_actions.DoGenImporterView(source_table_id, dest_table_id, transform_rule) return self._import_actions.DoGenImporterView(source_table_id, dest_table_id, transform_rule)
@useraction @useraction
def TransformAndFinishImport(self, hidden_table_id, dest_table_id, into_new_table, def MakeImportTransformColumns(self, source_table_id, transform_rule, gen_all):
transform_rule, merge_options = None): return self._import_actions.MakeImportTransformColumns(source_table_id, transform_rule, gen_all)
return self._import_actions.DoTransformAndFinishImport(hidden_table_id, dest_table_id,
into_new_table, transform_rule, @useraction
merge_options or {}) def FillTransformRuleColIds(self, transform_rule):
return self._import_actions.FillTransformRuleColIds(transform_rule)