(core) Enable incremental imports

Summary:
The import dialog now has an option to 'Update existing records',
which when checked will allow for selection of 1 or more fields
to match source and destination tables on.

If all fields match, then the matched record in the
destination table will be merged with the incoming record
from the source table. This means the incoming values will
replace the destination table values, unless the incoming
values are blank.

Additional merge strategies are implemented in the data
engine, but the import dialog only uses one of the
strategies currently. The others can be exposed in the UI
in the future; they tweak how source and destination
values are merged in different contexts, such as when
blank values exist.

Test Plan: Python and browser tests.

Reviewers: paulfitz

Reviewed By: paulfitz

Subscribers: alexmojaki

Differential Revision: https://phab.getgrist.com/D3020
pull/115/head
George Gevoian 3 years ago
parent a543e5194a
commit 8a7edb6257

@ -526,7 +526,7 @@ export class GristDoc extends DisposableWithEvents {
multiple: true});
if (uploadResult) {
const dataSource = {uploadId: uploadResult.uploadId, transforms: []};
const importResult = await this.docComm.finishImportFiles(dataSource, {}, []);
const importResult = await this.docComm.finishImportFiles(dataSource, [], {});
const tableId = importResult.tables[0].hiddenTableId;
const tableRowModel = this.docModel.dataTables[tableId].tableMetaRow;
await this.openDocPage(tableRowModel.primaryViewId());

@ -15,14 +15,16 @@ import {openFilePicker} from "app/client/ui/FileDialog";
import {bigBasicButton, bigPrimaryButton} from 'app/client/ui2018/buttons';
import {colors, testId, vars} from 'app/client/ui2018/cssVars';
import {icon} from 'app/client/ui2018/icons';
import {IOptionFull, linkSelect} from 'app/client/ui2018/menus';
import {IOptionFull, linkSelect, multiSelect} from 'app/client/ui2018/menus';
import {cssModalButtons, cssModalTitle} from 'app/client/ui2018/modals';
import {DataSourceTransformed, ImportResult, ImportTableResult} from "app/common/ActiveDocAPI";
import {TransformColumn, TransformRule, TransformRuleMap} from "app/common/ActiveDocAPI";
import {DataSourceTransformed, ImportResult, ImportTableResult, MergeOptions,
MergeStrategy, TransformColumn, TransformRule, TransformRuleMap} from "app/common/ActiveDocAPI";
import {byteString} from "app/common/gutil";
import {UploadResult} from 'app/common/uploads';
import {ParseOptions, ParseOptionSchema} from 'app/plugin/FileParserAPI';
import {Computed, Disposable, dom, DomContents, IDisposable, Observable, styled} from 'grainjs';
import {Computed, Disposable, dom, DomContents, IDisposable, MutableObsArray, obsArray, Observable,
styled} from 'grainjs';
import {labeledSquareCheckbox} from "app/client/ui2018/checkbox";
// Special values for import destinations; null means "new table".
// TODO We should also support "skip table" (needs server support), so that one can open, say,
@ -45,6 +47,15 @@ export interface SourceInfo {
transformSection: Observable<ViewSectionRec>;
destTableId: Observable<DestId>;
}
// UI state of the merge options selected for each source table, keyed by the
// hidden table id from SourceInfo. An entry is undefined until merge options
// have been initialized for that table.
interface MergeOptionsState {
  [srcTableId: string]: {
    // Whether the "Update existing records" checkbox is checked.
    updateExistingRecords: Observable<boolean>;
    // Columns selected as merge keys for matching source rows to destination rows.
    mergeCols: MutableObsArray<string>;
    // Strategy for combining matched source/destination values.
    mergeStrategy: Observable<MergeStrategy>;
    // True when validation failed because no merge columns were selected.
    hasInvalidMergeCols: Observable<boolean>;
  } | undefined;
}
/**
* Importer manages the import of files into Grist tables and shows a Preview
@ -119,6 +130,7 @@ export class Importer extends Disposable {
private _uploadResult?: UploadResult;
private _screen: PluginScreen;
private _mergeOptions: MergeOptionsState = {};
private _parseOptions = Observable.create<ParseOptions>(this, {});
private _sourceInfoArray = Observable.create<SourceInfo[]>(this, []);
private _sourceInfoSelected = Observable.create<SourceInfo|null>(this, null);
@ -223,6 +235,22 @@ export class Importer extends Disposable {
return {uploadId: upload.uploadId, transforms};
}
private _getMergeOptions(upload: UploadResult): Array<MergeOptions|null> {
return upload.files.map((_file, i) => {
const sourceInfo = this._sourceInfoArray.get().find(info => info.uploadFileIndex === i);
if (!sourceInfo) { return null; }
const mergeOptions = this._mergeOptions[sourceInfo.hiddenTableId];
if (!mergeOptions) { return null; }
const {updateExistingRecords, mergeCols, mergeStrategy} = mergeOptions;
return {
mergeCols: updateExistingRecords.get() ? mergeCols.get() : [],
mergeStrategy: mergeStrategy.get()
};
});
}
private _createTransformRuleMap(uploadFileIndex: number): TransformRuleMap {
const result: TransformRuleMap = {};
for (const sourceInfo of this._sourceInfoArray.get()) {
@ -276,6 +304,16 @@ export class Importer extends Disposable {
throw new Error("No data was imported");
}
this._mergeOptions = {};
this._getHiddenTableIds().forEach(tableId => {
this._mergeOptions[tableId] = {
updateExistingRecords: Observable.create(null, false),
mergeCols: obsArray(),
mergeStrategy: Observable.create(null, {type: 'replace-with-nonblank-source'}),
hasInvalidMergeCols: Observable.create(null, false)
};
});
// Select the first sourceInfo to show in preview.
this._sourceInfoSelected.set(this._sourceInfoArray.get()[0] || null);
@ -287,11 +325,16 @@ export class Importer extends Disposable {
}
}
private async _finishImport(upload: UploadResult) {
private async _maybeFinishImport(upload: UploadResult) {
const isConfigValid = this._validateImportConfiguration();
if (!isConfigValid) { return; }
this._screen.renderSpinner();
const parseOptions = {...this._parseOptions.get(), NUM_ROWS: 0};
const mergeOptions = this._getMergeOptions(upload);
const importResult: ImportResult = await this._docComm.finishImportFiles(
this._getTransformedDataSource(upload), parseOptions, this._getHiddenTableIds());
this._getTransformedDataSource(upload), this._getHiddenTableIds(), {mergeOptions, parseOptions});
if (importResult.tables[0].hiddenTableId) {
const tableRowModel = this._gristDoc.docModel.dataTables[importResult.tables[0].hiddenTableId].tableMetaRow;
@ -310,6 +353,28 @@ export class Importer extends Disposable {
this.dispose();
}
// Clears the selected merge columns for `tableId`, if merge options exist for it.
private _resetTableMergeOptions(tableId: string) {
  const options = this._mergeOptions[tableId];
  if (options) {
    options.mergeCols.set([]);
  }
}
/**
 * Validates the merge configuration of the currently selected source table.
 * Returns true when valid (or when there is nothing to validate). Otherwise,
 * sets the hasInvalidMergeCols error observable and returns false.
 */
private _validateImportConfiguration(): boolean {
  const selected = this._sourceInfoSelected.get();
  if (!selected) { return true; }  // Nothing is selected, so nothing to validate.
  const state = this._mergeOptions[selected.hiddenTableId];
  if (!state) { return true; }     // No merge options, so nothing to validate.
  // "Update existing records" requires at least one merge column to be selected.
  if (state.updateExistingRecords.get() && state.mergeCols.get().length === 0) {
    state.hasInvalidMergeCols.set(true);
    return false;
  }
  return true;
}
private _buildModalTitle(rightElement?: DomContents) {
const title = this._importSourceElem ? this._importSourceElem.importSource.label : 'Import from file';
return cssModalHeader(cssModalTitle(title), rightElement);
@ -329,18 +394,64 @@ export class Importer extends Disposable {
cssTableList(
dom.forEach(this._sourceInfoArray, (info) => {
const destTableId = Computed.create(null, (use) => use(info.destTableId))
.onWrite((destId) => this._updateTransformSection(info, destId));
.onWrite((destId) => {
this._resetTableMergeOptions(info.hiddenTableId);
void this._updateTransformSection(info, destId);
});
return cssTableInfo(
dom.autoDispose(destTableId),
cssTableLine(cssToFrom('From'),
cssTableSource(getSourceDescription(info, upload), testId('importer-from'))),
cssTableLine(cssToFrom('To'), linkSelect<DestId>(destTableId, this._destTables)),
cssTableInfo.cls('-selected', (use) => use(this._sourceInfoSelected) === info),
dom.on('click', () => this._sourceInfoSelected.set(info)),
dom.on('click', () => {
if (info === this._sourceInfoSelected.get() || !this._validateImportConfiguration()) {
return;
}
this._sourceInfoSelected.set(info);
}),
testId('importer-source'),
);
}),
),
dom.maybe(this._sourceInfoSelected, (info) =>
dom.maybe(info.destTableId, () => {
const {mergeCols, updateExistingRecords, hasInvalidMergeCols} = this._mergeOptions[info.hiddenTableId]!;
return cssMergeOptions(
cssMergeOptionsToggle(labeledSquareCheckbox(
updateExistingRecords,
'Update existing records',
testId('importer-update-existing-records')
)),
dom.maybe(updateExistingRecords, () => [
cssMergeOptionsMessage(
'Imported rows will be merged with records that have the same values for all of these fields:',
testId('importer-merge-fields-message')
),
dom.domComputed(info.transformSection, section => {
// When changes are made to selected fields, reset the multiSelect error observable.
const invalidColsListener = mergeCols.addListener((val, _prev) => {
if (val.length !== 0 && hasInvalidMergeCols.get()) {
hasInvalidMergeCols.set(false);
}
});
return [
dom.autoDispose(invalidColsListener),
multiSelect(
mergeCols,
section.viewFields().peek().map(field => field.label()),
{
placeholder: 'Select fields to match on',
error: hasInvalidMergeCols
},
testId('importer-merge-fields-select')
),
];
})
])
);
})
),
dom.maybe(this._previewViewSection, () => cssSectionHeader('Preview')),
dom.maybe(this._previewViewSection, (viewSection) => {
const gridView = this._createPreview(viewSection);
@ -353,7 +464,7 @@ export class Importer extends Disposable {
),
cssModalButtons(
bigPrimaryButton('Import',
dom.on('click', () => this._finishImport(upload)),
dom.on('click', () => this._maybeFinishImport(upload)),
testId('modal-confirm'),
),
bigBasicButton('Cancel',
@ -480,3 +591,16 @@ const cssPreviewGrid = styled('div', `
height: 300px;
border: 1px solid ${colors.darkGrey};
`);
// Container for the merge options section of the import dialog.
const cssMergeOptions = styled('div', `
  margin-bottom: 16px;
`);

// Wrapper for the "Update existing records" checkbox.
const cssMergeOptionsToggle = styled('div', `
  margin-bottom: 8px;
`);

// Explanatory text shown above the merge-fields multi-select.
const cssMergeOptionsMessage = styled('div', `
  color: ${colors.slate};
  margin-bottom: 8px;
`);

@ -5,10 +5,10 @@ import {cssSelectBtn} from 'app/client/ui2018/select';
import {IconName} from 'app/client/ui2018/IconList';
import {icon} from 'app/client/ui2018/icons';
import {commonUrls} from 'app/common/gristUrls';
import {dom, DomElementArg, DomElementMethod} from 'grainjs';
import {MaybeObsArray, Observable, styled} from 'grainjs';
import {Computed, dom, DomElementArg, DomElementMethod, MaybeObsArray, MutableObsArray, Observable,
styled} from 'grainjs';
import * as weasel from 'popweasel';
import {IAutocompleteOptions} from 'popweasel';
import {cssCheckboxSquare, cssLabel, cssLabelText} from 'app/client/ui2018/checkbox';
export interface IOptionFull<T> {
value: T;
@ -132,6 +132,95 @@ export function linkSelect<T>(obs: Observable<T>, optionArray: MaybeObsArray<IOp
return elem;
}
// Optional configuration for multiSelect().
export interface IMultiSelectUserOptions {
  // Text shown in the select button when no options are selected.
  placeholder?: string;
  // When true, the select button is rendered with a red (error) border.
  error?: Observable<boolean>;
}
/**
 * Creates a select dropdown widget that supports selecting multiple options.
 *
 * The observable array `selectedOptions` reflects the selected options, and
 * `availableOptions` is an array (normal or observable) of selectable options.
 * These may either be strings, or {label, value} objects.
 *
 * The button summary shows the labels of the selected options (in the order
 * they appear in `availableOptions`), or `options.placeholder` when nothing
 * is selected. When `options.error` is true, the button gets a red border.
 */
export function multiSelect<T>(selectedOptions: MutableObsArray<T>,
                               availableOptions: MaybeObsArray<IOption<T>>,
                               options: IMultiSelectUserOptions = {},
                               ...domArgs: DomElementArg[]) {
  // Set view of the selected values, for constant-time membership checks below.
  const selectedOptionsSet = Computed.create(null, selectedOptions, (_use, opts) => new Set(opts));
  // Comma-separated labels of the selected options, shown in the button.
  const selectedOptionsText = Computed.create(null, selectedOptionsSet, (use, selectedOpts) => {
    if (selectedOpts.size === 0) {
      return options.placeholder ?? 'Select fields';
    }
    const optionArray = Array.isArray(availableOptions) ? availableOptions : use(availableOptions);
    return optionArray
      .filter(opt => selectedOpts.has(weasel.getOptionFull(opt).value))
      .map(opt => weasel.getOptionFull(opt).label)
      .join(', ');
  });

  // Builds the popup menu with one checkbox row per available option.
  function buildMultiSelectMenu(ctl: weasel.IOpenController) {
    return cssMultiSelectMenu(
      { tabindex: '-1' }, // Allow menu to be focused.
      dom.cls(menuCssClass),
      dom.onKeyDown({
        Enter: () => ctl.close(),
        Escape: () => ctl.close()
      }),
      elem => {
        // Set focus on open, so that keyboard events work.
        setTimeout(() => elem.focus(), 0);

        // Sets menu width to match parent container (button) width.
        const style = elem.style;
        style.minWidth = ctl.getTriggerElem().getBoundingClientRect().width + 'px';
        style.marginLeft = style.marginRight = '0';
      },
      // Rebuild the checkbox list whenever the selected set changes.
      dom.domComputed(selectedOptionsSet, selectedOpts => {
        return dom.forEach(availableOptions, option => {
          const fullOption = weasel.getOptionFull(option);
          return cssCheckboxLabel(
            cssCheckboxSquare(
              {type: 'checkbox'},
              dom.prop('checked', selectedOpts.has(fullOption.value)),
              dom.on('change', (_ev, elem) => {
                if (elem.checked) {
                  selectedOptions.push(fullOption.value);
                } else {
                  // Remove from the set, then write the remainder back to the array.
                  selectedOpts.delete(fullOption.value);
                  selectedOptions.set([...selectedOpts]);
                }
              }),
              dom.style('position', 'relative'),
              testId('multi-select-menu-option-checkbox')
            ),
            cssCheckboxText(fullOption.label, testId('multi-select-menu-option-text')),
            testId('multi-select-menu-option')
          );
        });
      }),
      testId('multi-select-menu')
    );
  }

  return cssSelectBtn(
    dom.autoDispose(selectedOptionsSet),
    dom.autoDispose(selectedOptionsText),
    cssMultiSelectSummary(dom.text(selectedOptionsText)),
    icon('Dropdown'),
    elem => {
      weasel.setPopupToCreateDom(elem, ctl => buildMultiSelectMenu(ctl), weasel.defaultMenuOptions);
    },
    dom.style('border', use => {
      return options.error && use(options.error) ? '1px solid red' : `1px solid ${colors.darkGrey}`;
    }),
    ...domArgs
  );
}
/**
* Creates a select dropdown widget that is more ideal for forms. Implemented using the <select>
* element to work with browser form autofill and typing in the desired value to quickly set it.
@ -207,7 +296,7 @@ export function upgradeText(needUpgrade: boolean) {
export function autocomplete(
inputElem: HTMLInputElement,
choices: MaybeObsArray<string>,
options: IAutocompleteOptions = {}
options: weasel.IAutocompleteOptions = {}
) {
return weasel.autocomplete(inputElem, choices, {
...defaults, ...options,
@ -376,3 +465,27 @@ const cssAnnotateMenuItem = styled('span', `
color: white;
}
`);
// Summary text inside the multiSelect button; truncated with an ellipsis on overflow.
const cssMultiSelectSummary = styled('div', `
  flex: 1 1 0px;
  overflow: hidden;
  text-overflow: ellipsis;
`);

// Popup menu for multiSelect; its min-width is set at open time to match the button.
const cssMultiSelectMenu = styled(weasel.cssMenu, `
  display: flex;
  flex-direction: column;
  max-height: calc(max(300px, 95vh - 300px));
  max-width: 400px;
  padding-bottom: 0px;
`);

// A single checkbox row in the multiSelect menu.
const cssCheckboxLabel = styled(cssLabel, `
  padding: 8px 16px;
`);

// Label text of a checkbox row in the multiSelect menu.
const cssCheckboxText = styled(cssLabelText, `
  margin-right: 12px;
  color: ${colors.dark};
  white-space: pre;
`);

@ -56,6 +56,20 @@ export interface ImportTableResult {
destTableId: string|null;
}
// Determines how matched source and destination values are combined during an
// incremental import. NOTE: kept in sync with _merge_funcs in the data engine.
export interface MergeStrategy {
  type: 'replace-with-nonblank-source' | 'replace-all-fields' | 'replace-blank-fields-only';
}

export interface MergeOptions {
  mergeCols: string[];          // Columns to use as merge keys for incremental imports.
  mergeStrategy: MergeStrategy; // Determines how matched records should be merged between 2 tables.
}

export interface ImportOptions {
  parseOptions?: ParseOptions;             // Options for parsing the source file.
  mergeOptions?: Array<MergeOptions|null>; // Options for merging fields, indexed by uploadFileIndex.
}
/**
* Represents a query for Grist data. The tableId is required. An empty set of filters indicates
* the full table. Examples:
@ -159,8 +173,8 @@ export interface ActiveDocAPI {
/**
* Finishes import files, creates the new tables, and cleans up temporary hidden tables and uploads.
*/
finishImportFiles(dataSource: DataSourceTransformed,
parseOptions: ParseOptions, prevTableIds: string[]): Promise<ImportResult>;
finishImportFiles(dataSource: DataSourceTransformed, prevTableIds: string[],
options: ImportOptions): Promise<ImportResult>;
/**
* Cancels import files, cleans up temporary hidden tables and uploads.

@ -21,6 +21,7 @@ import {
ApplyUAResult,
DataSourceTransformed,
ForkResult,
ImportOptions,
ImportResult,
QueryResult,
ServerQuery
@ -467,8 +468,8 @@ export class ActiveDoc extends EventEmitter {
* call, or empty if there was no previous call.
*/
public finishImportFiles(docSession: DocSession, dataSource: DataSourceTransformed,
parseOptions: ParseOptions, prevTableIds: string[]): Promise<ImportResult> {
return this._activeDocImport.finishImportFiles(docSession, dataSource, parseOptions, prevTableIds);
prevTableIds: string[], importOptions: ImportOptions): Promise<ImportResult> {
return this._activeDocImport.finishImportFiles(docSession, dataSource, prevTableIds, importOptions);
}
/**

@ -3,7 +3,8 @@
import * as path from 'path';
import * as _ from 'underscore';
import {DataSourceTransformed, ImportResult, ImportTableResult, TransformRuleMap} from 'app/common/ActiveDocAPI';
import {DataSourceTransformed, ImportOptions, ImportResult, ImportTableResult, MergeOptions,
TransformRuleMap} from 'app/common/ActiveDocAPI';
import {ApplyUAResult} from 'app/common/ActiveDocAPI';
import {ApiError} from 'app/common/ApiError';
import * as gutil from 'app/common/gutil';
@ -34,6 +35,21 @@ interface ReferenceDescription {
refTableId: string;
}
// Options for importing a single file as a new table.
interface FileImportOptions {
  // Suggested name of the import file. It is sometimes used as a suggested table name, e.g. for csv imports.
  originalFilename: string;
  // Parse options to pass to the import plugin.
  parseOptions: ParseOptions;
  // Options for determining how matched fields between source and destination tables should be merged.
  // Null when no merging should occur (i.e. a plain import of new records).
  mergeOptions: MergeOptions|null;
  // Flag to indicate whether table is temporary and hidden or regular.
  isHidden: boolean;
  // Index of original dataSource corresponding to current imported file.
  uploadFileIndex: number;
  // Map of table names to their transform rules.
  transformRuleMap: TransformRuleMap;
}
export class ActiveDocImport {
constructor(private _activeDoc: ActiveDoc) {}
/**
@ -46,7 +62,7 @@ export class ActiveDocImport {
const userId = docSession.authorizer.getUserId();
const accessId = this._activeDoc.makeAccessId(userId);
const uploadInfo: UploadInfo = globalUploadSet.getUploadInfo(dataSource.uploadId, accessId);
return this._importFiles(docSession, uploadInfo, dataSource.transforms, parseOptions, true);
return this._importFiles(docSession, uploadInfo, dataSource.transforms, {parseOptions}, true);
}
/**
@ -54,7 +70,7 @@ export class ActiveDocImport {
* the new tables
*/
public async finishImportFiles(docSession: DocSession, dataSource: DataSourceTransformed,
parseOptions: ParseOptions, prevTableIds: string[]): Promise<ImportResult> {
prevTableIds: string[], importOptions: ImportOptions): Promise<ImportResult> {
this._activeDoc.startBundleUserActions(docSession);
try {
await this._removeHiddenTables(docSession, prevTableIds);
@ -62,7 +78,7 @@ export class ActiveDocImport {
const accessId = this._activeDoc.makeAccessId(userId);
const uploadInfo: UploadInfo = globalUploadSet.getUploadInfo(dataSource.uploadId, accessId);
const importResult = await this._importFiles(docSession, uploadInfo, dataSource.transforms,
parseOptions, false);
importOptions, false);
await globalUploadSet.cleanup(dataSource.uploadId);
return importResult;
} finally {
@ -101,11 +117,12 @@ export class ActiveDocImport {
}
/**
* Imports all files as new tables, using the given transform rules and parse options.
* Imports all files as new tables, using the given transform rules and import options.
* The isHidden flag indicates whether to create temporary hidden tables, or final ones.
*/
private async _importFiles(docSession: OptDocSession, upload: UploadInfo, transforms: TransformRuleMap[],
parseOptions: ParseOptions, isHidden: boolean): Promise<ImportResult> {
{parseOptions = {}, mergeOptions = []}: ImportOptions,
isHidden: boolean): Promise<ImportResult> {
// Check that upload size is within the configured limits.
const limit = (Number(process.env.GRIST_MAX_UPLOAD_IMPORT_MB) * 1024 * 1024) || Infinity;
@ -126,8 +143,14 @@ export class ActiveDocImport {
if (file.ext) {
origName = path.basename(origName, path.extname(origName)) + file.ext;
}
const res = await this._importFileAsNewTable(docSession, index, file.absPath, origName,
parseOptions, isHidden, transforms[index] || {});
const res = await this._importFileAsNewTable(docSession, file.absPath, {
parseOptions,
mergeOptions: mergeOptions[index] || null,
isHidden,
originalFilename: origName,
uploadFileIndex: index,
transformRuleMap: transforms[index] || {}
});
if (index === 0) {
// Returned parse options from the first file should be used for all files in one upload.
importResult.options = parseOptions = res.options;
@ -143,27 +166,21 @@ export class ActiveDocImport {
* Currently it starts a python parser (that relies on the messytables library) as a child process
* outside the sandbox, and supports xls(x), csv, txt, and perhaps some other formats. It may
* result in the import of multiple tables, in case of e.g. Excel formats.
* @param {ActiveDoc} activeDoc: Instance of ActiveDoc.
* @param {Number} dataSourceIdx: Index of original dataSourse corresponding to current imported file.
* @param {OptDocSession} docSession: Session instance to use for importing.
* @param {String} tmpPath: The path from of the original file.
* @param {String} originalFilename: Suggested name of the import file. It is sometimes used as a
* suggested table name, e.g. for csv imports.
* @param {String} options: Containing parseOptions as serialized JSON to pass to the import plugin.
* @param {Boolean} isHidden: Flag to indicate whether table is temporary and hidden or regular.
* @param {TransformRuleMap} transformRuleMap: Containing transform rules for each table in file such as
* `destTableId`, `destCols`, `sourceCols`.
* @param {FileImportOptions} importOptions: File import options.
* @returns {Promise<ImportResult>} with `options` property containing parseOptions as serialized JSON as adjusted
or guessed by the plugin, and `tables`, which is a list of objects with information about
* tables, such as `hiddenTableId`, `dataSourceIndex`, `origTableName`, `transformSectionRef`, `destTableId`.
* tables, such as `hiddenTableId`, `uploadFileIndex`, `origTableName`, `transformSectionRef`, `destTableId`.
*/
private async _importFileAsNewTable(docSession: OptDocSession, uploadFileIndex: number, tmpPath: string,
originalFilename: string,
options: ParseOptions, isHidden: boolean,
transformRuleMap: TransformRuleMap|undefined): Promise<ImportResult> {
private async _importFileAsNewTable(docSession: OptDocSession, tmpPath: string,
importOptions: FileImportOptions): Promise<ImportResult> {
const {originalFilename, parseOptions, mergeOptions, isHidden, uploadFileIndex,
transformRuleMap} = importOptions;
log.info("ActiveDoc._importFileAsNewTable(%s, %s)", tmpPath, originalFilename);
const optionsAndData: ParseFileResult = await this._activeDoc.docPluginManager.parseFile(tmpPath,
originalFilename, options);
options = optionsAndData.parseOptions;
const optionsAndData: ParseFileResult =
await this._activeDoc.docPluginManager.parseFile(tmpPath, originalFilename, parseOptions);
const options = optionsAndData.parseOptions;
const parsedTables = optionsAndData.tables;
const references = this._encodeReferenceAsInt(parsedTables);
@ -220,7 +237,7 @@ export class ActiveDocImport {
const tableId = await this._activeDoc.applyUserActions(docSession,
[['TransformAndFinishImport',
hiddenTableId, destTable, intoNewTable,
ruleCanBeApplied ? transformRule : null]]);
ruleCanBeApplied ? transformRule : null, mergeOptions]]);
createdTableId = tableId.retValues[0]; // this is garbage for now I think?

@ -1,4 +1,7 @@
from collections import namedtuple
from collections import defaultdict, namedtuple
import six
from six.moves import zip, xrange
import column
import identifiers
@ -37,6 +40,68 @@ def _strip_prefixes(transform_rule):
dest_col["colId"] = colId[len(_import_transform_col_prefix):]
def _is_blank(value):
  """Returns True if value is blank: None, or a string that is empty or only whitespace."""
  if value is None:
    return True
  # Non-string, non-None values (numbers, dates, etc.) are never considered blank.
  return isinstance(value, six.string_types) and not value.strip()
def _build_merge_col_map(column_data, merge_cols):
  """
  Returns a dict mapping each merge key (a tuple of the values from
  column_data for the columns in merge_cols) to the list of 1-based row ids
  in column_data having that key. A key with multiple row ids means the
  source data contains duplicates for the merge columns.

  Used for merging into tables where fast, constant-time lookups are needed:
  a source table builds the map from its column_data, and the destination
  table then queries it with its own values for the merge_cols to find
  matching rows that are candidates for updating.
  """
  key_to_row_ids = defaultdict(list)
  keys = zip(*[column_data[col] for col in merge_cols])
  for index, key in enumerate(keys):
    # If any part of the key is blank, don't include it in the map.
    if any(_is_blank(part) for part in key):
      continue
    try:
      key_to_row_ids[key].append(index + 1)
    except TypeError:
      pass  # If key isn't hashable, don't include it in the map.
  return key_to_row_ids
# Dictionary mapping merge strategy types from ActiveDocAPI.ts to functions
# that merge source and destination column values.
#
# NOTE: This dictionary should be kept in sync with the types in that file.
#
# All functions have the same signature: (src, dest) => output,
# where src and dest are column values from a source and destination
# table respectively, and output is either src or dest.
#
# For example, the key 'replace-with-nonblank-source' maps to a merge function
# that returns the src argument if it's not blank, and the dest argument
# otherwise. In the context of incremental imports, this updates destination
# fields whenever the source field isn't blank, preserving existing values in
# destination fields that aren't replaced.
_merge_funcs = {
  'replace-with-nonblank-source': lambda src, dest: dest if _is_blank(src) else src,
  'replace-all-fields': lambda src, _: src,
  'replace-blank-fields-only': lambda src, dest: src if _is_blank(dest) else dest
}
class ImportActions(object):
def __init__(self, useractions, docmodel, engine):
@ -157,6 +222,68 @@ class ImportActions(object):
return new_cols
def _MergeColumnData(self, dest_table_id, column_data, merge_options):
  """
  Merges column_data into table dest_table_id: destination rows whose values
  for all of merge_options['mergeCols'] match a source row are updated using
  the merge function named by merge_options['mergeStrategy'], and unmatched
  source rows are appended to the end of table dest_table_id.

  dest_table_id: id of destination table
  column_data: column data from source table to merge into destination table
  merge_options: dict with 'mergeCols' (list of column ids to use as merge
    keys) and 'mergeStrategy' (determines how matched values are combined)
  """
  dest_table = self._engine.tables[dest_table_id]
  merge_cols = merge_options['mergeCols']
  # Map from merge-key tuple to the (1-based) source row ids with that key.
  merge_col_map = _build_merge_col_map(column_data, merge_cols)

  updated_row_ids = []
  updated_rows = {}
  new_rows = {}
  matched_src_table_rows = set()

  # Initialize column data for new and updated rows.
  for col_id in six.iterkeys(column_data):
    updated_rows[col_id] = []
    new_rows[col_id] = []

  strategy_type = merge_options['mergeStrategy']['type']
  merge = _merge_funcs[strategy_type]

  # Compute which source table rows should update existing records in destination table.
  dest_cols = [dest_table.get_column(col) for col in merge_cols]
  for dest_row_id in dest_table.row_ids:
    lookup_key = tuple(col.raw_get(dest_row_id) for col in dest_cols)
    try:
      src_row_ids = merge_col_map.get(lookup_key)
    except TypeError:
      # We can arrive here if lookup_key isn't hashable. If that's the case, skip
      # this row since we can't efficiently search for a match in the source table.
      continue

    if src_row_ids:
      matched_src_table_rows.update(src_row_ids)
      updated_row_ids.append(dest_row_id)
      for col_id, col_vals in six.iteritems(column_data):
        # When duplicate source rows match, the last matching row wins.
        src_val = col_vals[src_row_ids[-1] - 1]
        dest_val = dest_table.get_column(col_id).raw_get(dest_row_id)
        updated_rows[col_id].append(merge(src_val, dest_val))

  num_src_rows = len(column_data[merge_cols[0]])

  # Compute which source table rows should be added to destination table as new records.
  for row_id in xrange(1, num_src_rows + 1):
    # If we've matched against the row before, we shouldn't add it.
    if row_id in matched_src_table_rows:
      continue

    for col_id, col_val in six.iteritems(column_data):
      new_rows[col_id].append(col_val[row_id - 1])

  self._useractions.BulkUpdateRecord(dest_table_id, updated_row_ids, updated_rows)
  self._useractions.BulkAddRecord(dest_table_id,
    [None] * (num_src_rows - len(matched_src_table_rows)), new_rows)
def DoGenImporterView(self, source_table_id, dest_table_id, transform_rule = None):
"""
@ -224,7 +351,8 @@ class ImportActions(object):
def DoTransformAndFinishImport(self, hidden_table_id, dest_table_id,
into_new_table, transform_rule):
into_new_table, transform_rule,
merge_options):
"""
Finishes import into new or existing table depending on flag 'into_new_table'
Returns destination table id. (new or existing)
@ -303,7 +431,10 @@ class ImportActions(object):
new_table = self._useractions.AddTable(dest_table_id, col_specs)
dest_table_id = new_table['table_id']
self._useractions.BulkAddRecord(dest_table_id, [None] * len(row_ids), column_data)
if not merge_options.get('mergeCols'):
self._useractions.BulkAddRecord(dest_table_id, [None] * len(row_ids), column_data)
else:
self._MergeColumnData(dest_table_id, column_data, merge_options)
log.debug("Finishing TransformAndFinishImport")

@ -13,68 +13,91 @@ class TestImportTransform(test_engine.EngineTestCase):
def init_state(self):
# Add source table
self.apply_user_action(['AddTable', 'Hidden_table', [
{'id': 'employee_id', 'type': 'Int'},
{'id': 'fname', 'type': 'Text'},
{'id': 'mname', 'type': 'Text'},
{'id': 'lname', 'type': 'Text'},
{'id': 'email', 'type': 'Text'},
]])
self.apply_user_action(['BulkAddRecord', 'Hidden_table', [1, 2], {'fname': ['Carry', 'Don'],
'mname': ['M.', 'B.'],
'lname': ['Jonson', "Yoon"]
}])
self.apply_user_action(['BulkAddRecord', 'Hidden_table', [1, 2, 3, 4, 5, 6, 7], {
'employee_id': [1, 2, 3, 4, 5, 6, 7],
'fname': ['Bob', 'Carry', 'Don', 'Amir', 'Ken', 'George', 'Barbara'],
'mname': ['F.', None, 'B.', '', 'C.', '', 'D.'],
'lname': ['Nike', 'Jonson', "Yoon", "Greene", "Foster", "Huang", "Kinney"],
'email': [
'bob@example.com', None, "don@example.com", "amir@example.com",
"ken@example.com", "", "barbara@example.com"
]
}])
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[1, "manualSort", "ManualSortPos", False, ""],
[2, "fname", "Text", False, ""],
[3, "mname", "Text", False, ""],
[4, "lname", "Text", False, ""],
[2, "employee_id", "Int", False, ""],
[3, "fname", "Text", False, ""],
[4, "mname", "Text", False, ""],
[5, "lname", "Text", False, ""],
[6, "email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 1)
#Filled in colids for existing table
self.TEMP_transform_rule_colids = {
"destCols": [
{ "colId": "First_Name", "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": "Last_Name", "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": "Middle_Initial", "label": "Middle Initial",
"type": "Text", "formula": "$mname[0]" },
#{ "colId": "Blank", "label": "Blank", //destination1 has no blank column
# "type": "Text", "formula": "" },
]
"destCols": [
{ "colId": "Employee_ID", "label": "Employee ID",
"type": "Int", "formula": "$employee_id" },
{ "colId": "First_Name", "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": "Last_Name", "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": "Middle_Initial", "label": "Middle Initial",
"type": "Text", "formula": "$mname[0] if $mname else ''" },
{ "colId": "Email", "label": "Email",
"type": "Text", "formula": "$email" },
#{ "colId": "Blank", "label": "Blank", // Destination1 has no blank column
# "type": "Text", "formula": "" },
]
}
#Then try it with blank in colIds (for new tables)
self.TEMP_transform_rule_no_colids = {
"destCols": [
{ "colId": None, "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": None, "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": None, "label": "Middle Initial",
"type": "Text", "formula": "$mname[0]" },
{ "colId": None, "label": "Blank",
"type": "Text", "formula": "" },
]
"destCols": [
{ "colId": None, "label": "Employee ID",
"type": "Int", "formula": "$employee_id" },
{ "colId": None, "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": None, "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": None, "label": "Middle Initial",
"type": "Text", "formula": "$mname[0] if $mname else ''" },
{ "colId": None, "label": "Email",
"type": "Text", "formula": "$email" },
{ "colId": None, "label": "Blank",
"type": "Text", "formula": "" },
]
}
# Add destination table which contains columns corresponding to source table with different names
self.apply_user_action(['AddTable', 'Destination1', [
{'label': 'First Name', 'id': 'First_Name', 'type': 'Text'},
{'label': 'Last Name', 'id': 'Last_Name', 'type': 'Text'},
{'label': 'Middle Initial', 'id': 'Middle_Initial', 'type': 'Text'}]])
self.apply_user_action(['BulkAddRecord', 'Destination1', [1], {'First_Name': ['Bob'],
'Last_Name': ['Nike'],
'Middle_Initial': ['F.']}])
{'label': 'Employee ID', 'id': 'Employee_ID', 'type': 'Int'},
{'label': 'First Name', 'id': 'First_Name', 'type': 'Text'},
{'label': 'Last Name', 'id': 'Last_Name', 'type': 'Text'},
{'label': 'Middle Initial', 'id': 'Middle_Initial', 'type': 'Text'},
{'label': 'Email', 'id': 'Email', 'type': 'Text'}]])
self.apply_user_action(['BulkAddRecord', 'Destination1', [1, 2, 3], {
'Employee_ID': [1, 2, 3],
'First_Name': ['Bob', 'Carry', 'Don'],
'Last_Name': ['Nike', 'Jonson', "Yoon"],
'Middle_Initial': ['F.', 'M.', None],
'Email': ['', 'carry.m.jonson@example.com', 'don.b.yoon@example.com']
}])
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[5, "manualSort", "ManualSortPos", False, ""],
[6, "First_Name", "Text", False, ""],
[7, "Last_Name", "Text", False, ""],
[8, "Middle_Initial","Text", False, ""],
["id", "colId", "type", "isFormula", "formula"],
[7, "manualSort", "ManualSortPos", False, ""],
[8, "Employee_ID", "Int", False, ""],
[9, "First_Name", "Text", False, ""],
[10, "Last_Name", "Text", False, ""],
[11, "Middle_Initial", "Text", False, ""],
[12, "Email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 2)
# Verify created tables
@ -84,57 +107,62 @@ class TestImportTransform(test_engine.EngineTestCase):
])
def test_finish_import_into_new_table(self):
# Add source and destination tables
self.init_state()
#into_new_table = True, transform_rule : no colids (will be generated for new table)
#into_new_table = True, transform_rule : no colids (will be generated for new table), merge_options = {}
out_actions = self.apply_user_action(
['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, self.TEMP_transform_rule_no_colids])
['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, self.TEMP_transform_rule_no_colids, {}])
self.assertPartialOutActions(out_actions, {
"stored": [
["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0]", "isFormula": True, "type": "Text"}],
["AddRecord", "_grist_Tables_column", 9, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0]", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 9.0, "type": "Text", "widgetOptions": ""}],
["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3]],
["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
["RemoveRecord", "_grist_Views_section", 1],
["RemoveRecord", "_grist_TabBar", 1],
["RemoveRecord", "_grist_Pages", 1],
["RemoveRecord", "_grist_Views", 1],
["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 9]],
["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
["RemoveRecord", "_grist_Tables", 1],
["RemoveTable", "Hidden_table"],
["AddTable", "NewTable", [{"formula": "", "id": "manualSort", "isFormula": False, "type": "ManualSortPos"}, {"formula": "", "id": "First_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Last_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Middle_Initial", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Blank", "isFormula": False, "type": "Text"}]],
["AddTable", "NewTable", [{"formula": "", "id": "manualSort", "isFormula": False, "type": "ManualSortPos"}, {"formula": "", "id": "Employee_ID", "isFormula": False, "type": "Int"}, {"formula": "", "id": "First_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Last_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Middle_Initial", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Email", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Blank", "isFormula": False, "type": "Text"}]],
["AddRecord", "_grist_Tables", 3, {"primaryViewId": 0, "tableId": "NewTable"}],
["BulkAddRecord", "_grist_Tables_column", [9, 10, 11, 12, 13], {"colId": ["manualSort", "First_Name", "Last_Name", "Middle_Initial", "Blank"], "formula": ["", "", "", "", ""], "isFormula": [False, False, False, False, False], "label": ["manualSort", "First Name", "Last Name", "Middle Initial", "Blank"], "parentId": [3, 3, 3, 3, 3], "parentPos": [9.0, 10.0, 11.0, 12.0, 13.0], "type": ["ManualSortPos", "Text", "Text", "Text", "Text"], "widgetOptions": ["", "", "", "", ""]}],
["BulkAddRecord", "_grist_Tables_column", [13, 14, 15, 16, 17, 18, 19], {"colId": ["manualSort", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "Blank"], "formula": ["", "", "", "", "", "", ""], "isFormula": [False, False, False, False, False, False, False], "label": ["manualSort", "Employee ID", "First Name", "Last Name", "Middle Initial", "Email", "Blank"], "parentId": [3, 3, 3, 3, 3, 3, 3], "parentPos": [13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0], "type": ["ManualSortPos", "Int", "Text", "Text", "Text", "Text", "Text"], "widgetOptions": ["", "", "", "", "", "", ""]}],
["AddRecord", "_grist_Views", 3, {"name": "NewTable", "type": "raw_data"}],
["AddRecord", "_grist_TabBar", 3, {"tabPos": 3.0, "viewRef": 3}],
["AddRecord", "_grist_Pages", 3, {"indentation": 0, "pagePos": 3.0, "viewRef": 3}],
["AddRecord", "_grist_Views_section", 3, {"borderWidth": 1, "defaultWidth": 100, "parentId": 3, "parentKey": "record", "sortColRefs": "[]", "tableRef": 3, "title": ""}],
["BulkAddRecord", "_grist_Views_section_field", [7, 8, 9, 10], {"colRef": [10, 11, 12, 13], "parentId": [3, 3, 3, 3], "parentPos": [7.0, 8.0, 9.0, 10.0]}],
["BulkAddRecord", "_grist_Views_section_field", [11, 12, 13, 14, 15, 16], {"colRef": [14, 15, 16, 17, 18, 19], "parentId": [3, 3, 3, 3, 3, 3], "parentPos": [11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}],
["UpdateRecord", "_grist_Tables", 3, {"primaryViewId": 3}],
["BulkAddRecord", "NewTable", [1, 2], {"First_Name": ["Carry", "Don"], "Last_Name": ["Jonson", "Yoon"], "Middle_Initial": ["M", "B"], "manualSort": [1.0, 2.0]}],
["BulkAddRecord", "NewTable", [1, 2, 3, 4, 5, 6, 7], {"Email": ["bob@example.com", None, "don@example.com", "amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [1, 2, 3, 4, 5, 6, 7], "First_Name": ["Bob", "Carry", "Don", "Amir", "Ken", "George", "Barbara"], "Last_Name": ["Nike", "Jonson", "Yoon", "Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["F", "", "B", "", "C", "", "D"], "manualSort": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]}],
]
})
#1-4 in hidden table, 5-8 in destTable, 9-13 for new table
#1-6 in hidden table, 7-12 in destTable, 13-19 for new table
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[ 9, "manualSort", "ManualSortPos", False, ""],
[10, "First_Name", "Text", False, ""],
[11, "Last_Name", "Text", False, ""],
[12, "Middle_Initial", "Text", False, ""],
[13, "Blank", "Text", False, ""],
[13, "manualSort", "ManualSortPos", False, ""],
[14, "Employee_ID", "Int", False, ""],
[15, "First_Name", "Text", False, ""],
[16, "Last_Name", "Text", False, ""],
[17, "Middle_Initial", "Text", False, ""],
[18, "Email", "Text", False, ""],
[19, "Blank", "Text", False, ""],
], rows=lambda r: r.parentId.id == 3)
self.assertTableData('NewTable', cols="all", data=[
["id", "First_Name", "Last_Name", "Middle_Initial", "Blank", "manualSort"],
[1, "Carry", "Jonson", "M", "", 1.0],
[2, "Don", "Yoon", "B", "", 2.0]
["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "Blank", "manualSort"],
[1, 1, "Bob", "Nike", "F", "bob@example.com", "", 1.0],
[2, 2, "Carry", "Jonson", "", None, "", 2.0],
[3, 3, "Don", "Yoon", "B", "don@example.com", "", 3.0],
[4, 4, "Amir", "Greene", "", "amir@example.com", "", 4.0],
[5, 5, "Ken", "Foster", "C", "ken@example.com", "", 5.0],
[6, 6, "George", "Huang", "", "", "", 6.0],
[7, 7, "Barbara", "Kinney", "D", "barbara@example.com", "", 7.0],
])
# Verify removed hidden table and add the new one
self.assertPartialData("_grist_Tables", ["id", "tableId"], [
[2, "Destination1"],
@ -142,28 +170,36 @@ class TestImportTransform(test_engine.EngineTestCase):
])
def test_finish_import_into_existing_table(self):
# Add source and destination tables
self.init_state()
#into_new_table false, transform_rule=null
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids])
#into_new_table = False, transform_rule : colids, merge_options = None
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids, None])
#1-4 in hidden table, 5-8 in destTable
#1-6 in hidden table, 7-12 in destTable
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[5, "manualSort", "ManualSortPos", False, ""],
[6, "First_Name", "Text", False, ""],
[7, "Last_Name", "Text", False, ""],
[8, "Middle_Initial", "Text", False, ""],
[7, "manualSort", "ManualSortPos", False, ""],
[8, "Employee_ID", "Int", False, ""],
[9, "First_Name", "Text", False, ""],
[10, "Last_Name", "Text", False, ""],
[11, "Middle_Initial", "Text", False, ""],
[12, "Email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 2)
# First 3 rows were already in Destination1 before import
self.assertTableData('Destination1', cols="all", data=[
["id", "First_Name", "Last_Name", "Middle_Initial", "manualSort"],
[1, "Bob", "Nike", "F.", 1.0], #F. was there to begin with
[2, "Carry", "Jonson", "M", 2.0], #others imported with $mname[0]
[3, "Don", "Yoon", "B", 3.0],
["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
[1, 1, "Bob", "Nike", "F.", "", 1.0],
[2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
[3, 3, "Don", "Yoon", None, "don.b.yoon@example.com", 3.0],
[4, 1, "Bob", "Nike", "F", "bob@example.com", 4.0],
[5, 2, "Carry", "Jonson", "", None, 5.0],
[6, 3, "Don", "Yoon", "B", "don@example.com", 6.0],
[7, 4, "Amir", "Greene", "", "amir@example.com", 7.0],
[8, 5, "Ken", "Foster", "C", "ken@example.com", 8.0],
[9, 6, "George", "Huang", "", "", 9.0],
[10, 7, "Barbara", "Kinney", "D", "barbara@example.com", 10.0],
])
# Verify removed hidden table
@ -174,22 +210,29 @@ class TestImportTransform(test_engine.EngineTestCase):
# Add source and destination tables
self.init_state()
#into_new_table = True, transform_rule : no colids (will be generated for new table)
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, None])
#into_new_table = True, transform_rule = None, merge_options = None
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, None, None])
#1-4 in src table, 5-8 in hiddentable
#1-6 in src table, 7-12 in hiddentable
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[9, "manualSort", "ManualSortPos", False, ""],
[10, "fname", "Text", False, ""],
[11, "mname", "Text", False, ""],
[12, "lname", "Text", False, ""],
[13, "manualSort", "ManualSortPos", False, ""],
[14, "employee_id", "Int", False, ""],
[15, "fname", "Text", False, ""],
[16, "mname", "Text", False, ""],
[17, "lname", "Text", False, ""],
[18, "email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 3)
self.assertTableData('NewTable', cols="all", data=[
["id", "fname", "lname", "mname", "manualSort"],
[1, "Carry", "Jonson", "M.", 1.0],
[2, "Don", "Yoon", "B.", 2.0]
["id", "employee_id", "fname", "lname", "mname", "email", "manualSort"],
[1, 1, "Bob", "Nike", "F.", "bob@example.com", 1.0],
[2, 2, "Carry", "Jonson", None, None, 2.0],
[3, 3, "Don", "Yoon", "B.", "don@example.com", 3.0],
[4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
[5, 5, "Ken", "Foster", "C.", "ken@example.com", 5.0],
[6, 6, "George", "Huang", "", "", 6.0],
[7, 7, "Barbara", "Kinney", "D.", "barbara@example.com", 7.0],
])
@ -198,3 +241,338 @@ class TestImportTransform(test_engine.EngineTestCase):
[2, "Destination1"],
[3, "NewTable"]
])
def test_finish_import_into_existing_table_with_single_merge_col(self):
  """
  Incremental import into Destination1, matching on a single column (Employee_ID)
  with the 'replace-with-nonblank-source' strategy: matched destination rows are
  updated in place (blank incoming values do NOT clobber existing values), and
  unmatched incoming rows are appended.
  """
  # Add source and destination tables.
  self.init_state()

  # Use 'Employee_ID' as the merge column, updating existing employees in Destination1 with the same employee id.
  out_actions = self.apply_user_action(
    ['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
     {'mergeCols': ['Employee_ID'], 'mergeStrategy': {'type': 'replace-with-nonblank-source'}}]
  )

  # Check that the right actions were created.
  self.assertPartialOutActions(out_actions, {
    "stored": [
      # A helper transform column is added to the hidden source table first...
      ["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
      ["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
      # ...then the hidden table and its view metadata are torn down...
      ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
      ["RemoveRecord", "_grist_Views_section", 1],
      ["RemoveRecord", "_grist_TabBar", 1],
      ["RemoveRecord", "_grist_Pages", 1],
      ["RemoveRecord", "_grist_Views", 1],
      ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
      ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
      ["RemoveRecord", "_grist_Tables", 1],
      ["RemoveTable", "Hidden_table"],
      # ...and finally matched rows (ids 1, 3) are updated; unmatched rows are appended.
      ["BulkUpdateRecord", "Destination1", [1, 3], {"Email": ["bob@example.com", "don@example.com"], "Middle_Initial": ["F", "B"]}],
      ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {"Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [4, 5, 6, 7], "First_Name": ["Amir", "Ken", "George", "Barbara"], "Last_Name": ["Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["", "C", "", "D"], "manualSort": [4.0, 5.0, 6.0, 7.0]}],
    ]
  })

  # Destination1's column metadata is unchanged by the merge.
  self.assertTableData('_grist_Tables_column', cols="subset", data=[
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ], rows=lambda r: r.parentId.id == 2)

  # Check that Destination1 has no duplicates and that previous records (1 - 3) are updated.
  # Note row 2 (Carry): incoming mname/email were blank, so "M." and the existing email survive.
  self.assertTableData('Destination1', cols="all", data=[
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ])

  # The hidden import table is gone; only Destination1 remains.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_finish_import_into_existing_table_with_multiple_merge_cols(self):
  """
  Incremental import matching on a composite key (First_Name + Last_Name): a
  destination row is only merged when ALL merge columns match the incoming row.
  Expected results here are identical to merging on Employee_ID, since names
  and ids identify the same employees in the fixture data.
  """
  # Add source and destination tables.
  self.init_state()

  # Use 'First_Name' and 'Last_Name' as the merge columns, updating existing employees in Destination1 with the same name.
  out_actions = self.apply_user_action(
    ['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
     {'mergeCols': ['First_Name', 'Last_Name'], 'mergeStrategy': {'type': 'replace-with-nonblank-source'}}]
  )

  # Check that the right actions were created.
  self.assertPartialOutActions(out_actions, {
    "stored": [
      # Helper transform column added, then the hidden table torn down (same as single-column merge)...
      ["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
      ["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
      ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
      ["RemoveRecord", "_grist_Views_section", 1],
      ["RemoveRecord", "_grist_TabBar", 1],
      ["RemoveRecord", "_grist_Pages", 1],
      ["RemoveRecord", "_grist_Views", 1],
      ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
      ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
      ["RemoveRecord", "_grist_Tables", 1],
      ["RemoveTable", "Hidden_table"],
      # ...rows matching on BOTH name columns (ids 1, 3) are updated; the rest appended.
      ["BulkUpdateRecord", "Destination1", [1, 3], {"Email": ["bob@example.com", "don@example.com"], "Middle_Initial": ["F", "B"]}],
      ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {"Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [4, 5, 6, 7], "First_Name": ["Amir", "Ken", "George", "Barbara"], "Last_Name": ["Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["", "C", "", "D"], "manualSort": [4.0, 5.0, 6.0, 7.0]}],
    ]
  })

  # Destination1's column metadata is unchanged by the merge.
  self.assertTableData('_grist_Tables_column', cols="subset", data=[
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ], rows=lambda r: r.parentId.id == 2)

  # Check that Destination1 has no duplicates and that previous records (1 - 3) are updated.
  self.assertTableData('Destination1', cols="all", data=[
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ])

  # The hidden import table is gone; only Destination1 remains.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_finish_import_into_existing_table_with_no_matching_merge_cols(self):
  """
  Incremental import where the merge column (Email) matches nothing: no
  destination rows should be updated, and every incoming row should be
  appended — i.e. the merge degrades to a plain import with duplicates.
  """
  # Add source and destination tables.
  self.init_state()

  # Use 'Email' as the merge column: existing employees in Destination1 have different emails, so none should match incoming data.
  out_actions = self.apply_user_action(
    ['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
     {'mergeCols': ['Email'], 'mergeStrategy': {'type': 'replace-with-nonblank-source'}}]
  )

  # Check that the right actions were created.
  self.assertPartialOutActions(out_actions, {
    "stored": [
      # Helper transform column added, then the hidden table torn down...
      ["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
      ["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
      ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
      ["RemoveRecord", "_grist_Views_section", 1],
      ["RemoveRecord", "_grist_TabBar", 1],
      ["RemoveRecord", "_grist_Pages", 1],
      ["RemoveRecord", "_grist_Views", 1],
      ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
      ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
      ["RemoveRecord", "_grist_Tables", 1],
      ["RemoveTable", "Hidden_table"],
      # ...no BulkUpdateRecord: nothing matched, so all 7 incoming rows are appended.
      ["BulkAddRecord", "Destination1", [4, 5, 6, 7, 8, 9, 10], {"Email": ["bob@example.com", None, "don@example.com", "amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [1, 2, 3, 4, 5, 6, 7], "First_Name": ["Bob", "Carry", "Don", "Amir", "Ken", "George", "Barbara"], "Last_Name": ["Nike", "Jonson", "Yoon", "Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["F", "", "B", "", "C", "", "D"], "manualSort": [4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]}],
    ]
  })

  # Destination1's column metadata is unchanged by the merge.
  self.assertTableData('_grist_Tables_column', cols="subset", data=[
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ], rows=lambda r: r.parentId.id == 2)

  # Check that no existing records were updated.
  self.assertTableData('Destination1', cols="all", data=[
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F.", "", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", None, "don.b.yoon@example.com", 3.0],
    [4, 1, "Bob", "Nike", "F", "bob@example.com", 4.0],
    [5, 2, "Carry", "Jonson", "", None, 5.0],
    [6, 3, "Don", "Yoon", "B", "don@example.com", 6.0],
    [7, 4, "Amir", "Greene", "", "amir@example.com", 7.0],
    [8, 5, "Ken", "Foster", "C", "ken@example.com", 8.0],
    [9, 6, "George", "Huang", "", "", 9.0],
    [10, 7, "Barbara", "Kinney", "D", "barbara@example.com", 10.0],
  ])

  # The hidden import table is gone; only Destination1 remains.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_replace_all_fields_merge_strategy(self):
  """
  'replace-all-fields' strategy: for matched rows, every incoming value —
  including blanks/None — overwrites the destination value. Contrast with
  'replace-with-nonblank-source', which would preserve destination values
  when the incoming value is blank.
  """
  # Add source and destination tables.
  self.init_state()

  # Use replace all fields strategy on the 'Employee_ID' column.
  out_actions = self.apply_user_action(
    ['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
     {'mergeCols': ['Employee_ID'], 'mergeStrategy': {'type': 'replace-all-fields'}}]
  )

  # Check that the right actions were created.
  self.assertPartialOutActions(out_actions, {
    "stored": [
      # Helper transform column added, then the hidden table torn down...
      ["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
      ["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
      ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
      ["RemoveRecord", "_grist_Views_section", 1],
      ["RemoveRecord", "_grist_TabBar", 1],
      ["RemoveRecord", "_grist_Pages", 1],
      ["RemoveRecord", "_grist_Views", 1],
      ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
      ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
      ["RemoveRecord", "_grist_Tables", 1],
      ["RemoveTable", "Hidden_table"],
      # ...all three matched rows update, with blanks (row 2) overwriting existing values.
      ["BulkUpdateRecord", "Destination1", [1, 2, 3], {"Email": ["bob@example.com", None, "don@example.com"], "Middle_Initial": ["F", "", "B"]}],
      ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {"Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [4, 5, 6, 7], "First_Name": ["Amir", "Ken", "George", "Barbara"], "Last_Name": ["Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["", "C", "", "D"], "manualSort": [4.0, 5.0, 6.0, 7.0]}],
    ]
  })

  # Destination1's column metadata is unchanged by the merge.
  self.assertTableData('_grist_Tables_column', cols="subset", data=[
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ], rows=lambda r: r.parentId.id == 2)

  # Check that existing fields were replaced with incoming fields.
  # Row 2 (Carry) now has blank mname/email — blanks replaced the old values.
  self.assertTableData('Destination1', cols="all", data=[
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "", None, 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ])

  # The hidden import table is gone; only Destination1 remains.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_replace_blank_fields_only_merge_strategy(self):
  """
  'replace-blank-fields-only' strategy: for matched rows, incoming values are
  used ONLY to fill destination fields that are currently blank; non-blank
  destination values are always preserved.
  """
  # Add source and destination tables.
  self.init_state()

  # Use replace blank fields only strategy on the 'Employee_ID' column.
  out_actions = self.apply_user_action(
    ['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
     {'mergeCols': ['Employee_ID'], 'mergeStrategy': {'type': 'replace-blank-fields-only'}}]
  )

  # Check that the right actions were created.
  self.assertPartialOutActions(out_actions, {
    "stored": [
      # Helper transform column added, then the hidden table torn down...
      ["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
      ["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
      ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
      ["RemoveRecord", "_grist_Views_section", 1],
      ["RemoveRecord", "_grist_TabBar", 1],
      ["RemoveRecord", "_grist_Pages", 1],
      ["RemoveRecord", "_grist_Views", 1],
      ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
      ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
      ["RemoveRecord", "_grist_Tables", 1],
      ["RemoveTable", "Hidden_table"],
      # ...only the blank destination fields in rows 1 and 3 get filled in.
      ["BulkUpdateRecord", "Destination1", [1, 3], {"Email": ["bob@example.com", "don.b.yoon@example.com"], "Middle_Initial": ["F.", "B"]}],
      ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {"Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [4, 5, 6, 7], "First_Name": ["Amir", "Ken", "George", "Barbara"], "Last_Name": ["Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["", "C", "", "D"], "manualSort": [4.0, 5.0, 6.0, 7.0]}],
    ]
  })

  # Destination1's column metadata is unchanged by the merge.
  self.assertTableData('_grist_Tables_column', cols="subset", data=[
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ], rows=lambda r: r.parentId.id == 2)

  # Check that only blank existing fields were updated.
  # E.g. row 1 keeps "F." (non-blank) but gains the previously-blank email.
  self.assertTableData('Destination1', cols="all", data=[
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F.", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don.b.yoon@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ])

  # The hidden import table is gone; only Destination1 remains.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_merging_updates_all_duplicates_in_destination_table(self):
  """
  When the destination table contains several rows with the same merge-key
  value, a matching incoming row updates ALL of them (ids 3, 4 and 5 here all
  share Employee_ID 3), not just the first match.
  """
  # Add source and destination tables.
  self.init_state()

  # Add duplicates to the destination table with different values than original.
  self.apply_user_action(['BulkAddRecord', 'Destination1', [4, 5], {
    'Employee_ID': [3, 3],
    'First_Name': ['Don', 'Don'],
    'Last_Name': ["Yoon", "Yoon"],
    'Middle_Initial': [None, 'B'],
    'Email': ['don.yoon@example.com', 'yoon.don@example.com']
  }])

  # Use replace with nonblank source strategy on the 'Employee_ID' column.
  self.apply_user_action(
    ['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
     {'mergeCols': ['Employee_ID'], 'mergeStrategy': {'type': 'replace-with-nonblank-source'}}]
  )

  # Check that all duplicates were updated with new data from the source table.
  # Rows 3, 4 and 5 (all Employee_ID 3) now hold identical merged values.
  self.assertTableData('Destination1', cols="all", data=[
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 3, "Don", "Yoon", "B", "don@example.com", 4.0],
    [5, 3, "Don", "Yoon", "B", "don@example.com", 5.0],
    [6, 4, "Amir", "Greene", "", "amir@example.com", 6.0],
    [7, 5, "Ken", "Foster", "C", "ken@example.com", 7.0],
    [8, 6, "George", "Huang", "", "", 8.0],
    [9, 7, "Barbara", "Kinney", "D", "barbara@example.com", 9.0],
  ])

  # The hidden import table is gone; only Destination1 remains.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_merging_uses_latest_duplicate_in_source_table_for_matching(self):
  """
  When the SOURCE table contains several rows with the same merge-key value,
  the LAST such row wins: it is the one merged into the destination (here the
  Employee_ID-3 record added with id 9 overrides both id 8 and the original).
  """
  # Add source and destination tables.
  self.init_state()

  # Add duplicates to the source table with different values than the original.
  self.apply_user_action(['BulkAddRecord', 'Hidden_table', [8, 9], {
    'employee_id': [3, 3],
    'fname': ['Don', 'Don'],
    'lname': ["Yoon", "yoon"],
    'mname': [None, None],
    'email': ['d.yoon@example.com', 'yoon.don@example.com']
  }])

  # Use replace with nonblank source strategy on the 'Employee_ID' column.
  self.apply_user_action(
    ['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
     {'mergeCols': ['Employee_ID'], 'mergeStrategy': {'type': 'replace-with-nonblank-source'}}]
  )

  # Check that the last record for Don Yoon in the source table was used for updating the destination table.
  # Row 3 reflects the id-9 source row: "yoon" (lowercase) and yoon.don@example.com.
  self.assertTableData('Destination1', cols="all", data=[
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "yoon", None, "yoon.don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ])

  # The hidden import table is gone; only Destination1 remains.
  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])

@ -1511,7 +1511,8 @@ class UserActions(object):
return self._import_actions.DoGenImporterView(source_table_id, dest_table_id, transform_rule)
@useraction
def TransformAndFinishImport(self, hidden_table_id, dest_table_id,
into_new_table, transform_rule):
return self._import_actions.DoTransformAndFinishImport(
hidden_table_id, dest_table_id, into_new_table, transform_rule)
def TransformAndFinishImport(self, hidden_table_id, dest_table_id, into_new_table,
transform_rule, merge_options = None):
return self._import_actions.DoTransformAndFinishImport(hidden_table_id, dest_table_id,
into_new_table, transform_rule,
merge_options or {})

Loading…
Cancel
Save