From 62db263d1fbfc783b2272af06b91ecc7d5870af6 Mon Sep 17 00:00:00 2001 From: George Gevoian Date: Thu, 7 Oct 2021 23:32:59 -0700 Subject: [PATCH] (core) Add diff preview to Importer Summary: Updates the preview table in Importer to show a diff of changes when importing into an existing table and updating existing records. Test Plan: Browser tests. Reviewers: paulfitz Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D3060 --- app/client/components/DocComm.ts | 1 + app/client/components/Importer.ts | 266 ++++++++++++++++++++------- app/common/ActiveDocAPI.ts | 9 +- app/server/lib/ActiveDoc.ts | 24 ++- app/server/lib/ActiveDocImport.ts | 296 ++++++++++++++++++++---------- app/server/lib/DocWorker.ts | 1 + app/server/lib/ExpandedQuery.ts | 28 ++- test/nbrowser/gristUtils.ts | 14 +- 8 files changed, 471 insertions(+), 168 deletions(-) diff --git a/app/client/components/DocComm.ts b/app/client/components/DocComm.ts index 6b1835e6..9fb5bbc1 100644 --- a/app/client/components/DocComm.ts +++ b/app/client/components/DocComm.ts @@ -40,6 +40,7 @@ export class DocComm extends Disposable implements ActiveDocAPI { public importFiles = this._wrapMethod("importFiles"); public finishImportFiles = this._wrapMethod("finishImportFiles"); public cancelImportFiles = this._wrapMethod("cancelImportFiles"); + public generateImportDiff = this._wrapMethod("generateImportDiff"); public addAttachments = this._wrapMethod("addAttachments"); public findColFromValues = this._wrapMethod("findColFromValues"); public getFormulaError = this._wrapMethod("getFormulaError"); diff --git a/app/client/components/Importer.ts b/app/client/components/Importer.ts index 71642f2f..f3937538 100644 --- a/app/client/components/Importer.ts +++ b/app/client/components/Importer.ts @@ -11,12 +11,14 @@ import {ImportSourceElement} from 'app/client/lib/ImportSourceElement'; import {fetchURL, isDriveUrl, selectFiles, uploadFiles} from 'app/client/lib/uploads'; import {reportError} from 'app/client/models/AppModel'; import {ViewSectionRec} from 'app/client/models/DocModel'; +import {SortedRowSet} from "app/client/models/rowset"; import {openFilePicker} from "app/client/ui/FileDialog"; import {bigBasicButton, bigPrimaryButton} from 'app/client/ui2018/buttons'; import {colors, testId, vars} from 'app/client/ui2018/cssVars'; import {icon} from 'app/client/ui2018/icons'; import {IOptionFull, linkSelect, multiSelect} from 'app/client/ui2018/menus'; import {cssModalButtons, cssModalTitle} from 'app/client/ui2018/modals'; +import {loadingSpinner} from "app/client/ui2018/loaders"; import {DataSourceTransformed, ImportResult, ImportTableResult, MergeOptions, MergeOptionsMap, MergeStrategy, TransformColumn, TransformRule, TransformRuleMap} from "app/common/ActiveDocAPI"; @@ -27,6 +29,7 @@ import {Computed, Disposable, dom, DomContents, IDisposable, MutableObsArray, ob styled} from 'grainjs'; import {labeledSquareCheckbox} from "app/client/ui2018/checkbox"; import {ACCESS_DENIED, AUTH_INTERRUPTED, canReadPrivateFiles, getGoogleCodeForReading} from "app/client/ui/googleAuth"; +import debounce = require('lodash/debounce'); // Special values for import destinations; null means "new table". // TODO We should also support "skip table" (needs server support), so that one can open, say, @@ -36,27 +39,34 @@ type DestId = string | null; // We expect a function for creating the preview GridView, to avoid the need to require the // GridView module here. That brings many dependencies, making a simple test fixture difficult. type CreatePreviewFunc = (vs: ViewSectionRec) => GridView; -type GridView = IDisposable & {viewPane: HTMLElement}; +type GridView = IDisposable & {viewPane: HTMLElement, sortedRows: SortedRowSet, listenTo: (...args: any[]) => void}; -// SourceInfo conteains information about source table and corresponding destination table id, -// transform sectionRef (can be used to show previous transform section with users changes) -// and also originalFilename and path. export interface SourceInfo { + // The source table id. hiddenTableId: string; uploadFileIndex: number; origTableName: string; sourceSection: ViewSectionRec; - transformSection: Observable; + // A viewsection containing transform (formula) columns pointing to the original source columns. + transformSection: Observable; + // The destination table id. destTableId: Observable; + // True if there is at least one request in progress to create a new transform section. + isLoadingSection: Observable; + // Reference to last promise for the GenImporterView action (which creates `transformSection`). + lastGenImporterViewPromise: Promise|null; } -// UI state of selected merge options for each source table (from SourceInfo). + +interface MergeOptionsStateMap { + [hiddenTableId: string]: MergeOptionsState|undefined; +} + +// UI state of merge options for a SourceInfo. interface MergeOptionsState { - [srcTableId: string]: { - updateExistingRecords: Observable; - mergeCols: MutableObsArray; - mergeStrategy: Observable; - hasInvalidMergeCols: Observable; - } | undefined; + updateExistingRecords: Observable; + mergeCols: MutableObsArray; + mergeStrategy: Observable; + hasInvalidMergeCols: Observable; } /** @@ -132,7 +142,7 @@ export class Importer extends Disposable { private _uploadResult?: UploadResult; private _screen: PluginScreen; - private _mergeOptions: MergeOptionsState = {}; + private _mergeOptions: MergeOptionsStateMap = {}; private _parseOptions = Observable.create(this, {}); private _sourceInfoArray = Observable.create(this, []); private _sourceInfoSelected = Observable.create(this, null); @@ -140,10 +150,21 @@ export class Importer extends Disposable { private _previewViewSection: Observable = Computed.create(this, this._sourceInfoSelected, (use, info) => { if (!info) { return null; } + + const isLoading = use(info.isLoadingSection); + if (isLoading) { return null; } + const viewSection = use(info.transformSection); return viewSection && !use(viewSection._isDeleted) ? viewSection : null; }); + // True if there is at least one request in progress to generate an import diff. + private _isLoadingDiff = Observable.create(this, false); + // Promise for the most recent generateImportDiff action. + private _lastGenImportDiffPromise: Promise|null = null; + + private _updateImportDiff = debounce(this._updateDiff, 1000, {leading: true, trailing: true}); + // destTables is a list of options for import destinations, and includes all tables in the // document, plus two values: to import as a new table, and to skip an import table entirely. private _destTables = Computed.create>>(this, (use) => [ @@ -156,6 +177,10 @@ export class Importer extends Disposable { private _createPreview: CreatePreviewFunc) { super(); this._screen = PluginScreen.create(this, _importSourceElem?.importSource.label || "Import from file"); + + this.onDispose(() => { + this._resetImportDiffState(); + }); } /* @@ -225,11 +250,25 @@ export class Importer extends Disposable { return this._gristDoc.docModel.viewSections.getRowModel(sectionRef); } - private async _updateTransformSection(sourceInfo: SourceInfo, destTableId: string|null) { - const transformSectionRef = await this._gristDoc.docData.sendAction( - ['GenImporterView', sourceInfo.hiddenTableId, destTableId, null]); + private async _updateTransformSection(sourceInfo: SourceInfo) { + this._resetImportDiffState(); + + sourceInfo.isLoadingSection.set(true); + sourceInfo.transformSection.set(null); + + const genImporterViewPromise = this._gristDoc.docData.sendAction( + ['GenImporterView', sourceInfo.hiddenTableId, sourceInfo.destTableId.get(), null]); + sourceInfo.lastGenImporterViewPromise = genImporterViewPromise; + const transformSectionRef = await genImporterViewPromise; + + // If the request is superseded by a newer request, or the Importer is disposed, do nothing. + if (this.isDisposed() || sourceInfo.lastGenImporterViewPromise !== genImporterViewPromise) { + return; + } + + // Otherwise, update the transform section for `sourceInfo`. sourceInfo.transformSection.set(this._gristDoc.docModel.viewSections.getRowModel(transformSectionRef)); - sourceInfo.destTableId.set(destTableId); + sourceInfo.isLoadingSection.set(false); } private _getTransformedDataSource(upload: UploadResult): DataSourceTransformed { @@ -262,7 +301,12 @@ export class Importer extends Disposable { } private _createTransformRule(sourceInfo: SourceInfo): TransformRule { - const transformFields = sourceInfo.transformSection.get().viewFields().peek(); + const transformSection = sourceInfo.transformSection.get(); + if (!transformSection) { + throw new Error(`Table ${sourceInfo.hiddenTableId} is missing transform section`); + } + + const transformFields = transformSection.viewFields().peek(); const sourceFields = sourceInfo.sourceSection.viewFields().peek(); const destTableId: DestId = sourceInfo.destTableId.get(); @@ -295,8 +339,9 @@ export class Importer extends Disposable { private async _reImport(upload: UploadResult) { this._screen.renderSpinner(); + this._resetImportDiffState(); try { - const parseOptions = {...this._parseOptions.get(), NUM_ROWS: 100}; + const parseOptions = {...this._parseOptions.get(), NUM_ROWS: 0}; const importResult: ImportResult = await this._docComm.importFiles( this._getTransformedDataSource(upload), parseOptions, this._getHiddenTableIds()); @@ -308,7 +353,9 @@ export class Importer extends Disposable { origTableName: info.origTableName, sourceSection: this._getPrimaryViewSection(info.hiddenTableId)!, transformSection: Observable.create(null, this._getSectionByRef(info.transformSectionRef)), - destTableId: Observable.create(null, info.destTableId) + destTableId: Observable.create(null, info.destTableId), + isLoadingSection: Observable.create(null, false), + lastGenImporterViewPromise: null }))); if (this._sourceInfoArray.get().length === 0) { @@ -321,7 +368,7 @@ export class Importer extends Disposable { updateExistingRecords: Observable.create(null, false), mergeCols: obsArray(), mergeStrategy: Observable.create(null, {type: 'replace-with-nonblank-source'}), - hasInvalidMergeCols: Observable.create(null, false) + hasInvalidMergeCols: Observable.create(null, false), }; }); @@ -341,6 +388,8 @@ export class Importer extends Disposable { if (!isConfigValid) { return; } this._screen.renderSpinner(); + this._resetImportDiffState(); + const parseOptions = {...this._parseOptions.get(), NUM_ROWS: 0}; const mergeOptionMaps = this._getMergeOptionMaps(upload); @@ -356,6 +405,8 @@ export class Importer extends Disposable { } private async _cancelImport() { + this._resetImportDiffState(); + if (this._uploadResult) { await this._docComm.cancelImportFiles( this._getTransformedDataSource(this._uploadResult), this._getHiddenTableIds()); @@ -391,6 +442,59 @@ export class Importer extends Disposable { return cssModalHeader(cssModalTitle(title), rightElement); } + /** + * Triggers an update of the import diff in the preview table. When called in quick succession, + * only the most recent call will result in an update being made to the preview table. + * + * NOTE: This method should not be called directly. Instead, use _updateImportDiff, which + * wraps this method and debounces it. + * + * @param {SourceInfo} info The source to update the diff for. + */ + private async _updateDiff(info: SourceInfo) { + const mergeOptions = this._mergeOptions[info.hiddenTableId]!; + + this._isLoadingDiff.set(true); + + if (!mergeOptions.updateExistingRecords.get() || mergeOptions.mergeCols.get().length === 0) { + // We can simply disable document comparison mode when merging isn't configured. + this._gristDoc.comparison = null; + } else { + // Request a diff of the current source and wait for a response. + const genImportDiffPromise = this._docComm.generateImportDiff(info.hiddenTableId, + this._createTransformRule(info), this._getMergeOptionsForSource(info)!); + this._lastGenImportDiffPromise = genImportDiffPromise; + const diff = await genImportDiffPromise; + + // If the request is superseded by a newer request, or the Importer is disposed, do nothing. + if (this.isDisposed() || genImportDiffPromise !== this._lastGenImportDiffPromise) { return; } + + // Otherwise, put the document in comparison mode with the diff data. + this._gristDoc.comparison = diff; + } + + this._isLoadingDiff.set(false); + } + + /** + * Resets all state variables related to diffs to their default values. + */ + private _resetImportDiffState() { + this._cancelPendingDiffRequests(); + this._gristDoc.comparison = null; + } + + /** + * Effectively cancels all pending diff requests by causing their fulfilled promises to + * be ignored by their attached handlers. Since we can't natively cancel the promises, this + * is functionally equivalent to canceling the outstanding requests. + */ + private _cancelPendingDiffRequests() { + this._updateImportDiff.cancel(); + this._lastGenImportDiffPromise = null; + this._isLoadingDiff.set(false); + } + // The importer state showing import in progress, with a list of tables, and a preview. private _renderMain(upload: UploadResult) { const schema = this._parseOptions.get().SCHEMA; @@ -405,9 +509,10 @@ export class Importer extends Disposable { cssTableList( dom.forEach(this._sourceInfoArray, (info) => { const destTableId = Computed.create(null, (use) => use(info.destTableId)) - .onWrite((destId) => { + .onWrite(async (destId) => { + info.destTableId.set(destId); this._resetTableMergeOptions(info.hiddenTableId); - void this._updateTransformSection(info, destId); + await this._updateTransformSection(info); }); return cssTableInfo( dom.autoDispose(destTableId), @@ -415,67 +520,90 @@ export class Importer extends Disposable { cssTableSource(getSourceDescription(info, upload), testId('importer-from'))), cssTableLine(cssToFrom('To'), linkSelect(destTableId, this._destTables)), cssTableInfo.cls('-selected', (use) => use(this._sourceInfoSelected) === info), - dom.on('click', () => { + dom.on('click', async () => { if (info === this._sourceInfoSelected.get() || !this._validateImportConfiguration()) { return; } + + this._cancelPendingDiffRequests(); this._sourceInfoSelected.set(info); + await this._updateDiff(info); }), testId('importer-source'), ); }), ), - dom.maybe(this._sourceInfoSelected, (info) => - dom.maybe(info.destTableId, () => { - const {mergeCols, updateExistingRecords, hasInvalidMergeCols} = this._mergeOptions[info.hiddenTableId]!; - return cssMergeOptions( - cssMergeOptionsToggle(labeledSquareCheckbox( - updateExistingRecords, - 'Update existing records', - testId('importer-update-existing-records') - )), - dom.maybe(updateExistingRecords, () => [ - cssMergeOptionsMessage( - 'Imported rows will be merged with records that have the same values for all of these fields:', - testId('importer-merge-fields-message') - ), - dom.domComputed(info.transformSection, section => { - // When changes are made to selected fields, reset the multiSelect error observable. - const invalidColsListener = mergeCols.addListener((val, _prev) => { - if (val.length !== 0 && hasInvalidMergeCols.get()) { - hasInvalidMergeCols.set(false); - } - }); - return [ - dom.autoDispose(invalidColsListener), - multiSelect( + dom.maybe(this._sourceInfoSelected, (info) => { + const {mergeCols, updateExistingRecords, hasInvalidMergeCols} = this._mergeOptions[info.hiddenTableId]!; + + return [ + dom.maybe(info.destTableId, (_dest) => { + const updateRecordsListener = updateExistingRecords.addListener(async () => { + await this._updateImportDiff(info); + }); + + return cssMergeOptions( + cssMergeOptionsToggle(labeledSquareCheckbox( + updateExistingRecords, + 'Update existing records', + dom.autoDispose(updateRecordsListener), + testId('importer-update-existing-records') + )), + dom.maybe(updateExistingRecords, () => [ + cssMergeOptionsMessage( + 'Imported rows will be merged with records that have the same values for all of these fields:', + testId('importer-merge-fields-message') + ), + dom.domComputed(info.transformSection, section => { + const mergeColsListener = mergeCols.addListener(async val => { + // Reset the error state of the multiSelect on change. + if (val.length !== 0 && hasInvalidMergeCols.get()) { + hasInvalidMergeCols.set(false); + } + + await this._updateImportDiff(info); + }); + return multiSelect( mergeCols, - section.viewFields().peek().map(field => field.label()), + section?.viewFields().peek().map(field => field.label()) ?? [], { placeholder: 'Select fields to match on', error: hasInvalidMergeCols }, + dom.autoDispose(mergeColsListener), testId('importer-merge-fields-select') - ), - ]; - }) - ]) - ); - }) - ), - dom.maybe(this._previewViewSection, () => cssSectionHeader('Preview')), - dom.maybe(this._previewViewSection, (viewSection) => { - const gridView = this._createPreview(viewSection); - return cssPreviewGrid( - dom.autoDispose(gridView), - gridView.viewPane, - testId('importer-preview'), - ); + ); + }) + ]) + ); + }), + cssSectionHeader('Preview'), + dom.domComputed(use => { + const previewSection = use(this._previewViewSection); + if (use(this._isLoadingDiff) || !previewSection) { + return cssPreviewSpinner(loadingSpinner()); + } + + const gridView = this._createPreview(previewSection); + + // When changes are made to the preview table, update the import diff. + gridView.listenTo(gridView.sortedRows, 'rowNotify', async () => { + await this._updateImportDiff(info); + }); + + return cssPreviewGrid( + dom.autoDispose(gridView), + gridView.viewPane, + testId('importer-preview'), + ); + }) + ]; }), ), cssModalButtons( bigPrimaryButton('Import', dom.on('click', () => this._maybeFinishImport(upload)), + dom.boolAttr('disabled', use => use(this._previewViewSection) === null), testId('modal-confirm'), ), bigBasicButton('Cancel', @@ -637,9 +765,17 @@ const cssTableSource = styled('div', ` overflow-wrap: anywhere; `); -const cssPreviewGrid = styled('div', ` +const cssPreview = styled('div', ` display: flex; height: 300px; +`); + +const cssPreviewSpinner = styled(cssPreview, ` + align-items: center; + justify-content: center; +`); + +const cssPreviewGrid = styled(cssPreview, ` border: 1px solid ${colors.darkGrey}; `); diff --git a/app/common/ActiveDocAPI.ts b/app/common/ActiveDocAPI.ts index 2113fd35..f8d02ffb 100644 --- a/app/common/ActiveDocAPI.ts +++ b/app/common/ActiveDocAPI.ts @@ -2,7 +2,7 @@ import {ActionGroup} from 'app/common/ActionGroup'; import {CellValue, TableDataAction, UserAction} from 'app/common/DocActions'; import {FormulaProperties} from 'app/common/GranularAccessClause'; import {FetchUrlOptions, UploadResult} from 'app/common/uploads'; -import {PermissionData, UserAccessData} from 'app/common/UserAPI'; +import {DocStateComparison, PermissionData, UserAccessData} from 'app/common/UserAPI'; import {ParseOptions} from 'app/plugin/FileParserAPI'; import {IMessage} from 'grain-rpc'; @@ -203,6 +203,13 @@ export interface ActiveDocAPI { */ cancelImportFiles(dataSource: DataSourceTransformed, prevTableIds: string[]): Promise; + /** + * Returns a diff of changes that will be applied to the destination table from `transformRule` + * if the data from `hiddenTableId` is imported with the specified `mergeOptions`. + */ + generateImportDiff(hiddenTableId: string, transformRule: TransformRule, + mergeOptions: MergeOptions): Promise; + /** * Saves attachments from a given upload and creates an entry for them in the database. It * returns the list of rowIds for the rows created in the _grist_Attachments table. diff --git a/app/server/lib/ActiveDoc.ts b/app/server/lib/ActiveDoc.ts index 8f509c10..9476625a 100644 --- a/app/server/lib/ActiveDoc.ts +++ b/app/server/lib/ActiveDoc.ts @@ -23,9 +23,11 @@ import { ForkResult, ImportOptions, ImportResult, + MergeOptions, PermissionDataWithExtraUsers, QueryResult, - ServerQuery + ServerQuery, + TransformRule } from 'app/common/ActiveDocAPI'; import {ApiError} from 'app/common/ApiError'; import {mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate'; @@ -47,7 +49,7 @@ import {byteString, countIf, safeJsonParse} from 'app/common/gutil'; import {InactivityTimer} from 'app/common/InactivityTimer'; import {schema, SCHEMA_VERSION} from 'app/common/schema'; import {FetchUrlOptions, UploadResult} from 'app/common/uploads'; -import {DocReplacementOptions, DocState} from 'app/common/UserAPI'; +import {DocReplacementOptions, DocState, DocStateComparison} from 'app/common/UserAPI'; import {ParseOptions} from 'app/plugin/FileParserAPI'; import {GristDocAPI} from 'app/plugin/GristAPI'; import {compileAclFormula} from 'app/server/lib/ACLFormula'; @@ -546,6 +548,24 @@ export class ActiveDoc extends EventEmitter { return this._activeDocImport.cancelImportFiles(docSession, dataSource, prevTableIds); } + /** + * Returns a diff of changes that will be applied to the destination table from `transformRule` + * if the data from `hiddenTableId` is imported with the specified `mergeOptions`. + * + * The diff is returned as a `DocStateComparison` of the same doc, with the `rightChanges` + * containing the updated cell values. Old values are pulled from the destination record (if + * a match was found), and new values are the result of merging in the new cell values with + * the merge strategy from `mergeOptions`. + * + * No distinction is currently made for added records vs. updated existing records; instead, + * we treat added records as an updated record in `hiddenTableId` where all the column + * values changed from blank to the original column values from `hiddenTableId`. + */ + public generateImportDiff(_docSession: DocSession, hiddenTableId: string, transformRule: TransformRule, + mergeOptions: MergeOptions): Promise { + return this._activeDocImport.generateImportDiff(hiddenTableId, transformRule, mergeOptions); + } + /** * Close the current document. */ diff --git a/app/server/lib/ActiveDocImport.ts b/app/server/lib/ActiveDocImport.ts index 456fafab..e4f2805a 100644 --- a/app/server/lib/ActiveDocImport.ts +++ b/app/server/lib/ActiveDocImport.ts @@ -3,12 +3,14 @@ import * as path from 'path'; import * as _ from 'underscore'; +import {ColumnDelta, createEmptyActionSummary} from 'app/common/ActionSummary'; import {ApplyUAResult, DataSourceTransformed, ImportOptions, ImportResult, ImportTableResult, MergeOptions, MergeOptionsMap, MergeStrategy, TransformColumn, TransformRule, TransformRuleMap} from 'app/common/ActiveDocAPI'; import {ApiError} from 'app/common/ApiError'; import {BulkColValues, CellValue, fromTableDataAction, TableRecordValue} from 'app/common/DocActions'; import * as gutil from 'app/common/gutil'; +import {DocStateComparison} from 'app/common/UserAPI'; import {ParseFileResult, ParseOptions} from 'app/plugin/FileParserAPI'; import {GristTable} from 'app/plugin/GristTable'; import {ActiveDoc} from 'app/server/lib/ActiveDoc'; @@ -106,6 +108,95 @@ export class ActiveDocImport { await globalUploadSet.cleanup(dataSource.uploadId); } + /** + * Returns a diff of changes that will be applied to the destination table from `transformRule` + * if the data from `hiddenTableId` is imported with the specified `mergeOptions`. + * + * The diff is returned as a `DocStateComparison` of the same doc, with the `rightChanges` + * containing the updated cell values. Old values are pulled from the destination record (if + * a match was found), and new values are the result of merging in the new cell values with + * the merge strategy from `mergeOptions`. + * + * No distinction is currently made for added records vs. updated existing records; instead, + * we treat added records as an updated record in `hiddenTableId` where all the column + * values changed from blank to the original column values from `hiddenTableId`. + * + * @param {string} hiddenTableId Source table. + * @param {TransformRule} transformRule Transform rule for the original source columns. + * The destination table id is populated in the rule. + * @param {MergeOptions} mergeOptions Merge options for how to match source rows + * with destination records, and how to merge their column values. + * @returns {Promise} Comparison data for the changes that will occur if + * `hiddenTableId` is merged into the destination table from `transformRule`. + */ + public async generateImportDiff(hiddenTableId: string, {destCols, destTableId}: TransformRule, + {mergeCols, mergeStrategy}: MergeOptions): Promise { + // Get column differences between `hiddenTableId` and `destTableId` for rows that exist in both tables. + const selectColumns: [string, string][] = + destCols.map(c => [c.colId!, c.colId!.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length)]); + const selectColumnsMap = new Map(selectColumns); + const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId!, selectColumnsMap, mergeCols); + + // Initialize container for updated column values in the expected format (ColumnDelta). + const updatedRecords: {[colId: string]: ColumnDelta} = {}; + const updatedRecordIds: number[] = []; + const srcColIds = selectColumns.map(([srcColId, _destColId]) => srcColId); + for (const id of srcColIds) { + updatedRecords[id] = {}; + } + + // Retrieve the function used to reconcile differences between source and destination. + const merge = getMergeFunction(mergeStrategy); + + const numResultRows = comparisonResult[hiddenTableId + '.id'].length; + for (let i = 0; i < numResultRows; i++) { + const srcRowId = comparisonResult[hiddenTableId + '.id'][i] as number; + + if (comparisonResult[destTableId + '.id'][i] === null) { + // No match in destination table found for source row, so it must be a new record. + for (const srcColId of srcColIds) { + updatedRecords[srcColId][srcRowId] = [[''], [(comparisonResult[`${hiddenTableId}.${srcColId}`][i])]]; + } + } else { + // Otherwise, a match was found between source and destination tables. + for (const srcColId of srcColIds) { + const matchingDestColId = selectColumnsMap.get(srcColId); + const srcVal = comparisonResult[`${hiddenTableId}.${srcColId}`][i]; + const destVal = comparisonResult[`${destTableId}.${matchingDestColId}`][i]; + + // Exclude unchanged cell values from the comparison. + if (srcVal === destVal) { continue; } + + updatedRecords[srcColId][srcRowId] = [[destVal], [merge(srcVal, destVal)]]; + } + } + + updatedRecordIds.push(srcRowId); + } + + return { + left: {n: 0, h: ''}, // NOTE: left, right, parent, and summary are not used by Importer. + right: {n: 0, h: ''}, + parent: null, + summary: 'right', + details: { + leftChanges: createEmptyActionSummary(), + rightChanges: { + tableRenames: [], + tableDeltas: { + [hiddenTableId]: { + removeRows: [], + updateRows: updatedRecordIds, + addRows: [], // Since deltas are relative to the source table, we can't (yet) use this. + columnRenames: [], + columnDeltas: updatedRecords, + } + } + } + } + }; + } + /** * Import the given upload as new tables in one step. This does not give the user a chance to * modify parse options or transforms. The caller is responsible for cleaning up the upload. @@ -340,6 +431,105 @@ export class ActiveDocImport { return destTableId; } + /** + * Merges matching records from `hiddenTableId` into `destTableId`, and finalizes import. + * + * @param {string} hiddenTableId Source table containing records to be imported. + * @param {string} destTableId Destination table that will be updated. + * @param {TransformRule} transformRule Rules for transforming source columns using formulas + * before merging/importing takes place. + * @param {MergeOptions} mergeOptions Options for how to merge matching records between + * the source and destination table. + */ + private async _mergeAndFinishImport(docSession: OptDocSession, hiddenTableId: string, destTableId: string, + {destCols, sourceCols}: TransformRule, + {mergeCols, mergeStrategy}: MergeOptions): Promise { + // Get column differences between `hiddenTableId` and `destTableId` for rows that exist in both tables. + const selectColumns: [string, string][] = destCols.map(destCol => { + const formula = destCol.formula.trim(); + const srcColId = formula.startsWith('$') && sourceCols.includes(formula.slice(1)) ? + formula.slice(1) : IMPORT_TRANSFORM_COLUMN_PREFIX + destCol.colId; + return [srcColId, destCol.colId!]; + }); + const selectColumnsMap = new Map(selectColumns); + const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId, selectColumnsMap, mergeCols); + + // Initialize containers for new and updated records in the expected formats. + const newRecords: BulkColValues = {}; + let numNewRecords = 0; + const updatedRecords: BulkColValues = {}; + const updatedRecordIds: number[] = []; + + const destColIds = [...selectColumnsMap.values()]; + for (const id of destColIds) { + newRecords[id] = []; + updatedRecords[id] = []; + } + + // Retrieve the function used to reconcile differences between source and destination. + const merge = getMergeFunction(mergeStrategy); + + const srcColIds = [...selectColumnsMap.keys()]; + const numResultRows = comparisonResult[hiddenTableId + '.id'].length; + for (let i = 0; i < numResultRows; i++) { + if (comparisonResult[destTableId + '.id'][i] === null) { + // No match in destination table found for source row, so it must be a new record. + for (const srcColId of srcColIds) { + const matchingDestColId = selectColumnsMap.get(srcColId); + newRecords[matchingDestColId!].push(comparisonResult[`${hiddenTableId}.${srcColId}`][i]); + } + numNewRecords++; + } else { + // Otherwise, a match was found between source and destination tables, so we merge their columns. + for (const srcColId of srcColIds) { + const matchingDestColId = selectColumnsMap.get(srcColId); + const srcVal = comparisonResult[`${hiddenTableId}.${srcColId}`][i]; + const destVal = comparisonResult[`${destTableId}.${matchingDestColId}`][i]; + updatedRecords[matchingDestColId!].push(merge(srcVal, destVal)); + } + updatedRecordIds.push(comparisonResult[destTableId + '.id'][i] as number); + } + } + + // We no longer need the temporary import table, so remove it. + await this._activeDoc.applyUserActions(docSession, [['RemoveTable', hiddenTableId]]); + + if (updatedRecordIds.length > 0) { + await this._activeDoc.applyUserActions(docSession, + [['BulkUpdateRecord', destTableId, updatedRecordIds, updatedRecords]]); + } + + if (numNewRecords > 0) { + await this._activeDoc.applyUserActions(docSession, + [['BulkAddRecord', destTableId, gutil.arrayRepeat(numNewRecords, null), newRecords]]); + } + } + + /** + * Builds and executes a SQL query that compares common columns from `hiddenTableId` + * and `destTableId`, returning matched rows that contain differences between both tables. + * + * The `mergeCols` parameter defines how rows from both tables are matched; we consider + * rows whose columns values for all columns in `mergeCols` to be the same record in both + * tables. + * + * @param {string} hiddenTableId Source table. + * @param {string} destTableId Destination table. + * @param {string} selectColumnsMap Map of source to destination column ids to include in the comparison results. + * @param {string[]} mergeCols List of (destination) column ids to use for matching. + * @returns {Promise, + mergeCols: string[]): Promise { + const joinColumns: [string, string][] = + [...selectColumnsMap.entries()].filter(([_srcColId, destColId]) => mergeCols.includes(destColId)); + const joinColumnsMap = new Map(joinColumns); + + const query = buildComparisonQuery(hiddenTableId, destTableId, selectColumnsMap, joinColumnsMap); + const result = await this._activeDoc.docStorage.fetchQuery(query); + return this._activeDoc.docStorage.decodeMarshalledDataFromTables(result); + } + /** * Returns a default TransformRule using column definitions from `destTableId`. If `destTableId` * is null (in the case when the import destination is a new table), the `srcCols` are used instead. @@ -438,95 +628,6 @@ export class ActiveDocImport { } } - - /** - * Merges matching records from `hiddenTableId` into `destTableId`, and finalizes import. - * - * @param {string} hiddenTableId Source table containing records to be imported. - * @param {string} destTableId Destination table that will be updated. - * @param {TransformRule} transformRule Rules for transforming source columns using formulas - * before merging/importing takes place. - * @param {MergeOptions} mergeOptions Options for how to merge matching records between - * the source and destination table. - */ - private async _mergeAndFinishImport(docSession: OptDocSession, hiddenTableId: string, destTableId: string, - transformRule: TransformRule, mergeOptions: MergeOptions): Promise { - // Prepare a set of column pairs (source and destination) for selecting and joining. - const selectColumns: [string, string][] = []; - const joinColumns: [string, string][] = []; - - for (const destCol of transformRule.destCols) { - const destColId = destCol.colId as string; - - const formula = destCol.formula.trim(); - const srcColId = formula.startsWith('$') && transformRule.sourceCols.includes(formula.slice(1)) ? - formula.slice(1) : IMPORT_TRANSFORM_COLUMN_PREFIX + destCol.colId; - - selectColumns.push([srcColId, destColId]); - - if (mergeOptions.mergeCols.includes(destColId)) { - joinColumns.push([srcColId, destColId]); - } - } - - const selectColumnsMap = new Map(selectColumns); - const joinColumnsMap = new Map(joinColumns); - - // Construct and execute a SQL query that will tell us the differences between source and destination. - const query = buildComparisonQuery(hiddenTableId, destTableId, selectColumnsMap, joinColumnsMap); - const result = await this._activeDoc.docStorage.fetchQuery(query); - const decodedResult = this._activeDoc.docStorage.decodeMarshalledDataFromTables(result); - - // Initialize containers for new and updated records in the expected formats. - const newRecords: BulkColValues = {}; - let numNewRecords = 0; - const updatedRecords: BulkColValues = {}; - const updatedRecordIds: number[] = []; - - const destColIds = [...selectColumnsMap.values()]; - for (const id of destColIds) { - newRecords[id] = []; - updatedRecords[id] = []; - } - - // Retrieve the function used to reconcile differences between source and destination. - const merge = getMergeFunction(mergeOptions.mergeStrategy); - - const srcColIds = [...selectColumnsMap.keys()]; - const numResultRows = decodedResult[hiddenTableId + '.id'].length; - for (let i = 0; i < numResultRows; i++) { - if (decodedResult[destTableId + '.id'][i] === null) { - // No match in destination table found for source row, so it must be a new record. - for (const srcColId of srcColIds) { - const matchingDestColId = selectColumnsMap.get(srcColId); - newRecords[matchingDestColId!].push(decodedResult[`${hiddenTableId}.${srcColId}`][i]); - } - numNewRecords++; - } else { - // Otherwise, a match was found between source and destination tables, so we merge their columns. - for (const srcColId of srcColIds) { - const matchingDestColId = selectColumnsMap.get(srcColId); - const srcVal = decodedResult[`${hiddenTableId}.${srcColId}`][i]; - const destVal = decodedResult[`${destTableId}.${matchingDestColId}`][i]; - updatedRecords[matchingDestColId!].push(merge(srcVal, destVal)); - } - updatedRecordIds.push(decodedResult[destTableId + '.id'][i] as number); - } - } - - // We no longer need the temporary import table, so remove it. - await this._activeDoc.applyUserActions(docSession, [['RemoveTable', hiddenTableId]]); - - if (updatedRecordIds.length > 0) { - await this._activeDoc.applyUserActions(docSession, - [['BulkUpdateRecord', destTableId, updatedRecordIds, updatedRecords]]); - } - - if (numNewRecords > 0) { - await this._activeDoc.applyUserActions(docSession, - [['BulkAddRecord', destTableId, gutil.arrayRepeat(numNewRecords, null), newRecords]]); - } - } } // Helper function that returns true if a given cell is blank (i.e. null or empty). @@ -555,14 +656,19 @@ type MergeFunction = (srcVal: CellValue, destVal: CellValue) => CellValue; */ function getMergeFunction({type}: MergeStrategy): MergeFunction { switch (type) { - case 'replace-with-nonblank-source': + case 'replace-with-nonblank-source': { return (srcVal, destVal) => isBlank(srcVal) ? destVal : srcVal; - case 'replace-all-fields': + } + case 'replace-all-fields': { return (srcVal, _destVal) => srcVal; - case 'replace-blank-fields-only': + } + case 'replace-blank-fields-only': { return (srcVal, destVal) => isBlank(destVal) ? srcVal : destVal; - default: - // Normally, we should never arrive here. If we somehow do, we throw an error. - throw new Error(`Unknown merge strategy: ${type}`); + } + default: { + // Normally, we should never arrive here. If we somehow do, throw an error. + const unknownStrategyType: never = type; + throw new Error(`Unknown merge strategy: ${unknownStrategyType}`); + } } } diff --git a/app/server/lib/DocWorker.ts b/app/server/lib/DocWorker.ts index 19a65a02..f4c74f11 100644 --- a/app/server/lib/DocWorker.ts +++ b/app/server/lib/DocWorker.ts @@ -98,6 +98,7 @@ export class DocWorker { importFiles: activeDocMethod.bind(null, 'editors', 'importFiles'), finishImportFiles: activeDocMethod.bind(null, 'editors', 'finishImportFiles'), cancelImportFiles: activeDocMethod.bind(null, 'editors', 'cancelImportFiles'), + generateImportDiff: activeDocMethod.bind(null, 'editors', 'generateImportDiff'), addAttachments: activeDocMethod.bind(null, 'editors', 'addAttachments'), removeInstanceFromDoc: activeDocMethod.bind(null, 'editors', 'removeInstanceFromDoc'), startBundleUserActions: activeDocMethod.bind(null, 'editors', 'startBundleUserActions'), diff --git a/app/server/lib/ExpandedQuery.ts b/app/server/lib/ExpandedQuery.ts index fe2d2cc5..55b8f366 100644 --- a/app/server/lib/ExpandedQuery.ts +++ b/app/server/lib/ExpandedQuery.ts @@ -178,14 +178,36 @@ export function buildComparisonQuery(leftTableId: string, rightTableId: string, ); }); - // Join both tables on `joinColumns`, including unmatched rows from `leftTableId`. + /** + * Performance can suffer when large (right) tables have many duplicates for their join columns. + * Specifically, the number of rows returned by the query can be unreasonably large if each + * row from the left table is joined against up to N rows from the right table. + * + * To work around this, we de-duplicate the right table before joining, returning the first row id + * we find for a given group of join column values. In practice, this means that each row from + * the left table can only be matched with at most 1 equivalent row from the right table. + */ + const dedupedRightTableQuery = + `SELECT MIN(id) AS id, ${[...joinColumns.values()].map(v => quoteIdent(v)).join(', ')} ` + + `FROM ${quoteIdent(rightTableId)} ` + + `GROUP BY ${[...joinColumns.values()].map(v => quoteIdent(v)).join(', ')}`; + const dedupedRightTableAlias = quoteIdent('deduped_' + rightTableId); + + // Join the left table to the (de-duplicated) right table, and include unmatched left rows. const joinConditions: string[] = []; joinColumns.forEach((rightTableColumn, leftTableColumn) => { const leftExpression = `${quoteIdent(leftTableId)}.${quoteIdent(leftTableColumn)}`; - const rightExpression = `${quoteIdent(rightTableId)}.${quoteIdent(rightTableColumn)}`; + const rightExpression = `${dedupedRightTableAlias}.${quoteIdent(rightTableColumn)}`; joinConditions.push(`${leftExpression} = ${rightExpression}`); }); - joins.push(`LEFT JOIN ${quoteIdent(rightTableId)} ON ${joinConditions.join(' AND ')}`); + joins.push( + `LEFT JOIN (${dedupedRightTableQuery}) AS ${dedupedRightTableAlias} ` + + `ON ${joinConditions.join(' AND ')}`); + + // Finally, join the de-duplicated right table to the original right table to get all its columns. + joins.push( + `LEFT JOIN ${quoteIdent(rightTableId)} ` + + `ON ${dedupedRightTableAlias}.id = ${quoteIdent(rightTableId)}.id`); // Filter out matching rows where all non-join columns from both tables are identical. const whereConditions: string[] = []; diff --git a/test/nbrowser/gristUtils.ts b/test/nbrowser/gristUtils.ts index 41803879..aa72b1bb 100644 --- a/test/nbrowser/gristUtils.ts +++ b/test/nbrowser/gristUtils.ts @@ -692,10 +692,20 @@ export async function userActionsVerify(expectedUserActions: unknown[]): Promise * Helper to get the cells of the importer Preview section. The cell text is returned from the * requested rows and columns in row-wise order. */ -export async function getPreviewContents(cols: number[], rowNums: number[]): Promise { +export async function getPreviewContents(cols: number[], rowNums: number[], + mapper?: (e: WebElement) => Promise): Promise { await driver.findWait('.test-importer-preview .gridview_row', 1000); const section = await driver.find('.test-importer-preview'); - return getVisibleGridCells({cols, rowNums, section}); + return getVisibleGridCells({cols, rowNums, section, mapper}); +} + +/** + * Helper to get a cell from the importer Preview section. + */ + export async function getPreviewCell(col: string|number, rowNum: number): Promise { + await driver.findWait('.test-importer-preview .gridview_row', 1000); + const section = await driver.find('.test-importer-preview'); + return getCell({col, rowNum, section}); } /**