(core) Add column matching to Importer

Summary:
The Importer dialog is now maximized, showing additional column
matching options and information on the left, with the preview
table shown on the right. Columns can be mapped via a select menu
listing all source columns, or by clicking a formula field next to
the menu and directly editing the transform formula.

Test Plan: Browser tests.

Reviewers: jarek

Reviewed By: jarek

Differential Revision: https://phab.getgrist.com/D3096
This commit is contained in:
George Gevoian
2021-11-09 12:03:12 -08:00
parent 96fa7ad562
commit 08b1286f4f
11 changed files with 553 additions and 145 deletions

View File

@@ -18,6 +18,7 @@ import {DocSession, OptDocSession} from 'app/server/lib/DocSession';
import * as log from 'app/server/lib/log';
import {globalUploadSet, moveUpload, UploadInfo} from 'app/server/lib/uploads';
import {buildComparisonQuery} from 'app/server/lib/ExpandedQuery';
import flatten = require('lodash/flatten');
const IMPORT_TRANSFORM_COLUMN_PREFIX = 'gristHelper_Import_';
@@ -131,16 +132,19 @@ export class ActiveDocImport {
*/
public async generateImportDiff(hiddenTableId: string, {destCols, destTableId}: TransformRule,
{mergeCols, mergeStrategy}: MergeOptions): Promise<DocStateComparison> {
// Merge column ids from client have prefixes that need to be stripped.
mergeCols = stripPrefixes(mergeCols);
// Get column differences between `hiddenTableId` and `destTableId` for rows that exist in both tables.
const selectColumns: [string, string][] =
destCols.map(c => [c.colId!, c.colId!.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length)]);
const selectColumnsMap = new Map(selectColumns);
const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId!, selectColumnsMap, mergeCols);
const srcAndDestColIds: [string, string[]][] =
destCols.map(c => [c.colId!, [c.colId!.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length)]]);
const srcToDestColIds = new Map(srcAndDestColIds);
const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId!, srcToDestColIds, mergeCols);
// Initialize container for updated column values in the expected format (ColumnDelta).
const updatedRecords: {[colId: string]: ColumnDelta} = {};
const updatedRecordIds: number[] = [];
const srcColIds = selectColumns.map(([srcColId, _destColId]) => srcColId);
const srcColIds = srcAndDestColIds.map(([srcColId, _destColId]) => srcColId);
for (const id of srcColIds) {
updatedRecords[id] = {};
}
@@ -160,7 +164,7 @@ export class ActiveDocImport {
} else {
// Otherwise, a match was found between source and destination tables.
for (const srcColId of srcColIds) {
const matchingDestColId = selectColumnsMap.get(srcColId);
const matchingDestColId = srcToDestColIds.get(srcColId)![0];
const srcVal = comparisonResult[`${hiddenTableId}.${srcColId}`][i];
const destVal = comparisonResult[`${destTableId}.${matchingDestColId}`][i];
@@ -382,7 +386,7 @@ export class ActiveDocImport {
}
// Transform rules from client may have prefixed column ids, so we need to strip them.
stripPrefixes(transformRule);
stripRulePrefixes(transformRule);
if (intoNewTable) {
// Transform rules for new tables don't have filled in destination column ids.
@@ -444,15 +448,25 @@ export class ActiveDocImport {
private async _mergeAndFinishImport(docSession: OptDocSession, hiddenTableId: string, destTableId: string,
{destCols, sourceCols}: TransformRule,
{mergeCols, mergeStrategy}: MergeOptions): Promise<void> {
// Merge column ids from client have prefixes that need to be stripped.
mergeCols = stripPrefixes(mergeCols);
// Get column differences between `hiddenTableId` and `destTableId` for rows that exist in both tables.
const selectColumns: [string, string][] = destCols.map(destCol => {
const srcAndDestColIds: [string, string][] = destCols.map(destCol => {
const formula = destCol.formula.trim();
const srcColId = formula.startsWith('$') && sourceCols.includes(formula.slice(1)) ?
formula.slice(1) : IMPORT_TRANSFORM_COLUMN_PREFIX + destCol.colId;
return [srcColId, destCol.colId!];
});
const selectColumnsMap = new Map(selectColumns);
const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId, selectColumnsMap, mergeCols);
const srcToDestColIds: Map<string, string[]> = new Map();
srcAndDestColIds.forEach(([srcColId, destColId]) => {
if (!srcToDestColIds.has(srcColId)) {
srcToDestColIds.set(srcColId, [destColId]);
} else {
srcToDestColIds.get(srcColId)!.push(destColId);
}
});
const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId, srcToDestColIds, mergeCols);
// Initialize containers for new and updated records in the expected formats.
const newRecords: BulkColValues = {};
@@ -460,7 +474,7 @@ export class ActiveDocImport {
const updatedRecords: BulkColValues = {};
const updatedRecordIds: number[] = [];
const destColIds = [...selectColumnsMap.values()];
const destColIds = flatten([...srcToDestColIds.values()]);
for (const id of destColIds) {
newRecords[id] = [];
updatedRecords[id] = [];
@@ -469,23 +483,27 @@ export class ActiveDocImport {
// Retrieve the function used to reconcile differences between source and destination.
const merge = getMergeFunction(mergeStrategy);
const srcColIds = [...selectColumnsMap.keys()];
const srcColIds = [...srcToDestColIds.keys()];
const numResultRows = comparisonResult[hiddenTableId + '.id'].length;
for (let i = 0; i < numResultRows; i++) {
if (comparisonResult[destTableId + '.id'][i] === null) {
// No match in destination table found for source row, so it must be a new record.
for (const srcColId of srcColIds) {
const matchingDestColId = selectColumnsMap.get(srcColId);
newRecords[matchingDestColId!].push(comparisonResult[`${hiddenTableId}.${srcColId}`][i]);
const matchingDestColIds = srcToDestColIds.get(srcColId);
matchingDestColIds!.forEach(id => {
newRecords[id].push(comparisonResult[`${hiddenTableId}.${srcColId}`][i]);
});
}
numNewRecords++;
} else {
// Otherwise, a match was found between source and destination tables, so we merge their columns.
for (const srcColId of srcColIds) {
const matchingDestColId = selectColumnsMap.get(srcColId);
const matchingDestColIds = srcToDestColIds.get(srcColId);
const srcVal = comparisonResult[`${hiddenTableId}.${srcColId}`][i];
const destVal = comparisonResult[`${destTableId}.${matchingDestColId}`][i];
updatedRecords[matchingDestColId!].push(merge(srcVal, destVal));
matchingDestColIds!.forEach(id => {
const destVal = comparisonResult[`${destTableId}.${id}`][i];
updatedRecords[id].push(merge(srcVal, destVal));
});
}
updatedRecordIds.push(comparisonResult[destTableId + '.id'][i] as number);
}
@@ -515,17 +533,23 @@ export class ActiveDocImport {
*
* @param {string} hiddenTableId Source table.
* @param {string} destTableId Destination table.
* @param {string} selectColumnsMap Map of source to destination column ids to include in the comparison results.
* @param {Map<string, string[]>} srcToDestColIds Map of source to one or more destination column ids
* to include in the comparison results.
* @param {string[]} mergeCols List of (destination) column ids to use for matching.
* @returns {Promise<BulkColValues>} Decoded column values from both tables that were matched, and had differences.
*/
private async _getTableComparison(hiddenTableId: string, destTableId: string, selectColumnsMap: Map<string, string>,
private async _getTableComparison(hiddenTableId: string, destTableId: string, srcToDestColIds: Map<string, string[]>,
mergeCols: string[]): Promise<BulkColValues> {
const joinColumns: [string, string][] =
[...selectColumnsMap.entries()].filter(([_srcColId, destColId]) => mergeCols.includes(destColId));
const joinColumnsMap = new Map(joinColumns);
const mergeColIds = new Set(mergeCols);
const destToSrcMergeColIds = new Map();
srcToDestColIds.forEach((destColIds, srcColId) => {
const maybeMergeColId = destColIds.find(colId => mergeColIds.has(colId));
if (maybeMergeColId !== undefined) {
destToSrcMergeColIds.set(maybeMergeColId, srcColId);
}
});
const query = buildComparisonQuery(hiddenTableId, destTableId, selectColumnsMap, joinColumnsMap);
const query = buildComparisonQuery(hiddenTableId, destTableId, srcToDestColIds, destToSrcMergeColIds);
const result = await this._activeDoc.docStorage.fetchQuery(query);
return this._activeDoc.docStorage.decodeMarshalledDataFromTables(result);
}
@@ -636,7 +660,7 @@ function isBlank(value: CellValue): boolean {
}
// Helper function that strips import prefixes from columns in transform rules (if ids are present).
function stripPrefixes({destCols}: TransformRule): void {
function stripRulePrefixes({destCols}: TransformRule): void {
for (const col of destCols) {
const colId = col.colId;
if (colId && colId.startsWith(IMPORT_TRANSFORM_COLUMN_PREFIX)) {
@@ -645,6 +669,12 @@ function stripPrefixes({destCols}: TransformRule): void {
}
}
// Helper function that builds a fresh list of column ids: any id that begins with the
// import transform prefix has that prefix removed; all other ids are carried over as-is.
// The `colIds` array passed in is not modified.
function stripPrefixes(colIds: string[]): string[] {
  const prefixLength = IMPORT_TRANSFORM_COLUMN_PREFIX.length;
  return colIds.map(colId => {
    if (!colId.startsWith(IMPORT_TRANSFORM_COLUMN_PREFIX)) {
      return colId;
    }
    return colId.slice(prefixLength);
  });
}
type MergeFunction = (srcVal: CellValue, destVal: CellValue) => CellValue;
/**

View File

@@ -148,13 +148,15 @@ export function expandQuery(iquery: ServerQuery, docData: DocData, onDemandFormu
*
* @param {string} leftTableId Name of the left table in the comparison.
* @param {string} rightTableId Name of the right table in the comparison.
* @param {Map<string, string>} selectColumns Map of left table column ids to their matching equivalent
* from the right table. All of these columns will be included in the result, aliased by table id.
* @param {Map<string, string>} joinColumns Map of left table column ids to their matching equivalent
* from the right table. These columns are used to join `leftTableID` to `rightTableId`.
* @param {Map<string, string[]>} selectColumns Map of left table column ids to their matching equivalent(s)
* from the right table. A single left column can be compared against 2 or more right columns, so the
* values of `selectColumns` are arrays. All of these columns will be included in the result, aliased by
* table id.
* @param {Map<string, string>} joinColumns Map of right table column ids to their matching equivalent
* from the left table. These columns are used to join `leftTableId` to `rightTableId`.
* @returns {ExpandedQuery} The constructed query.
*/
export function buildComparisonQuery(leftTableId: string, rightTableId: string, selectColumns: Map<string, string>,
export function buildComparisonQuery(leftTableId: string, rightTableId: string, selectColumns: Map<string, string[]>,
joinColumns: Map<string, string>): ExpandedQuery {
const query: ExpandedQuery = { tableId: leftTableId, filters: {} };
@@ -169,14 +171,16 @@ export function buildComparisonQuery(leftTableId: string, rightTableId: string,
`${quoteIdent(rightTableId)}.id AS ${quoteIdent(rightTableId + '.id')}`
);
// Select columns from both tables using the table id as a prefix for each column name.
selectColumns.forEach((rightTableColumn, leftTableColumn) => {
// Select columns from both tables, using the table id as a prefix for each column name.
selectColumns.forEach((rightTableColumns, leftTableColumn) => {
const leftColumnAlias = `${leftTableId}.${leftTableColumn}`;
const rightColumnAlias = `${rightTableId}.${rightTableColumn}`;
selects.push(
`${quoteIdent(leftTableId)}.${quoteIdent(leftTableColumn)} AS ${quoteIdent(leftColumnAlias)}`,
`${quoteIdent(rightTableId)}.${quoteIdent(rightTableColumn)} AS ${quoteIdent(rightColumnAlias)}`
);
selects.push(`${quoteIdent(leftTableId)}.${quoteIdent(leftTableColumn)} AS ${quoteIdent(leftColumnAlias)}`);
rightTableColumns.forEach(colId => {
const rightColumnAlias = `${rightTableId}.${colId}`;
selects.push(`${quoteIdent(rightTableId)}.${quoteIdent(colId)} AS ${quoteIdent(rightColumnAlias)}`
);
});
});
/**
@@ -189,14 +193,14 @@ export function buildComparisonQuery(leftTableId: string, rightTableId: string,
* the left table can only be matched with at most 1 equivalent row from the right table.
*/
const dedupedRightTableQuery =
`SELECT MIN(id) AS id, ${[...joinColumns.values()].map(v => quoteIdent(v)).join(', ')} ` +
`SELECT MIN(id) AS id, ${[...joinColumns.keys()].map(v => quoteIdent(v)).join(', ')} ` +
`FROM ${quoteIdent(rightTableId)} ` +
`GROUP BY ${[...joinColumns.values()].map(v => quoteIdent(v)).join(', ')}`;
`GROUP BY ${[...joinColumns.keys()].map(v => quoteIdent(v)).join(', ')}`;
const dedupedRightTableAlias = quoteIdent('deduped_' + rightTableId);
// Join the left table to the (de-duplicated) right table, and include unmatched left rows.
const joinConditions: string[] = [];
joinColumns.forEach((rightTableColumn, leftTableColumn) => {
joinColumns.forEach((leftTableColumn, rightTableColumn) => {
const leftExpression = `${quoteIdent(leftTableId)}.${quoteIdent(leftTableColumn)}`;
const rightExpression = `${dedupedRightTableAlias}.${quoteIdent(rightTableColumn)}`;
joinConditions.push(`${leftExpression} = ${rightExpression}`);
@@ -212,16 +216,21 @@ export function buildComparisonQuery(leftTableId: string, rightTableId: string,
// Filter out matching rows where all non-join columns from both tables are identical.
const whereConditions: string[] = [];
for (const [leftTableColumn, rightTableColumn] of selectColumns.entries()) {
if (joinColumns.has(leftTableColumn)) { continue; }
for (const [leftTableColumnId, rightTableColumnIds] of selectColumns.entries()) {
const leftColumnAlias = quoteIdent(`${leftTableId}.${leftTableColumnId}`);
const leftColumnAlias = quoteIdent(`${leftTableId}.${leftTableColumn}`);
const rightColumnAlias = quoteIdent(`${rightTableId}.${rightTableColumn}`);
for (const rightTableColId of rightTableColumnIds) {
// If this left/right column id pair was already used for joining, skip it.
if (joinColumns.get(rightTableColId) === leftTableColumnId) { continue; }
// Only include rows that have differences in column values.
whereConditions.push(`${leftColumnAlias} IS NOT ${rightColumnAlias}`);
// Only include rows that have differences in column values.
const rightColumnAlias = quoteIdent(`${rightTableId}.${rightTableColId}`);
whereConditions.push(`${leftColumnAlias} IS NOT ${rightColumnAlias}`);
}
}
if (whereConditions.length > 0) {
wheres.push(`(${whereConditions.join(' OR ')})`);
}
wheres.push(`(${whereConditions.join(' OR ')})`);
// Copy decisions to the query object, and return.
query.joins = joins;