mirror of
https://github.com/gristlabs/grist-core.git
synced 2026-03-02 04:09:24 +00:00
(core) Add column matching to Importer
Summary: The Importer dialog is now maximized, showing additional column matching options and information on the left, with the preview table shown on the right. Columns can be mapped via a select menu listing all source columns, or by clicking a formula field next to the menu and directly editing the transform formula. Test Plan: Browser tests. Reviewers: jarek Reviewed By: jarek Differential Revision: https://phab.getgrist.com/D3096
This commit is contained in:
@@ -18,6 +18,7 @@ import {DocSession, OptDocSession} from 'app/server/lib/DocSession';
|
||||
import * as log from 'app/server/lib/log';
|
||||
import {globalUploadSet, moveUpload, UploadInfo} from 'app/server/lib/uploads';
|
||||
import {buildComparisonQuery} from 'app/server/lib/ExpandedQuery';
|
||||
import flatten = require('lodash/flatten');
|
||||
|
||||
const IMPORT_TRANSFORM_COLUMN_PREFIX = 'gristHelper_Import_';
|
||||
|
||||
@@ -131,16 +132,19 @@ export class ActiveDocImport {
|
||||
*/
|
||||
public async generateImportDiff(hiddenTableId: string, {destCols, destTableId}: TransformRule,
|
||||
{mergeCols, mergeStrategy}: MergeOptions): Promise<DocStateComparison> {
|
||||
// Merge column ids from client have prefixes that need to be stripped.
|
||||
mergeCols = stripPrefixes(mergeCols);
|
||||
|
||||
// Get column differences between `hiddenTableId` and `destTableId` for rows that exist in both tables.
|
||||
const selectColumns: [string, string][] =
|
||||
destCols.map(c => [c.colId!, c.colId!.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length)]);
|
||||
const selectColumnsMap = new Map(selectColumns);
|
||||
const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId!, selectColumnsMap, mergeCols);
|
||||
const srcAndDestColIds: [string, string[]][] =
|
||||
destCols.map(c => [c.colId!, [c.colId!.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length)]]);
|
||||
const srcToDestColIds = new Map(srcAndDestColIds);
|
||||
const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId!, srcToDestColIds, mergeCols);
|
||||
|
||||
// Initialize container for updated column values in the expected format (ColumnDelta).
|
||||
const updatedRecords: {[colId: string]: ColumnDelta} = {};
|
||||
const updatedRecordIds: number[] = [];
|
||||
const srcColIds = selectColumns.map(([srcColId, _destColId]) => srcColId);
|
||||
const srcColIds = srcAndDestColIds.map(([srcColId, _destColId]) => srcColId);
|
||||
for (const id of srcColIds) {
|
||||
updatedRecords[id] = {};
|
||||
}
|
||||
@@ -160,7 +164,7 @@ export class ActiveDocImport {
|
||||
} else {
|
||||
// Otherwise, a match was found between source and destination tables.
|
||||
for (const srcColId of srcColIds) {
|
||||
const matchingDestColId = selectColumnsMap.get(srcColId);
|
||||
const matchingDestColId = srcToDestColIds.get(srcColId)![0];
|
||||
const srcVal = comparisonResult[`${hiddenTableId}.${srcColId}`][i];
|
||||
const destVal = comparisonResult[`${destTableId}.${matchingDestColId}`][i];
|
||||
|
||||
@@ -382,7 +386,7 @@ export class ActiveDocImport {
|
||||
}
|
||||
|
||||
// Transform rules from client may have prefixed column ids, so we need to strip them.
|
||||
stripPrefixes(transformRule);
|
||||
stripRulePrefixes(transformRule);
|
||||
|
||||
if (intoNewTable) {
|
||||
// Transform rules for new tables don't have filled in destination column ids.
|
||||
@@ -444,15 +448,25 @@ export class ActiveDocImport {
|
||||
private async _mergeAndFinishImport(docSession: OptDocSession, hiddenTableId: string, destTableId: string,
|
||||
{destCols, sourceCols}: TransformRule,
|
||||
{mergeCols, mergeStrategy}: MergeOptions): Promise<void> {
|
||||
// Merge column ids from client have prefixes that need to be stripped.
|
||||
mergeCols = stripPrefixes(mergeCols);
|
||||
|
||||
// Get column differences between `hiddenTableId` and `destTableId` for rows that exist in both tables.
|
||||
const selectColumns: [string, string][] = destCols.map(destCol => {
|
||||
const srcAndDestColIds: [string, string][] = destCols.map(destCol => {
|
||||
const formula = destCol.formula.trim();
|
||||
const srcColId = formula.startsWith('$') && sourceCols.includes(formula.slice(1)) ?
|
||||
formula.slice(1) : IMPORT_TRANSFORM_COLUMN_PREFIX + destCol.colId;
|
||||
return [srcColId, destCol.colId!];
|
||||
});
|
||||
const selectColumnsMap = new Map(selectColumns);
|
||||
const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId, selectColumnsMap, mergeCols);
|
||||
const srcToDestColIds: Map<string, string[]> = new Map();
|
||||
srcAndDestColIds.forEach(([srcColId, destColId]) => {
|
||||
if (!srcToDestColIds.has(srcColId)) {
|
||||
srcToDestColIds.set(srcColId, [destColId]);
|
||||
} else {
|
||||
srcToDestColIds.get(srcColId)!.push(destColId);
|
||||
}
|
||||
});
|
||||
const comparisonResult = await this._getTableComparison(hiddenTableId, destTableId, srcToDestColIds, mergeCols);
|
||||
|
||||
// Initialize containers for new and updated records in the expected formats.
|
||||
const newRecords: BulkColValues = {};
|
||||
@@ -460,7 +474,7 @@ export class ActiveDocImport {
|
||||
const updatedRecords: BulkColValues = {};
|
||||
const updatedRecordIds: number[] = [];
|
||||
|
||||
const destColIds = [...selectColumnsMap.values()];
|
||||
const destColIds = flatten([...srcToDestColIds.values()]);
|
||||
for (const id of destColIds) {
|
||||
newRecords[id] = [];
|
||||
updatedRecords[id] = [];
|
||||
@@ -469,23 +483,27 @@ export class ActiveDocImport {
|
||||
// Retrieve the function used to reconcile differences between source and destination.
|
||||
const merge = getMergeFunction(mergeStrategy);
|
||||
|
||||
const srcColIds = [...selectColumnsMap.keys()];
|
||||
const srcColIds = [...srcToDestColIds.keys()];
|
||||
const numResultRows = comparisonResult[hiddenTableId + '.id'].length;
|
||||
for (let i = 0; i < numResultRows; i++) {
|
||||
if (comparisonResult[destTableId + '.id'][i] === null) {
|
||||
// No match in destination table found for source row, so it must be a new record.
|
||||
for (const srcColId of srcColIds) {
|
||||
const matchingDestColId = selectColumnsMap.get(srcColId);
|
||||
newRecords[matchingDestColId!].push(comparisonResult[`${hiddenTableId}.${srcColId}`][i]);
|
||||
const matchingDestColIds = srcToDestColIds.get(srcColId);
|
||||
matchingDestColIds!.forEach(id => {
|
||||
newRecords[id].push(comparisonResult[`${hiddenTableId}.${srcColId}`][i]);
|
||||
});
|
||||
}
|
||||
numNewRecords++;
|
||||
} else {
|
||||
// Otherwise, a match was found between source and destination tables, so we merge their columns.
|
||||
for (const srcColId of srcColIds) {
|
||||
const matchingDestColId = selectColumnsMap.get(srcColId);
|
||||
const matchingDestColIds = srcToDestColIds.get(srcColId);
|
||||
const srcVal = comparisonResult[`${hiddenTableId}.${srcColId}`][i];
|
||||
const destVal = comparisonResult[`${destTableId}.${matchingDestColId}`][i];
|
||||
updatedRecords[matchingDestColId!].push(merge(srcVal, destVal));
|
||||
matchingDestColIds!.forEach(id => {
|
||||
const destVal = comparisonResult[`${destTableId}.${id}`][i];
|
||||
updatedRecords[id].push(merge(srcVal, destVal));
|
||||
});
|
||||
}
|
||||
updatedRecordIds.push(comparisonResult[destTableId + '.id'][i] as number);
|
||||
}
|
||||
@@ -515,17 +533,23 @@ export class ActiveDocImport {
|
||||
*
|
||||
* @param {string} hiddenTableId Source table.
|
||||
* @param {string} destTableId Destination table.
|
||||
* @param {string} selectColumnsMap Map of source to destination column ids to include in the comparison results.
|
||||
* @param {Map<string, string[]>} srcToDestColIds Map of source to one or more destination column ids
|
||||
* to include in the comparison results.
|
||||
* @param {string[]} mergeCols List of (destination) column ids to use for matching.
|
||||
* @returns {Promise<BulkColValues>} Decoded column values from both tables that were matched, and had differences.
|
||||
*/
|
||||
private async _getTableComparison(hiddenTableId: string, destTableId: string, selectColumnsMap: Map<string, string>,
|
||||
private async _getTableComparison(hiddenTableId: string, destTableId: string, srcToDestColIds: Map<string, string[]>,
|
||||
mergeCols: string[]): Promise<BulkColValues> {
|
||||
const joinColumns: [string, string][] =
|
||||
[...selectColumnsMap.entries()].filter(([_srcColId, destColId]) => mergeCols.includes(destColId));
|
||||
const joinColumnsMap = new Map(joinColumns);
|
||||
const mergeColIds = new Set(mergeCols);
|
||||
const destToSrcMergeColIds = new Map();
|
||||
srcToDestColIds.forEach((destColIds, srcColId) => {
|
||||
const maybeMergeColId = destColIds.find(colId => mergeColIds.has(colId));
|
||||
if (maybeMergeColId !== undefined) {
|
||||
destToSrcMergeColIds.set(maybeMergeColId, srcColId);
|
||||
}
|
||||
});
|
||||
|
||||
const query = buildComparisonQuery(hiddenTableId, destTableId, selectColumnsMap, joinColumnsMap);
|
||||
const query = buildComparisonQuery(hiddenTableId, destTableId, srcToDestColIds, destToSrcMergeColIds);
|
||||
const result = await this._activeDoc.docStorage.fetchQuery(query);
|
||||
return this._activeDoc.docStorage.decodeMarshalledDataFromTables(result);
|
||||
}
|
||||
@@ -636,7 +660,7 @@ function isBlank(value: CellValue): boolean {
|
||||
}
|
||||
|
||||
// Helper function that strips import prefixes from columns in transform rules (if ids are present).
|
||||
function stripPrefixes({destCols}: TransformRule): void {
|
||||
function stripRulePrefixes({destCols}: TransformRule): void {
|
||||
for (const col of destCols) {
|
||||
const colId = col.colId;
|
||||
if (colId && colId.startsWith(IMPORT_TRANSFORM_COLUMN_PREFIX)) {
|
||||
@@ -645,6 +669,12 @@ function stripPrefixes({destCols}: TransformRule): void {
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function that returns new `colIds` with import prefixes stripped.
|
||||
function stripPrefixes(colIds: string[]): string[] {
|
||||
return colIds.map(id => id.startsWith(IMPORT_TRANSFORM_COLUMN_PREFIX) ?
|
||||
id.slice(IMPORT_TRANSFORM_COLUMN_PREFIX.length) : id);
|
||||
}
|
||||
|
||||
type MergeFunction = (srcVal: CellValue, destVal: CellValue) => CellValue;
|
||||
|
||||
/**
|
||||
|
||||
@@ -148,13 +148,15 @@ export function expandQuery(iquery: ServerQuery, docData: DocData, onDemandFormu
|
||||
*
|
||||
* @param {string} leftTableId Name of the left table in the comparison.
|
||||
* @param {string} rightTableId Name of the right table in the comparison.
|
||||
* @param {Map<string, string>} selectColumns Map of left table column ids to their matching equivalent
|
||||
* from the right table. All of these columns will be included in the result, aliased by table id.
|
||||
* @param {Map<string, string>} joinColumns Map of left table column ids to their matching equivalent
|
||||
* from the right table. These columns are used to join `leftTableID` to `rightTableId`.
|
||||
* @param {Map<string, string[]>} selectColumns Map of left table column ids to their matching equivalent(s)
|
||||
* from the right table. A single left column can be compared against 2 or more right columns, so the
|
||||
* values of `selectColumns` are arrays. All of these columns will be included in the result, aliased by
|
||||
* table id.
|
||||
* @param {Map<string, string>} joinColumns Map of right table column ids to their matching equivalent
|
||||
* from the left table. These columns are used to join `leftTableId` to `rightTableId`.
|
||||
* @returns {ExpandedQuery} The constructed query.
|
||||
*/
|
||||
export function buildComparisonQuery(leftTableId: string, rightTableId: string, selectColumns: Map<string, string>,
|
||||
export function buildComparisonQuery(leftTableId: string, rightTableId: string, selectColumns: Map<string, string[]>,
|
||||
joinColumns: Map<string, string>): ExpandedQuery {
|
||||
const query: ExpandedQuery = { tableId: leftTableId, filters: {} };
|
||||
|
||||
@@ -169,14 +171,16 @@ export function buildComparisonQuery(leftTableId: string, rightTableId: string,
|
||||
`${quoteIdent(rightTableId)}.id AS ${quoteIdent(rightTableId + '.id')}`
|
||||
);
|
||||
|
||||
// Select columns from both tables using the table id as a prefix for each column name.
|
||||
selectColumns.forEach((rightTableColumn, leftTableColumn) => {
|
||||
// Select columns from both tables, using the table id as a prefix for each column name.
|
||||
selectColumns.forEach((rightTableColumns, leftTableColumn) => {
|
||||
const leftColumnAlias = `${leftTableId}.${leftTableColumn}`;
|
||||
const rightColumnAlias = `${rightTableId}.${rightTableColumn}`;
|
||||
selects.push(
|
||||
`${quoteIdent(leftTableId)}.${quoteIdent(leftTableColumn)} AS ${quoteIdent(leftColumnAlias)}`,
|
||||
`${quoteIdent(rightTableId)}.${quoteIdent(rightTableColumn)} AS ${quoteIdent(rightColumnAlias)}`
|
||||
);
|
||||
selects.push(`${quoteIdent(leftTableId)}.${quoteIdent(leftTableColumn)} AS ${quoteIdent(leftColumnAlias)}`);
|
||||
|
||||
rightTableColumns.forEach(colId => {
|
||||
const rightColumnAlias = `${rightTableId}.${colId}`;
|
||||
selects.push(`${quoteIdent(rightTableId)}.${quoteIdent(colId)} AS ${quoteIdent(rightColumnAlias)}`
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
@@ -189,14 +193,14 @@ export function buildComparisonQuery(leftTableId: string, rightTableId: string,
|
||||
* the left table can only be matched with at most 1 equivalent row from the right table.
|
||||
*/
|
||||
const dedupedRightTableQuery =
|
||||
`SELECT MIN(id) AS id, ${[...joinColumns.values()].map(v => quoteIdent(v)).join(', ')} ` +
|
||||
`SELECT MIN(id) AS id, ${[...joinColumns.keys()].map(v => quoteIdent(v)).join(', ')} ` +
|
||||
`FROM ${quoteIdent(rightTableId)} ` +
|
||||
`GROUP BY ${[...joinColumns.values()].map(v => quoteIdent(v)).join(', ')}`;
|
||||
`GROUP BY ${[...joinColumns.keys()].map(v => quoteIdent(v)).join(', ')}`;
|
||||
const dedupedRightTableAlias = quoteIdent('deduped_' + rightTableId);
|
||||
|
||||
// Join the left table to the (de-duplicated) right table, and include unmatched left rows.
|
||||
const joinConditions: string[] = [];
|
||||
joinColumns.forEach((rightTableColumn, leftTableColumn) => {
|
||||
joinColumns.forEach((leftTableColumn, rightTableColumn) => {
|
||||
const leftExpression = `${quoteIdent(leftTableId)}.${quoteIdent(leftTableColumn)}`;
|
||||
const rightExpression = `${dedupedRightTableAlias}.${quoteIdent(rightTableColumn)}`;
|
||||
joinConditions.push(`${leftExpression} = ${rightExpression}`);
|
||||
@@ -212,16 +216,21 @@ export function buildComparisonQuery(leftTableId: string, rightTableId: string,
|
||||
|
||||
// Filter out matching rows where all non-join columns from both tables are identical.
|
||||
const whereConditions: string[] = [];
|
||||
for (const [leftTableColumn, rightTableColumn] of selectColumns.entries()) {
|
||||
if (joinColumns.has(leftTableColumn)) { continue; }
|
||||
for (const [leftTableColumnId, rightTableColumnIds] of selectColumns.entries()) {
|
||||
const leftColumnAlias = quoteIdent(`${leftTableId}.${leftTableColumnId}`);
|
||||
|
||||
const leftColumnAlias = quoteIdent(`${leftTableId}.${leftTableColumn}`);
|
||||
const rightColumnAlias = quoteIdent(`${rightTableId}.${rightTableColumn}`);
|
||||
for (const rightTableColId of rightTableColumnIds) {
|
||||
// If this left/right column id pair was already used for joining, skip it.
|
||||
if (joinColumns.get(rightTableColId) === leftTableColumnId) { continue; }
|
||||
|
||||
// Only include rows that have differences in column values.
|
||||
whereConditions.push(`${leftColumnAlias} IS NOT ${rightColumnAlias}`);
|
||||
// Only include rows that have differences in column values.
|
||||
const rightColumnAlias = quoteIdent(`${rightTableId}.${rightTableColId}`);
|
||||
whereConditions.push(`${leftColumnAlias} IS NOT ${rightColumnAlias}`);
|
||||
}
|
||||
}
|
||||
if (whereConditions.length > 0) {
|
||||
wheres.push(`(${whereConditions.join(' OR ')})`);
|
||||
}
|
||||
wheres.push(`(${whereConditions.join(' OR ')})`);
|
||||
|
||||
// Copy decisions to the query object, and return.
|
||||
query.joins = joins;
|
||||
|
||||
Reference in New Issue
Block a user