(core) Enable incremental imports

Summary:
The import dialog now has an 'Update existing records' option.
When it is checked, one or more fields can be selected on which
to match records in the source and destination tables.

If all fields match, then the matched record in the
destination table will be merged with the incoming record
from the source table. This means the incoming values will
replace the destination table values, unless the incoming
values are blank.

Additional merge strategies are implemented in the data
engine, but the import dialog currently uses only one of
them. The others can be exposed in the UI in the future;
they change how source and destination values are merged
in different contexts, such as when blank values exist.

Test Plan: Python and browser tests.

Reviewers: paulfitz

Reviewed By: paulfitz

Subscribers: alexmojaki

Differential Revision: https://phab.getgrist.com/D3020
This commit is contained in:
George Gevoian
2021-09-14 23:12:34 -07:00
parent a543e5194a
commit 8a7edb6257
9 changed files with 914 additions and 135 deletions

View File

@@ -1,4 +1,7 @@
from collections import namedtuple
from collections import defaultdict, namedtuple
import six
from six.moves import zip, xrange
import column
import identifiers
@@ -37,6 +40,68 @@ def _strip_prefixes(transform_rule):
dest_col["colId"] = colId[len(_import_transform_col_prefix):]
def _is_blank(value):
"If value is blank (e.g. None, blank string), returns true."
if value is None:
return True
elif isinstance(value, six.string_types) and value.strip() == '':
return True
else:
return False
def _build_merge_col_map(column_data, merge_cols):
"""
Returns a dictionary with keys that are comprised of
the values from column_data for the columns in
merge_cols. The values are the row ids (index) in
column_data for that particular key; multiple row ids
imply that duplicates exist that contain the same values
for all columns in merge_cols.
Used for merging into tables where fast, constant-time lookups
are needed. For example, a source table can pass in its
column_data into this function to build the map, and the
destination table can then query the map using its own
values for the columns in merge_cols to check for any
matching rows that are candidates for updating.
"""
merge_col_map = defaultdict(list)
for row_id, key in enumerate(zip(*[column_data[col] for col in merge_cols])):
# If any part of the key is blank, don't include it in the map.
if any(_is_blank(val) for val in key):
continue
try:
merge_col_map[key].append(row_id + 1)
except TypeError:
pass # If key isn't hashable, don't include it in the map.
return merge_col_map
# Dictionary mapping merge strategy types from ActiveDocAPI.ts to functions
# that merge source and destination column values.
#
# NOTE: This dictionary should be kept in sync with the types in that file.
#
# All functions have the same signature: (src, dest) => output,
# where src and dest are column values from a source and destination
# table respectively, and output is either src or destination.
#
# For example, a key of replace-with-nonblank-source will return a merge function
# that returns the src argument if it's not blank. Otherwise it returns the
# dest argument. In the context of incremental imports, this is a function
# that update destination fields when the source field isn't blank, preserving
# existing values in the destination field that aren't replaced.
_merge_funcs = {
'replace-with-nonblank-source': lambda src, dest: dest if _is_blank(src) else src,
'replace-all-fields': lambda src, _: src,
'replace-blank-fields-only': lambda src, dest: src if _is_blank(dest) else dest
}
class ImportActions(object):
def __init__(self, useractions, docmodel, engine):
@@ -157,6 +222,68 @@ class ImportActions(object):
return new_cols
def _MergeColumnData(self, dest_table_id, column_data, merge_options):
    """
    Merges column_data into table dest_table_id: destination rows that match
    a source row on all merge columns are updated with the merged values, and
    source rows that match no destination row are added to the end of table
    dest_table_id.

    dest_table_id: id of destination table
    column_data: column data from source table to merge into destination
      table, as a dictionary mapping column ids to lists of values
    merge_options: dictionary with the keys
      'mergeCols': list of column ids to use as keys for merging
      'mergeStrategy': dictionary whose 'type' is a key of _merge_funcs,
        selecting how a source value and a destination value are combined

    When several source rows share the same key, the last of them supplies
    the values for the update, and none of them is added as a new record.
    Destination rows whose key is not hashable are skipped.
    """
    dest_table = self._engine.tables[dest_table_id]
    merge_cols = merge_options['mergeCols']
    merge_col_map = _build_merge_col_map(column_data, merge_cols)

    updated_row_ids = []
    matched_src_table_rows = set()

    # Initialize column data for new and updated rows.
    updated_rows = {col_id: [] for col_id in column_data}
    new_rows = {col_id: [] for col_id in column_data}

    strategy_type = merge_options['mergeStrategy']['type']
    merge = _merge_funcs[strategy_type]

    # Resolve destination columns once, rather than once per matched row.
    key_cols = [dest_table.get_column(col_id) for col_id in merge_cols]
    value_cols = {col_id: dest_table.get_column(col_id) for col_id in column_data}

    # Compute which source table rows should update existing records in destination table.
    for dest_row_id in dest_table.row_ids:
        lookup_key = tuple(col.raw_get(dest_row_id) for col in key_cols)
        try:
            # Use .get() so that a miss doesn't insert an entry into the defaultdict.
            src_row_ids = merge_col_map.get(lookup_key)
        except TypeError:
            # We can arrive here if lookup_key isn't hashable. If that's the case, skip
            # this row since we can't efficiently search for a match in the source table.
            continue

        if not src_row_ids:
            continue

        matched_src_table_rows.update(src_row_ids)
        updated_row_ids.append(dest_row_id)
        # If the source has duplicates for this key, the last one wins.
        src_index = src_row_ids[-1] - 1
        for col_id, col_vals in six.iteritems(column_data):
            dest_val = value_cols[col_id].raw_get(dest_row_id)
            updated_rows[col_id].append(merge(col_vals[src_index], dest_val))

    num_src_rows = len(column_data[merge_cols[0]])

    # Compute which source table rows should be added to destination table as new records.
    for row_id in xrange(1, num_src_rows + 1):
        # If we've matched against the row before, we shouldn't add it.
        if row_id in matched_src_table_rows:
            continue
        for col_id, col_vals in six.iteritems(column_data):
            new_rows[col_id].append(col_vals[row_id - 1])

    num_new_rows = num_src_rows - len(matched_src_table_rows)
    self._useractions.BulkUpdateRecord(dest_table_id, updated_row_ids, updated_rows)
    self._useractions.BulkAddRecord(dest_table_id, [None] * num_new_rows, new_rows)
def DoGenImporterView(self, source_table_id, dest_table_id, transform_rule = None):
"""
@@ -224,7 +351,8 @@ class ImportActions(object):
def DoTransformAndFinishImport(self, hidden_table_id, dest_table_id,
into_new_table, transform_rule):
into_new_table, transform_rule,
merge_options):
"""
Finishes import into new or existing table depending on flag 'into_new_table'
Returns destination table id. (new or existing)
@@ -303,7 +431,10 @@ class ImportActions(object):
new_table = self._useractions.AddTable(dest_table_id, col_specs)
dest_table_id = new_table['table_id']
self._useractions.BulkAddRecord(dest_table_id, [None] * len(row_ids), column_data)
if not merge_options.get('mergeCols'):
self._useractions.BulkAddRecord(dest_table_id, [None] * len(row_ids), column_data)
else:
self._MergeColumnData(dest_table_id, column_data, merge_options)
log.debug("Finishing TransformAndFinishImport")

View File

@@ -13,68 +13,91 @@ class TestImportTransform(test_engine.EngineTestCase):
def init_state(self):
# Add source table
self.apply_user_action(['AddTable', 'Hidden_table', [
{'id': 'employee_id', 'type': 'Int'},
{'id': 'fname', 'type': 'Text'},
{'id': 'mname', 'type': 'Text'},
{'id': 'lname', 'type': 'Text'},
{'id': 'email', 'type': 'Text'},
]])
self.apply_user_action(['BulkAddRecord', 'Hidden_table', [1, 2], {'fname': ['Carry', 'Don'],
'mname': ['M.', 'B.'],
'lname': ['Jonson', "Yoon"]
}])
self.apply_user_action(['BulkAddRecord', 'Hidden_table', [1, 2, 3, 4, 5, 6, 7], {
'employee_id': [1, 2, 3, 4, 5, 6, 7],
'fname': ['Bob', 'Carry', 'Don', 'Amir', 'Ken', 'George', 'Barbara'],
'mname': ['F.', None, 'B.', '', 'C.', '', 'D.'],
'lname': ['Nike', 'Jonson', "Yoon", "Greene", "Foster", "Huang", "Kinney"],
'email': [
'bob@example.com', None, "don@example.com", "amir@example.com",
"ken@example.com", "", "barbara@example.com"
]
}])
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[1, "manualSort", "ManualSortPos", False, ""],
[2, "fname", "Text", False, ""],
[3, "mname", "Text", False, ""],
[4, "lname", "Text", False, ""],
[2, "employee_id", "Int", False, ""],
[3, "fname", "Text", False, ""],
[4, "mname", "Text", False, ""],
[5, "lname", "Text", False, ""],
[6, "email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 1)
#Filled in colids for existing table
self.TEMP_transform_rule_colids = {
"destCols": [
{ "colId": "First_Name", "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": "Last_Name", "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": "Middle_Initial", "label": "Middle Initial",
"type": "Text", "formula": "$mname[0]" },
#{ "colId": "Blank", "label": "Blank", //destination1 has no blank column
# "type": "Text", "formula": "" },
]
"destCols": [
{ "colId": "Employee_ID", "label": "Employee ID",
"type": "Int", "formula": "$employee_id" },
{ "colId": "First_Name", "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": "Last_Name", "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": "Middle_Initial", "label": "Middle Initial",
"type": "Text", "formula": "$mname[0] if $mname else ''" },
{ "colId": "Email", "label": "Email",
"type": "Text", "formula": "$email" },
#{ "colId": "Blank", "label": "Blank", // Destination1 has no blank column
# "type": "Text", "formula": "" },
]
}
#Then try it with blank in colIds (for new tables)
self.TEMP_transform_rule_no_colids = {
"destCols": [
{ "colId": None, "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": None, "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": None, "label": "Middle Initial",
"type": "Text", "formula": "$mname[0]" },
{ "colId": None, "label": "Blank",
"type": "Text", "formula": "" },
]
"destCols": [
{ "colId": None, "label": "Employee ID",
"type": "Int", "formula": "$employee_id" },
{ "colId": None, "label": "First Name",
"type": "Text", "formula": "$fname" },
{ "colId": None, "label": "Last Name",
"type": "Text", "formula": "$lname" },
{ "colId": None, "label": "Middle Initial",
"type": "Text", "formula": "$mname[0] if $mname else ''" },
{ "colId": None, "label": "Email",
"type": "Text", "formula": "$email" },
{ "colId": None, "label": "Blank",
"type": "Text", "formula": "" },
]
}
# Add destination table which contains columns corresponding to source table with different names
self.apply_user_action(['AddTable', 'Destination1', [
{'label': 'First Name', 'id': 'First_Name', 'type': 'Text'},
{'label': 'Last Name', 'id': 'Last_Name', 'type': 'Text'},
{'label': 'Middle Initial', 'id': 'Middle_Initial', 'type': 'Text'}]])
self.apply_user_action(['BulkAddRecord', 'Destination1', [1], {'First_Name': ['Bob'],
'Last_Name': ['Nike'],
'Middle_Initial': ['F.']}])
{'label': 'Employee ID', 'id': 'Employee_ID', 'type': 'Int'},
{'label': 'First Name', 'id': 'First_Name', 'type': 'Text'},
{'label': 'Last Name', 'id': 'Last_Name', 'type': 'Text'},
{'label': 'Middle Initial', 'id': 'Middle_Initial', 'type': 'Text'},
{'label': 'Email', 'id': 'Email', 'type': 'Text'}]])
self.apply_user_action(['BulkAddRecord', 'Destination1', [1, 2, 3], {
'Employee_ID': [1, 2, 3],
'First_Name': ['Bob', 'Carry', 'Don'],
'Last_Name': ['Nike', 'Jonson', "Yoon"],
'Middle_Initial': ['F.', 'M.', None],
'Email': ['', 'carry.m.jonson@example.com', 'don.b.yoon@example.com']
}])
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[5, "manualSort", "ManualSortPos", False, ""],
[6, "First_Name", "Text", False, ""],
[7, "Last_Name", "Text", False, ""],
[8, "Middle_Initial","Text", False, ""],
["id", "colId", "type", "isFormula", "formula"],
[7, "manualSort", "ManualSortPos", False, ""],
[8, "Employee_ID", "Int", False, ""],
[9, "First_Name", "Text", False, ""],
[10, "Last_Name", "Text", False, ""],
[11, "Middle_Initial", "Text", False, ""],
[12, "Email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 2)
# Verify created tables
@@ -84,57 +107,62 @@ class TestImportTransform(test_engine.EngineTestCase):
])
def test_finish_import_into_new_table(self):
# Add source and destination tables
self.init_state()
#into_new_table = True, transform_rule : no colids (will be generated for new table)
#into_new_table = True, transform_rule : no colids (will be generated for new table), merge_options = {}
out_actions = self.apply_user_action(
['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, self.TEMP_transform_rule_no_colids])
['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, self.TEMP_transform_rule_no_colids, {}])
self.assertPartialOutActions(out_actions, {
"stored": [
["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0]", "isFormula": True, "type": "Text"}],
["AddRecord", "_grist_Tables_column", 9, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0]", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 9.0, "type": "Text", "widgetOptions": ""}],
["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3]],
["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
["RemoveRecord", "_grist_Views_section", 1],
["RemoveRecord", "_grist_TabBar", 1],
["RemoveRecord", "_grist_Pages", 1],
["RemoveRecord", "_grist_Views", 1],
["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 9]],
["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
["RemoveRecord", "_grist_Tables", 1],
["RemoveTable", "Hidden_table"],
["AddTable", "NewTable", [{"formula": "", "id": "manualSort", "isFormula": False, "type": "ManualSortPos"}, {"formula": "", "id": "First_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Last_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Middle_Initial", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Blank", "isFormula": False, "type": "Text"}]],
["AddTable", "NewTable", [{"formula": "", "id": "manualSort", "isFormula": False, "type": "ManualSortPos"}, {"formula": "", "id": "Employee_ID", "isFormula": False, "type": "Int"}, {"formula": "", "id": "First_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Last_Name", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Middle_Initial", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Email", "isFormula": False, "type": "Text"}, {"formula": "", "id": "Blank", "isFormula": False, "type": "Text"}]],
["AddRecord", "_grist_Tables", 3, {"primaryViewId": 0, "tableId": "NewTable"}],
["BulkAddRecord", "_grist_Tables_column", [9, 10, 11, 12, 13], {"colId": ["manualSort", "First_Name", "Last_Name", "Middle_Initial", "Blank"], "formula": ["", "", "", "", ""], "isFormula": [False, False, False, False, False], "label": ["manualSort", "First Name", "Last Name", "Middle Initial", "Blank"], "parentId": [3, 3, 3, 3, 3], "parentPos": [9.0, 10.0, 11.0, 12.0, 13.0], "type": ["ManualSortPos", "Text", "Text", "Text", "Text"], "widgetOptions": ["", "", "", "", ""]}],
["BulkAddRecord", "_grist_Tables_column", [13, 14, 15, 16, 17, 18, 19], {"colId": ["manualSort", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "Blank"], "formula": ["", "", "", "", "", "", ""], "isFormula": [False, False, False, False, False, False, False], "label": ["manualSort", "Employee ID", "First Name", "Last Name", "Middle Initial", "Email", "Blank"], "parentId": [3, 3, 3, 3, 3, 3, 3], "parentPos": [13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0], "type": ["ManualSortPos", "Int", "Text", "Text", "Text", "Text", "Text"], "widgetOptions": ["", "", "", "", "", "", ""]}],
["AddRecord", "_grist_Views", 3, {"name": "NewTable", "type": "raw_data"}],
["AddRecord", "_grist_TabBar", 3, {"tabPos": 3.0, "viewRef": 3}],
["AddRecord", "_grist_Pages", 3, {"indentation": 0, "pagePos": 3.0, "viewRef": 3}],
["AddRecord", "_grist_Views_section", 3, {"borderWidth": 1, "defaultWidth": 100, "parentId": 3, "parentKey": "record", "sortColRefs": "[]", "tableRef": 3, "title": ""}],
["BulkAddRecord", "_grist_Views_section_field", [7, 8, 9, 10], {"colRef": [10, 11, 12, 13], "parentId": [3, 3, 3, 3], "parentPos": [7.0, 8.0, 9.0, 10.0]}],
["BulkAddRecord", "_grist_Views_section_field", [11, 12, 13, 14, 15, 16], {"colRef": [14, 15, 16, 17, 18, 19], "parentId": [3, 3, 3, 3, 3, 3], "parentPos": [11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}],
["UpdateRecord", "_grist_Tables", 3, {"primaryViewId": 3}],
["BulkAddRecord", "NewTable", [1, 2], {"First_Name": ["Carry", "Don"], "Last_Name": ["Jonson", "Yoon"], "Middle_Initial": ["M", "B"], "manualSort": [1.0, 2.0]}],
["BulkAddRecord", "NewTable", [1, 2, 3, 4, 5, 6, 7], {"Email": ["bob@example.com", None, "don@example.com", "amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [1, 2, 3, 4, 5, 6, 7], "First_Name": ["Bob", "Carry", "Don", "Amir", "Ken", "George", "Barbara"], "Last_Name": ["Nike", "Jonson", "Yoon", "Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["F", "", "B", "", "C", "", "D"], "manualSort": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]}],
]
})
#1-4 in hidden table, 5-8 in destTable, 9-13 for new table
#1-6 in hidden table, 7-12 in destTable, 13-19 for new table
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[ 9, "manualSort", "ManualSortPos", False, ""],
[10, "First_Name", "Text", False, ""],
[11, "Last_Name", "Text", False, ""],
[12, "Middle_Initial", "Text", False, ""],
[13, "Blank", "Text", False, ""],
[13, "manualSort", "ManualSortPos", False, ""],
[14, "Employee_ID", "Int", False, ""],
[15, "First_Name", "Text", False, ""],
[16, "Last_Name", "Text", False, ""],
[17, "Middle_Initial", "Text", False, ""],
[18, "Email", "Text", False, ""],
[19, "Blank", "Text", False, ""],
], rows=lambda r: r.parentId.id == 3)
self.assertTableData('NewTable', cols="all", data=[
["id", "First_Name", "Last_Name", "Middle_Initial", "Blank", "manualSort"],
[1, "Carry", "Jonson", "M", "", 1.0],
[2, "Don", "Yoon", "B", "", 2.0]
["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "Blank", "manualSort"],
[1, 1, "Bob", "Nike", "F", "bob@example.com", "", 1.0],
[2, 2, "Carry", "Jonson", "", None, "", 2.0],
[3, 3, "Don", "Yoon", "B", "don@example.com", "", 3.0],
[4, 4, "Amir", "Greene", "", "amir@example.com", "", 4.0],
[5, 5, "Ken", "Foster", "C", "ken@example.com", "", 5.0],
[6, 6, "George", "Huang", "", "", "", 6.0],
[7, 7, "Barbara", "Kinney", "D", "barbara@example.com", "", 7.0],
])
# Verify removed hidden table and add the new one
self.assertPartialData("_grist_Tables", ["id", "tableId"], [
[2, "Destination1"],
@@ -142,28 +170,36 @@ class TestImportTransform(test_engine.EngineTestCase):
])
def test_finish_import_into_existing_table(self):
# Add source and destination tables
self.init_state()
#into_new_table false, transform_rule=null
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids])
#into_new_table = False, transform_rule : colids, merge_options = None
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids, None])
#1-4 in hidden table, 5-8 in destTable
#1-6 in hidden table, 7-12 in destTable
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[5, "manualSort", "ManualSortPos", False, ""],
[6, "First_Name", "Text", False, ""],
[7, "Last_Name", "Text", False, ""],
[8, "Middle_Initial", "Text", False, ""],
[7, "manualSort", "ManualSortPos", False, ""],
[8, "Employee_ID", "Int", False, ""],
[9, "First_Name", "Text", False, ""],
[10, "Last_Name", "Text", False, ""],
[11, "Middle_Initial", "Text", False, ""],
[12, "Email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 2)
# First 3 rows were already in Destination1 before import
self.assertTableData('Destination1', cols="all", data=[
["id", "First_Name", "Last_Name", "Middle_Initial", "manualSort"],
[1, "Bob", "Nike", "F.", 1.0], #F. was there to begin with
[2, "Carry", "Jonson", "M", 2.0], #others imported with $mname[0]
[3, "Don", "Yoon", "B", 3.0],
["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
[1, 1, "Bob", "Nike", "F.", "", 1.0],
[2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
[3, 3, "Don", "Yoon", None, "don.b.yoon@example.com", 3.0],
[4, 1, "Bob", "Nike", "F", "bob@example.com", 4.0],
[5, 2, "Carry", "Jonson", "", None, 5.0],
[6, 3, "Don", "Yoon", "B", "don@example.com", 6.0],
[7, 4, "Amir", "Greene", "", "amir@example.com", 7.0],
[8, 5, "Ken", "Foster", "C", "ken@example.com", 8.0],
[9, 6, "George", "Huang", "", "", 9.0],
[10, 7, "Barbara", "Kinney", "D", "barbara@example.com", 10.0],
])
# Verify removed hidden table
@@ -174,22 +210,29 @@ class TestImportTransform(test_engine.EngineTestCase):
# Add source and destination tables
self.init_state()
#into_new_table = True, transform_rule : no colids (will be generated for new table)
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, None])
#into_new_table = True, transform_rule = None, merge_options = None
self.apply_user_action(['TransformAndFinishImport', 'Hidden_table', 'NewTable', True, None, None])
#1-4 in src table, 5-8 in hiddentable
#1-6 in src table, 7-12 in hiddentable
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[9, "manualSort", "ManualSortPos", False, ""],
[10, "fname", "Text", False, ""],
[11, "mname", "Text", False, ""],
[12, "lname", "Text", False, ""],
[13, "manualSort", "ManualSortPos", False, ""],
[14, "employee_id", "Int", False, ""],
[15, "fname", "Text", False, ""],
[16, "mname", "Text", False, ""],
[17, "lname", "Text", False, ""],
[18, "email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 3)
self.assertTableData('NewTable', cols="all", data=[
["id", "fname", "lname", "mname", "manualSort"],
[1, "Carry", "Jonson", "M.", 1.0],
[2, "Don", "Yoon", "B.", 2.0]
["id", "employee_id", "fname", "lname", "mname", "email", "manualSort"],
[1, 1, "Bob", "Nike", "F.", "bob@example.com", 1.0],
[2, 2, "Carry", "Jonson", None, None, 2.0],
[3, 3, "Don", "Yoon", "B.", "don@example.com", 3.0],
[4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
[5, 5, "Ken", "Foster", "C.", "ken@example.com", 5.0],
[6, 6, "George", "Huang", "", "", 6.0],
[7, 7, "Barbara", "Kinney", "D.", "barbara@example.com", 7.0],
])
@@ -198,3 +241,338 @@ class TestImportTransform(test_engine.EngineTestCase):
[2, "Destination1"],
[3, "NewTable"]
])
def test_finish_import_into_existing_table_with_single_merge_col(self):
# Incremental import into Destination1 keyed on a single merge column
# ('Employee_ID') with the 'replace-with-nonblank-source' strategy.
# Incoming employees 1-3 share an id with an existing record, so they are
# merged into it; incoming employees 4-7 are added as new records.
# Add source and destination tables.
self.init_state()
# Use 'Employee_ID' as the merge column, updating existing employees in Destination1 with the same employee id.
out_actions = self.apply_user_action(
['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
{'mergeCols': ['Employee_ID'], 'mergeStrategy': {'type': 'replace-with-nonblank-source'}}]
)
# Check that the right actions were created.
# The expected BulkUpdateRecord lists only rows 1 and 3: for row 2 (Carry) the
# incoming Middle_Initial ('') and Email (None) are blank, so its existing
# values are expected to stay as they are.
self.assertPartialOutActions(out_actions, {
"stored": [
["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
["RemoveRecord", "_grist_Views_section", 1],
["RemoveRecord", "_grist_TabBar", 1],
["RemoveRecord", "_grist_Pages", 1],
["RemoveRecord", "_grist_Views", 1],
["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
["RemoveRecord", "_grist_Tables", 1],
["RemoveTable", "Hidden_table"],
["BulkUpdateRecord", "Destination1", [1, 3], {"Email": ["bob@example.com", "don@example.com"], "Middle_Initial": ["F", "B"]}],
["BulkAddRecord", "Destination1", [4, 5, 6, 7], {"Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [4, 5, 6, 7], "First_Name": ["Amir", "Ken", "George", "Barbara"], "Last_Name": ["Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["", "C", "", "D"], "manualSort": [4.0, 5.0, 6.0, 7.0]}],
]
})
# The column metadata of Destination1 (parentId 2) is expected to be unchanged by the import.
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[7, "manualSort", "ManualSortPos", False, ""],
[8, "Employee_ID", "Int", False, ""],
[9, "First_Name", "Text", False, ""],
[10, "Last_Name", "Text", False, ""],
[11, "Middle_Initial", "Text", False, ""],
[12, "Email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 2)
# Check that Destination1 has no duplicates and that previous records (1 - 3) are updated.
self.assertTableData('Destination1', cols="all", data=[
["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
[1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
[2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
[3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
[4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
[5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
[6, 6, "George", "Huang", "", "", 6.0],
[7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
])
# Check the table list after the import: Hidden_table (id 1) was removed above, Destination1 keeps id 2.
self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_finish_import_into_existing_table_with_multiple_merge_cols(self):
# Incremental import into Destination1 keyed on two merge columns
# ('First_Name' and 'Last_Name') with the 'replace-with-nonblank-source'
# strategy. A record is merged only when both columns match; the expected
# actions and table contents are the same as when merging on 'Employee_ID'.
# Add source and destination tables.
self.init_state()
# Use 'First_Name' and 'Last_Name' as the merge columns, updating existing employees in Destination1 with the same name.
out_actions = self.apply_user_action(
['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
{'mergeCols': ['First_Name', 'Last_Name'], 'mergeStrategy': {'type': 'replace-with-nonblank-source'}}]
)
# Check that the right actions were created.
# The expected BulkUpdateRecord lists only rows 1 and 3: for row 2 (Carry) the
# incoming Middle_Initial ('') and Email (None) are blank, so its existing
# values are expected to stay as they are.
self.assertPartialOutActions(out_actions, {
"stored": [
["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
["RemoveRecord", "_grist_Views_section", 1],
["RemoveRecord", "_grist_TabBar", 1],
["RemoveRecord", "_grist_Pages", 1],
["RemoveRecord", "_grist_Views", 1],
["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
["RemoveRecord", "_grist_Tables", 1],
["RemoveTable", "Hidden_table"],
["BulkUpdateRecord", "Destination1", [1, 3], {"Email": ["bob@example.com", "don@example.com"], "Middle_Initial": ["F", "B"]}],
["BulkAddRecord", "Destination1", [4, 5, 6, 7], {"Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [4, 5, 6, 7], "First_Name": ["Amir", "Ken", "George", "Barbara"], "Last_Name": ["Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["", "C", "", "D"], "manualSort": [4.0, 5.0, 6.0, 7.0]}],
]
})
# The column metadata of Destination1 (parentId 2) is expected to be unchanged by the import.
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[7, "manualSort", "ManualSortPos", False, ""],
[8, "Employee_ID", "Int", False, ""],
[9, "First_Name", "Text", False, ""],
[10, "Last_Name", "Text", False, ""],
[11, "Middle_Initial", "Text", False, ""],
[12, "Email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 2)
# Check that Destination1 has no duplicates and that previous records (1 - 3) are updated.
self.assertTableData('Destination1', cols="all", data=[
["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
[1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
[2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
[3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
[4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
[5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
[6, 6, "George", "Huang", "", "", 6.0],
[7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
])
# Check the table list after the import: Hidden_table (id 1) was removed above, Destination1 keeps id 2.
self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_finish_import_into_existing_table_with_no_matching_merge_cols(self):
# Incremental import into Destination1 keyed on 'Email', where no incoming
# record shares an email with an existing one. Nothing is expected to be
# updated: all 7 incoming records are added as new rows 4-10.
# Add source and destination tables.
self.init_state()
# Use 'Email' as the merge column: existing employees in Destination1 have different emails, so none should match incoming data.
out_actions = self.apply_user_action(
['TransformAndFinishImport', 'Hidden_table', 'Destination1', False, self.TEMP_transform_rule_colids,
{'mergeCols': ['Email'], 'mergeStrategy': {'type': 'replace-with-nonblank-source'}}]
)
# Check that the right actions were created.
# No BulkUpdateRecord is expected here; the only action on Destination1 is a
# single BulkAddRecord carrying every incoming record.
self.assertPartialOutActions(out_actions, {
"stored": [
["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {"formula": "$mname[0] if $mname else ''", "isFormula": True, "type": "Text"}],
["AddRecord", "_grist_Tables_column", 13, {"colId": "gristHelper_Import_Middle_Initial", "formula": "$mname[0] if $mname else ''", "isFormula": True, "label": "Middle Initial", "parentId": 1, "parentPos": 13.0, "type": "Text", "widgetOptions": ""}],
["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
["RemoveRecord", "_grist_Views_section", 1],
["RemoveRecord", "_grist_TabBar", 1],
["RemoveRecord", "_grist_Pages", 1],
["RemoveRecord", "_grist_Views", 1],
["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
["RemoveRecord", "_grist_Tables", 1],
["RemoveTable", "Hidden_table"],
["BulkAddRecord", "Destination1", [4, 5, 6, 7, 8, 9, 10], {"Email": ["bob@example.com", None, "don@example.com", "amir@example.com", "ken@example.com", "", "barbara@example.com"], "Employee_ID": [1, 2, 3, 4, 5, 6, 7], "First_Name": ["Bob", "Carry", "Don", "Amir", "Ken", "George", "Barbara"], "Last_Name": ["Nike", "Jonson", "Yoon", "Greene", "Foster", "Huang", "Kinney"], "Middle_Initial": ["F", "", "B", "", "C", "", "D"], "manualSort": [4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]}],
]
})
# The column metadata of Destination1 (parentId 2) is expected to be unchanged by the import.
self.assertTableData('_grist_Tables_column', cols="subset", data=[
["id", "colId", "type", "isFormula", "formula"],
[7, "manualSort", "ManualSortPos", False, ""],
[8, "Employee_ID", "Int", False, ""],
[9, "First_Name", "Text", False, ""],
[10, "Last_Name", "Text", False, ""],
[11, "Middle_Initial", "Text", False, ""],
[12, "Email", "Text", False, ""],
], rows=lambda r: r.parentId.id == 2)
# Check that no existing records were updated.
# Rows 1-3 keep their original values. The blank email of row 1 is not paired
# with the blank incoming emails either: those records appear as new rows 5 and 9.
self.assertTableData('Destination1', cols="all", data=[
["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
[1, 1, "Bob", "Nike", "F.", "", 1.0],
[2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
[3, 3, "Don", "Yoon", None, "don.b.yoon@example.com", 3.0],
[4, 1, "Bob", "Nike", "F", "bob@example.com", 4.0],
[5, 2, "Carry", "Jonson", "", None, 5.0],
[6, 3, "Don", "Yoon", "B", "don@example.com", 6.0],
[7, 4, "Amir", "Greene", "", "amir@example.com", 7.0],
[8, 5, "Ken", "Foster", "C", "ken@example.com", 8.0],
[9, 6, "George", "Huang", "", "", 9.0],
[10, 7, "Barbara", "Kinney", "D", "barbara@example.com", 10.0],
])
# Check the table list after the import: Hidden_table (id 1) was removed above, Destination1 keeps id 2.
self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_replace_all_fields_merge_strategy(self):
  """Matched destination rows take every incoming field value, blank or not."""
  # Create the source ('Hidden_table') and destination ('Destination1') tables.
  self.init_state()

  # Match rows on 'Employee_ID' and merge with the replace-all-fields strategy.
  merge_options = {
    'mergeCols': ['Employee_ID'],
    'mergeStrategy': {'type': 'replace-all-fields'},
  }
  import_result = self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    self.TEMP_transform_rule_colids, merge_options,
  ])

  # The emitted actions should add the helper column, tear down the hidden
  # table, update the three matched rows and append the four unmatched ones.
  initial_formula = "$mname[0] if $mname else ''"
  expected_stored = [
    ["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {
      "formula": initial_formula,
      "isFormula": True,
      "type": "Text",
    }],
    ["AddRecord", "_grist_Tables_column", 13, {
      "colId": "gristHelper_Import_Middle_Initial",
      "formula": initial_formula,
      "isFormula": True,
      "label": "Middle Initial",
      "parentId": 1,
      "parentPos": 13.0,
      "type": "Text",
      "widgetOptions": "",
    }],
    ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
    ["RemoveRecord", "_grist_Views_section", 1],
    ["RemoveRecord", "_grist_TabBar", 1],
    ["RemoveRecord", "_grist_Pages", 1],
    ["RemoveRecord", "_grist_Views", 1],
    ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
    ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
    ["RemoveRecord", "_grist_Tables", 1],
    ["RemoveTable", "Hidden_table"],
    ["BulkUpdateRecord", "Destination1", [1, 2, 3], {
      "Email": ["bob@example.com", None, "don@example.com"],
      "Middle_Initial": ["F", "", "B"],
    }],
    ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {
      "Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"],
      "Employee_ID": [4, 5, 6, 7],
      "First_Name": ["Amir", "Ken", "George", "Barbara"],
      "Last_Name": ["Greene", "Foster", "Huang", "Kinney"],
      "Middle_Initial": ["", "C", "", "D"],
      "manualSort": [4.0, 5.0, 6.0, 7.0],
    }],
  ]
  self.assertPartialOutActions(import_result, {"stored": expected_stored})

  # Only the columns of table 2 (the destination) should remain, unchanged.
  expected_columns = [
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ]
  self.assertTableData('_grist_Tables_column', cols="subset", data=expected_columns,
                       rows=lambda rec: rec.parentId.id == 2)

  # Existing fields were replaced with the incoming ones (row 2 is now blank).
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "", None, 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_replace_blank_fields_only_merge_strategy(self):
  """Matched destination rows only have their blank fields filled from the source."""
  # Create the source ('Hidden_table') and destination ('Destination1') tables.
  self.init_state()

  # Match rows on 'Employee_ID' and merge with the replace-blank-fields-only strategy.
  merge_options = {
    'mergeCols': ['Employee_ID'],
    'mergeStrategy': {'type': 'replace-blank-fields-only'},
  }
  import_result = self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    self.TEMP_transform_rule_colids, merge_options,
  ])

  # Rows 1 and 3 are the only matched rows expected in the update action;
  # the four unmatched source rows are appended.
  initial_formula = "$mname[0] if $mname else ''"
  expected_stored = [
    ["AddColumn", "Hidden_table", "gristHelper_Import_Middle_Initial", {
      "formula": initial_formula,
      "isFormula": True,
      "type": "Text",
    }],
    ["AddRecord", "_grist_Tables_column", 13, {
      "colId": "gristHelper_Import_Middle_Initial",
      "formula": initial_formula,
      "isFormula": True,
      "label": "Middle Initial",
      "parentId": 1,
      "parentPos": 13.0,
      "type": "Text",
      "widgetOptions": "",
    }],
    ["BulkRemoveRecord", "_grist_Views_section_field", [1, 2, 3, 4, 5]],
    ["RemoveRecord", "_grist_Views_section", 1],
    ["RemoveRecord", "_grist_TabBar", 1],
    ["RemoveRecord", "_grist_Pages", 1],
    ["RemoveRecord", "_grist_Views", 1],
    ["UpdateRecord", "_grist_Tables", 1, {"primaryViewId": 0}],
    ["BulkRemoveRecord", "_grist_Tables_column", [1, 2, 3, 4, 5, 6, 13]],
    ["RemoveRecord", "_grist_Tables", 1],
    ["RemoveTable", "Hidden_table"],
    ["BulkUpdateRecord", "Destination1", [1, 3], {
      "Email": ["bob@example.com", "don.b.yoon@example.com"],
      "Middle_Initial": ["F.", "B"],
    }],
    ["BulkAddRecord", "Destination1", [4, 5, 6, 7], {
      "Email": ["amir@example.com", "ken@example.com", "", "barbara@example.com"],
      "Employee_ID": [4, 5, 6, 7],
      "First_Name": ["Amir", "Ken", "George", "Barbara"],
      "Last_Name": ["Greene", "Foster", "Huang", "Kinney"],
      "Middle_Initial": ["", "C", "", "D"],
      "manualSort": [4.0, 5.0, 6.0, 7.0],
    }],
  ]
  self.assertPartialOutActions(import_result, {"stored": expected_stored})

  # Only the columns of table 2 (the destination) should remain, unchanged.
  expected_columns = [
    ["id", "colId", "type", "isFormula", "formula"],
    [7, "manualSort", "ManualSortPos", False, ""],
    [8, "Employee_ID", "Int", False, ""],
    [9, "First_Name", "Text", False, ""],
    [10, "Last_Name", "Text", False, ""],
    [11, "Middle_Initial", "Text", False, ""],
    [12, "Email", "Text", False, ""],
  ]
  self.assertTableData('_grist_Tables_column', cols="subset", data=expected_columns,
                       rows=lambda rec: rec.parentId.id == 2)

  # Non-blank existing values ("F.", "M.", the long e-mail addresses) survive;
  # only previously blank cells carry incoming data.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F.", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don.b.yoon@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_merging_updates_all_duplicates_in_destination_table(self):
  """Every destination row sharing the merge key receives the incoming values."""
  # Create the source ('Hidden_table') and destination ('Destination1') tables.
  self.init_state()

  # Give the destination two extra rows for Employee_ID 3 whose values differ
  # from the row that is already there.
  duplicate_rows = {
    'Employee_ID': [3, 3],
    'First_Name': ['Don', 'Don'],
    'Last_Name': ["Yoon", "Yoon"],
    'Middle_Initial': [None, 'B'],
    'Email': ['don.yoon@example.com', 'yoon.don@example.com']
  }
  self.apply_user_action(['BulkAddRecord', 'Destination1', [4, 5], duplicate_rows])

  # Match rows on 'Employee_ID' and merge with the replace-with-nonblank-source strategy.
  merge_options = {
    'mergeCols': ['Employee_ID'],
    'mergeStrategy': {'type': 'replace-with-nonblank-source'},
  }
  self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    self.TEMP_transform_rule_colids, merge_options,
  ])

  # Rows 3, 4 and 5 (all Employee_ID 3) must end up with the same source data.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "Yoon", "B", "don@example.com", 3.0],
    [4, 3, "Don", "Yoon", "B", "don@example.com", 4.0],
    [5, 3, "Don", "Yoon", "B", "don@example.com", 5.0],
    [6, 4, "Amir", "Greene", "", "amir@example.com", 6.0],
    [7, 5, "Ken", "Foster", "C", "ken@example.com", 7.0],
    [8, 6, "George", "Huang", "", "", 8.0],
    [9, 7, "Barbara", "Kinney", "D", "barbara@example.com", 9.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])
def test_merging_uses_latest_duplicate_in_source_table_for_matching(self):
  """When source rows share the merge key, the last one supplies the merged values."""
  # Create the source ('Hidden_table') and destination ('Destination1') tables.
  self.init_state()

  # Give the source two extra rows for employee_id 3 whose values differ from
  # the row that is already there (and from each other).
  duplicate_rows = {
    'employee_id': [3, 3],
    'fname': ['Don', 'Don'],
    'lname': ["Yoon", "yoon"],
    'mname': [None, None],
    'email': ['d.yoon@example.com', 'yoon.don@example.com']
  }
  self.apply_user_action(['BulkAddRecord', 'Hidden_table', [8, 9], duplicate_rows])

  # Match rows on 'Employee_ID' and merge with the replace-with-nonblank-source strategy.
  merge_options = {
    'mergeCols': ['Employee_ID'],
    'mergeStrategy': {'type': 'replace-with-nonblank-source'},
  }
  self.apply_user_action([
    'TransformAndFinishImport', 'Hidden_table', 'Destination1', False,
    self.TEMP_transform_rule_colids, merge_options,
  ])

  # Row 3 must reflect the final source record for Don Yoon (id 9): lower-case
  # "yoon" and the "yoon.don@example.com" address.
  expected_rows = [
    ["id", "Employee_ID", "First_Name", "Last_Name", "Middle_Initial", "Email", "manualSort"],
    [1, 1, "Bob", "Nike", "F", "bob@example.com", 1.0],
    [2, 2, "Carry", "Jonson", "M.", "carry.m.jonson@example.com", 2.0],
    [3, 3, "Don", "yoon", None, "yoon.don@example.com", 3.0],
    [4, 4, "Amir", "Greene", "", "amir@example.com", 4.0],
    [5, 5, "Ken", "Foster", "C", "ken@example.com", 5.0],
    [6, 6, "George", "Huang", "", "", 6.0],
    [7, 7, "Barbara", "Kinney", "D", "barbara@example.com", 7.0],
  ]
  self.assertTableData('Destination1', cols="all", data=expected_rows)

  self.assertPartialData("_grist_Tables", ["id", "tableId"], [[2, "Destination1"]])

View File

@@ -1511,7 +1511,8 @@ class UserActions(object):
return self._import_actions.DoGenImporterView(source_table_id, dest_table_id, transform_rule)
@useraction
def TransformAndFinishImport(self, hidden_table_id, dest_table_id,
                             into_new_table, transform_rule):
  """
  Finish an import by handing all four arguments, unchanged and in order, to
  self._import_actions.DoTransformAndFinishImport, and return its result.
  """
  # NOTE(review): a second definition of this name, taking an extra
  # merge_options argument, follows directly below; this four-argument form
  # looks like the pre-change side of a diff -- confirm which should remain.
  importer = self._import_actions
  return importer.DoTransformAndFinishImport(
      hidden_table_id, dest_table_id, into_new_table, transform_rule)
def TransformAndFinishImport(self, hidden_table_id, dest_table_id, into_new_table,
                             transform_rule, merge_options=None):
  """
  Finish an import by delegating to self._import_actions.DoTransformAndFinishImport
  and returning its result.

  merge_options is optional; callers in the import tests pass a dict such as
  {'mergeCols': [...], 'mergeStrategy': {'type': ...}}. When it is omitted
  (or otherwise falsy), an empty dict is forwarded in its place.
  """
  options = merge_options if merge_options else {}
  return self._import_actions.DoTransformAndFinishImport(
      hidden_table_id, dest_table_id, into_new_table, transform_rule, options)