mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
2b2e19c5b5
Summary: The problem is that the implementation for a summary update was relying on type consistency to get columns (i.e. it matched against colId and type). Type consistency is an attempt at maintaining a consistent type across same-named columns for summaries of the same table. But the problem is that the consistency of types is NOT a strict guarantee or an invariant, more of a best-effort attempt (there are too many possible sequences of operations with renaming/adding/removing in summary tables and the underlying table). With the current implementation and with a document violating the type consistency, a summary table could end up with fields referencing columns of the former summary table (more detail below(1)). Which is a bad state (yields js errors on the client). This diff fixes this issue by relaxing the type comparison when searching for a same-named column. (1) __Below is a description of how a violation of type consistency could end up in a bad document state (example taken from the reported bug):__ > In this document, let's assume two summary tables `Table1 [by A]` and `Table1 [Totals]`. Let's also assume Table1 and `Table1 [Totals]` both have an `Amount(Numeric)` column, and that `Table1 [by A]` has one `Amount(Any)` column (violating the type consistency principle). Now when users wanted to change the `Table1 [Totals]` section to group by 'A', grist found that there is already a summary table with the same grouping. But it couldn't find a matching column for `Amount(Numeric)` so it created a new one. Except that because there was still an `Amount(Any)` the new column was named `Amount2`, which caused the following code to ignore it and in particular to forget to update its corresponding section's field, which was then pointing toward the column of a different table (which is bad). Test Plan: Added python test. Reviewers: georgegevoian Reviewed By: georgegevoian Differential Revision: https://phab.getgrist.com/D3809
427 lines
18 KiB
Python
427 lines
18 KiB
Python
from collections import namedtuple
|
|
import json
|
|
|
|
import six
|
|
|
|
from column import is_visible_column
|
|
import sort_specs
|
|
|
|
import logger
|
|
# Module-level logger; used below to warn when a section's sort spec cannot be converted.
log = logger.Logger(__name__, logger.INFO)
|
|
|
|
# Plain description of a column. The field names match the column-record attributes that
# make_col_info() reads when copying values from an existing column.
ColInfo = namedtuple(
  'ColInfo',
  ['colId', 'type', 'isFormula', 'formula', 'widgetOptions', 'label'],
)
|
|
|
|
|
|
def make_col_info(col=None, **values):
  """
  Build a ColInfo() from keyword values.

  Any ColInfo field missing from `values` is taken from the same-named attribute of `col`,
  or set to None when `col` is not given (or is falsy).
  """
  for field in ColInfo._fields:
    # Read the attribute for every field, whether or not it ends up being used.
    inherited = getattr(col, field) if col else None
    if field not in values:
      values[field] = inherited
  return ColInfo(**values)
|
|
|
|
def _make_sum_col_info(col):
  """
  Build the ColInfo() of a formula column that sums `col` over the records of a group.

  All fields are copied from `col`, except that the result is a formula column whose formula
  is SUM() over `$group.<colId>`.
  """
  sum_formula = 'SUM($group.%s)' % col.colId
  return make_col_info(col=col, isFormula=True, formula=sum_formula)
|
|
|
|
|
|
def get_colinfo_dict(col_info, with_id=False):
  """
  Convert a ColInfo into a dict of column values for AddColumn or AddTable actions.

  Fields set to None are left out, and so is 'colId'. When with_id is True (the form used with
  AddTable), the column id is included under the key 'id'.
  """
  col_values = {}
  for name, value in col_info._asdict().items():
    if name == 'colId' or value is None:
      continue
    col_values[name] = value
  if with_id:
    col_values['id'] = col_info.colId
  return col_values
|
|
|
|
|
|
def skip_rules_update(col, col_values):
  """
  Rules for summary tables can't be derived from source columns. This function
  removes (and keeps original) rules settings when updating summary tables.

  `col` is the column being updated (only its `widgetOptions` attribute is read), and
  `col_values` is the dict of proposed updates. Returns a new dict (the argument is not
  modified) without the 'rules' key. If it contains 'widgetOptions' holding a JSON object, any
  "rulesOptions" in it is replaced by the one currently stored on `col` (or dropped if `col`
  has none). Updates whose 'widgetOptions' is missing, empty, None, not valid JSON, or not a
  JSON object are returned unchanged apart from the removal of 'rules'.
  """

  # Remove rules from updates (building a copy, so the caller's dict is left untouched).
  col_values = {k: v for k, v in col_values.items() if k != 'rules'}

  try:
    # New widgetOptions to use. `or ''` makes an explicit None behave like a missing value,
    # the same way the column's own widgetOptions are handled below.
    new_widget_options = json.loads(col_values.get('widgetOptions') or '')
  except ValueError:
    # If we are not updating widgetOptions (or they are
    # not a valid json string, i.e. in tests), just return the original updates.
    return col_values
  if not isinstance(new_widget_options, dict):
    # Valid JSON, but not an object (e.g. "123" or "[1]"): there is nothing to merge into.
    return col_values

  try:
    # Original widgetOptions (maybe with styling rules "rulesOptions").
    widget_options = json.loads(col.widgetOptions or '')
  except ValueError:
    widget_options = {}
  if not isinstance(widget_options, dict):
    widget_options = {}

  # Keep the original rulesOptions if any, and ignore any new one.
  new_widget_options.pop('rulesOptions', None)
  rules_options = widget_options.get('rulesOptions')
  if rules_options:
    new_widget_options['rulesOptions'] = rules_options

  col_values['widgetOptions'] = json.dumps(new_widget_options)
  return col_values
|
|
|
|
|
|
def _copy_widget_options(options):
  """
  Copies widgetOptions for a summary group-by column (omitting conditional formatting rules).

  `options` is the widgetOptions string of the source column. If it decodes to a JSON object,
  returns that object re-encoded without its "rulesOptions" key. Empty values, strings that are
  not valid JSON, and JSON values that are not objects (e.g. "123" or "[1]") are returned
  unchanged.
  """
  if not options:
    return options
  try:
    parsed = json.loads(options)
  except ValueError:
    # widgetOptions are not always a valid json value (especially in tests)
    return options
  if not isinstance(parsed, dict):
    # Nothing to filter in a non-object value; pass it through like invalid JSON.
    return options
  return json.dumps({k: v for k, v in parsed.items() if k != "rulesOptions"})
|
|
|
|
|
|
def encode_summary_table_name(source_table_id, groupby_col_ids):
  """
  Build the name of a summary table from its source table ID and group-by column IDs.

  The name is the source table ID followed by '_summary' and then, if there are any group-by
  columns, their IDs in sorted order, all joined with underscores.
  """
  parts = [source_table_id + '_summary']
  if groupby_col_ids:
    parts.extend(sorted(groupby_col_ids))
  return '_'.join(parts)
|
|
|
|
|
|
def decode_summary_table_name(summary_table_info):
  """
  Return the name of the source table of a summary table, given the summary table's schema
  info, or None if the schema info does not look like that of a summary table.
  """
  # Code generation needs to know the source table of each summary table. Metadata records would
  # make that easy, but (at least for now) all code is generated from the schema alone, so the
  # answer is read off the type of the special 'group' column of the summary table.
  group_col = summary_table_info.columns.get('group')
  if not group_col:
    return None
  if 'getSummarySourceGroup' not in group_col.formula:
    return None
  prefix = 'RefList:'
  if not group_col.type.startswith(prefix):
    return None
  # What follows the 'RefList:' prefix is the source table's ID.
  return group_col.type[len(prefix):]
|
|
|
|
|
|
def _group_colinfo(source_table):
  """
  Build the ColInfo() of the 'group' column, which every summary table must have: a formula
  column of type RefList pointing to source_table.
  """
  group_type = 'RefList:%s' % source_table.tableId
  return make_col_info(
    colId='group',
    type=group_type,
    isFormula=True,
    formula='table.getSummarySourceGroup(rec)',
  )
|
|
|
|
|
|
def _update_sort_spec(sort_spec, old_table, new_table):
  """
  Translate a sort spec from the columns of old_table to the same-named columns of new_table.

  sort_spec is a JSON string encoding a list of column refs (negated for descending). Each ref
  is mapped, by colId, to the ref of the matching column in new_table; entries whose column has
  no counterpart there are dropped. Returns the new JSON string, or an empty string in case of
  a problem.
  """
  colid_by_old_ref = {c.id: c.colId for c in old_table.columns}
  new_ref_by_colid = {c.colId: c.id for c in new_table.columns}

  try:
    converted = []
    for spec in json.loads(sort_spec):
      # A column that is gone maps to ref 0, which the check below filters out.
      col_id = colid_by_old_ref.get(sort_specs.col_ref(spec))
      swapped = sort_specs.swap_col_ref(spec, new_ref_by_colid.get(col_id, 0))
      if sort_specs.col_ref(swapped):
        converted.append(swapped)
    return json.dumps(converted, separators=(',', ':'))
  except Exception:
    log.warn("update_summary_section: can't parse sortColRefs JSON; clearing sortColRefs")
    return ''
|
|
|
|
|
|
def summary_groupby_col_type(source_type):
  """
  Map the type of a source-table column to the type of the group-by column that represents it
  in a summary table.

  List types are 'flattened' into their non-list counterpart: 'ChoiceList' becomes 'Choice'
  and 'RefList:<table>' becomes 'Ref:<table>'. Every other type is returned as is.
  """
  if source_type != 'ChoiceList':
    return source_type.replace('RefList:', 'Ref:')
  return 'Choice'
|
|
|
|
|
|
class SummaryActions(object):
  """
  Implements the creation, update and detaching of summary-table view sections.

  Tables and columns are added through the given `useractions` object, and metadata records
  (tables, columns, view sections, fields) are read and modified through `docmodel`.
  """

  def __init__(self, useractions, docmodel):
    self.useractions = useractions
    self.docmodel = docmodel

  def _get_or_add_columns(self, table, all_colinfo):
    """
    Given a table record and a list of ColInfo objects, generates the corresponding column
    records in the table, creating appropriate columns if they don't yet exist.

    An existing column is reused when it has the same colId and the same formula as the
    ColInfo; its type is deliberately not compared. Otherwise a new column is added.
    """
    prior = {c.colId: c for c in table.columns}
    for ci in all_colinfo:
      col = prior.get(ci.colId)
      if col and col.formula == ci.formula:
        yield col
      else:
        result = self.useractions.doAddColumn(table.tableId, ci.colId,
                                              get_colinfo_dict(ci, with_id=False))
        yield self.docmodel.columns.table.get_record(result['colRef'])


  def _get_or_create_summary(self, source_table, source_groupby_columns, formula_colinfo):
    """
    Finds a summary table or creates a new one, based on source_table, grouped by the columns
    in source_groupby_columns, and containing formulas in formula_colinfo. Source_table should
    be a Record from _grist_Tables, source_groupby_columns a list of column records of the
    source table, and formula_colinfo a list of ColInfo objects.
    Returns the tuple (summary_table, groupby_columns, formula_columns).
    """
    # Summary tables are identified by the sorted tuple of their source group-by column refs.
    key = tuple(sorted(int(c) for c in source_groupby_columns))

    groupby_colinfo = [
      make_col_info(
        col=c,
        isFormula=False,
        formula='',
        widgetOptions=_copy_widget_options(c.widgetOptions),
        type=summary_groupby_col_type(c.type)
      )
      for c in source_groupby_columns
    ]
    summary_table = next((t for t in source_table.summaryTables if t.summaryKey == key), None)
    created = False
    if not summary_table:
      groupby_col_ids = [c.colId for c in groupby_colinfo]
      result = self.useractions.doAddTable(
        encode_summary_table_name(source_table.tableId, groupby_col_ids),
        [get_colinfo_dict(ci, with_id=True) for ci in groupby_colinfo + formula_colinfo],
        summarySourceTableRef=source_table.id,
        raw_section=True)
      summary_table = self.docmodel.tables.table.get_record(result['id'])
      created = True
      # Note that in this case, _get_or_add_columns() below should not add any new columns,
      # but only return existing ones. (The table may contain extra columns, e.g. 'manualSort',
      # at least in theory.)

    groupby_columns = list(self._get_or_add_columns(summary_table, groupby_colinfo))
    formula_columns = list(self._get_or_add_columns(summary_table, formula_colinfo))

    if created:
      # Set the summarySourceCol field for all the group-by columns in the table.
      self.docmodel.update(groupby_columns,
                           summarySourceCol=[c.id for c in source_groupby_columns],
                           visibleCol=[c.visibleCol for c in source_groupby_columns])
      for col in groupby_columns:
        self.useractions.maybe_copy_display_formula(col.summarySourceCol, col)
      assert summary_table.summaryKey == key

    return (summary_table, groupby_columns, formula_columns)


  def update_summary_section(self, view_section, source_table, source_groupby_columns):
    """
    Changes the summary section view_section to show the summary of source_table grouped by
    source_groupby_columns (a list of column records of source_table).

    Finds or creates the matching summary table, deletes the fields whose column is not kept,
    re-points the remaining fields to the columns of that table, adds fields for new group-by
    columns, and adjusts the section's sort spec.
    """
    source_groupby_colset = set(source_groupby_columns)
    groupby_colids = {c.colId for c in source_groupby_columns}

    # Go through columns figuring out which ones we'll keep.
    prev_group_cols, formula_colinfo = [], []
    for col in view_section.tableRef.columns:
      srcCol = col.summarySourceCol
      # Records implement __hash__, so we can look them up in sets.
      if srcCol in source_groupby_colset:
        prev_group_cols.append(col)
      elif col.isFormula and col.colId not in groupby_colids:
        formula_colinfo.append(make_col_info(col))
      else:
        # if user is removing a numeric column from the group by columns we must add it back as a
        # sum formula column
        self._append_sister_column_if_any(formula_colinfo, source_table, srcCol)

    # All fields with a column that we don't keep, must be deleted
    colid_keep_set = set(c.colId for c in prev_group_cols + formula_colinfo)
    delete_fields = [f for f in view_section.fields if f.colRef.colId not in colid_keep_set]

    have_group_col = any(ci.colId == 'group' for ci in formula_colinfo)
    if not have_group_col:
      formula_colinfo.append(_group_colinfo(source_table))

    # Get column records for all the columns we should have in our section.
    summary_table, groupby_columns, formula_columns = self._get_or_create_summary(
      source_table, source_groupby_columns, formula_colinfo)

    if not have_group_col:
      # We've added the "group" column; now restore the lists to match what we want in fields.
      formula_colinfo.pop()
      formula_columns.pop()

    # Remember the original table, which we need later to adjust the sort spec (sortColRefs).
    orig_table = view_section.tableRef

    # This line is a bit hard to explain: we unset viewSection.tableRef before updating all the
    # fields, and then set it to the correct value. Note how undo will reverse the operations, and
    # produce the same sequence (unset, update fields, set). Client-side code relies on this to
    # avoid having to deal with inconsistent view sections while fields are being updated.
    self.docmodel.update([view_section], tableRef=0)

    # Delete fields no longer relevant.
    self.docmodel.remove(delete_fields)

    # Update fields for all formula fields and reused group-by fields to point to new columns.
    colid_to_field_map = {field.colRef.colId: field for field in view_section.fields}
    prev_group_fields = [
      colid_to_field_map[col.colId] for col in prev_group_cols
      if col.colId in colid_to_field_map
    ]
    source_col_map = dict(zip(source_groupby_columns, groupby_columns))
    prev_group_columns = [source_col_map[f.colRef.summarySourceCol] for f in prev_group_fields]
    visible_formula_columns = [c for c in formula_columns if c.colId in colid_to_field_map]
    formula_fields = [colid_to_field_map[c.colId] for c in visible_formula_columns]
    self.docmodel.update(formula_fields + prev_group_fields,
                         colRef=[c.id for c in visible_formula_columns + prev_group_columns])

    # Finally, we need to create fields for newly-added group-by columns. If there were missing
    # fields for any group-by columns before, they'll be created now.
    new_group_columns = [c for c in groupby_columns if c not in prev_group_columns]

    # Insert these after the last existing group-by field.
    insert_pos = prev_group_fields[-1].parentPos if prev_group_fields else None
    new_group_fields = self.docmodel.insert_after(view_section.fields, insert_pos,
                                                  colRef=[c.id for c in new_group_columns])

    # Reorder the group-by fields if needed, to match the order requested.
    group_col_to_field = {f.colRef: f for f in prev_group_fields + new_group_fields}
    group_fields = [group_col_to_field[c] for c in groupby_columns]
    group_positions = [field.parentPos for field in group_fields]
    sorted_positions = sorted(group_positions)
    if sorted_positions != group_positions:
      self.docmodel.update(group_fields, parentPos=sorted_positions)

    update_args = {}
    if view_section.sortColRefs:
      # Fix the sortSpec to refer to the new columns.
      update_args['sortColRefs'] = _update_sort_spec(
        view_section.sortColRefs, orig_table, summary_table)

    # Finally update the section to point to the new table.
    self.docmodel.update([view_section], tableRef=summary_table.id, **update_args)


  def _find_sister_column(self, source_table, col_id):
    """Returns a summary formula column for source_table with the given col_id, or None."""
    for t in source_table.summaryTables:
      c = self.docmodel.columns.lookupOne(parentId=t.id, colId=col_id, isFormula=True)
      if c:
        return c
    return None

  def _append_sister_column_if_any(self, all_colinfo, source_table, col):
    """
    Appends a col info for one sister column of col (in source_table) if it finds one, else, and if
    col is of numeric type appends the col info for the sum col, else do nothing.
    """
    c = self._find_sister_column(source_table, col.colId)
    if c:
      all_colinfo.append(make_col_info(col=c))
    elif col.type in ('Int', 'Numeric'):
      all_colinfo.append(_make_sum_col_info(col))


  def _create_summary_colinfo(self, source_table, source_groupby_columns):
    """Come up automatically with a list of columns to include into a summary table."""
    # Column 'group' defines the group of records that map to this summary line.
    all_colinfo = [_group_colinfo(source_table)]

    # For every column in the source data, if there is a same-named formula column in another
    # summary table, use it here; otherwise if it's a numerical column, automatically add a
    # same-named column with the sum of the values in the group.
    groupby_col_ids = {c.colId for c in source_groupby_columns}
    for col in source_table.columns:
      if col.colId in groupby_col_ids or col.colId == 'group' or not is_visible_column(col.colId):
        continue
      self._append_sister_column_if_any(all_colinfo, source_table, col)

    # Add a default 'count' column for the number of records in the group, unless a different
    # 'count' was already added (which we would then prefer as presumably more useful). We add the
    # default 'count' right after 'group', to make it the first of the visible formula columns.
    if not any(c.colId == 'count' for c in all_colinfo):
      all_colinfo.insert(1, make_col_info(colId='count', type='Int',
                                          isFormula=True, formula='len($group)'))
    return all_colinfo


  def create_new_summary_section(self, source_table, source_groupby_columns, view, section_type):
    """
    Adds to `view` a new section of type section_type showing the summary of source_table
    grouped by source_groupby_columns, finding or creating the summary table as needed.

    Fields are added for all the group-by and formula columns except 'group'. Returns the new
    section record.
    """
    formula_colinfo = list(self._create_summary_colinfo(source_table, source_groupby_columns))
    summary_table, groupby_columns, formula_columns = self._get_or_create_summary(
      source_table, source_groupby_columns, formula_colinfo)

    section = self.docmodel.add(view.viewSections, tableRef=summary_table.id,
                                parentKey=section_type)[0]
    self.docmodel.add(section.fields,
                      colRef=[c.id for c in groupby_columns + formula_columns
                              if c.colId != "group"])
    return section


  @staticmethod
  def _group_lookup_value(source_type, col_id):
    """
    Returns the expression to use as a lookupRecords() argument value in the 'group' formula
    of a detached summary table, for the group-by column col_id whose source column has type
    source_type.

    A summary grouped by a list-type column (ChoiceList or RefList) has that column flattened,
    so the lookup must match source records whose list CONTAINS the value; any other column is
    matched by equality.
    """
    if source_type == 'ChoiceList':
      return 'CONTAINS($%s, match_empty="")' % col_id
    # The type prefix is spelled 'RefList:', as everywhere else in this module.
    if source_type.startswith('RefList:'):
      return 'CONTAINS($%s, match_empty=0)' % col_id
    return '$%s' % col_id


  def detach_summary_section(self, view_section):
    """
    Create a real table equivalent to the given summary section, and update the section to show
    the new table instead of the summary.
    """
    source_table_id = view_section.tableRef.summarySourceTable.tableId

    # Get a list of columns that we need for the new table.
    fields = view_section.fields
    field_col_recs = [f.colRef for f in fields]

    # Prepare the column info for each column.
    col_info = [make_col_info(col=c) for c in field_col_recs if c.colId != 'group']

    # Prepare the 'group' column, which is that one column that's different from the original.
    group_args = ', '.join(
      '%s=%s' % (
        c.summarySourceCol.colId,
        self._group_lookup_value(c.summarySourceCol.type, c.colId),
      )
      for c in field_col_recs if c.summarySourceCol
    )
    col_info.append(make_col_info(colId='group', type='RefList:%s' % source_table_id,
                                  isFormula=True,
                                  formula='%s.lookupRecords(%s)' % (source_table_id, group_args)))

    # Create the new table.
    res = self.useractions.AddTable(None, [get_colinfo_dict(ci, with_id=True) for ci in col_info])
    new_table = self.docmodel.tables.table.get_record(res["id"])

    # Remember the original table, which we need later e.g. to adjust the sort spec (sortColRefs).
    orig_table = view_section.tableRef

    # Populate the new table.
    old_data = self.useractions._engine.fetch_table(orig_table.tableId, formulas=False)
    self.useractions.ReplaceTableData(new_table.tableId, old_data.row_ids, old_data.columns)

    # Unset viewSection.tableRef before updating the fields, to avoid having inconsistencies. (See
    # longer explanation in update_summary_section().)
    self.docmodel.update([view_section], tableRef=0)

    # Update all fields to point to new columns.
    new_col_dict = {c.colId: c.id for c in new_table.columns}
    self.docmodel.update(fields, colRef=[new_col_dict[c.colId] for c in field_col_recs])

    # If the section is sorted, fix the sortSpec to refer to the new columns.
    update_args = {}
    if view_section.sortColRefs:
      update_args['sortColRefs'] = _update_sort_spec(
        view_section.sortColRefs, orig_table, new_table)

    # Update the section to point to the new table.
    self.docmodel.update([view_section], tableRef=new_table.id, **update_args)
|