from collections import namedtuple
import json
import re

import six

from column import is_visible_column
import sort_specs

import logger

log = logger.Logger(__name__, logger.INFO)

# Description of a column as needed to create it: the subset of column fields that summary
# tables care about.
ColInfo = namedtuple('ColInfo', ('colId', 'type', 'isFormula', 'formula',
                                 'widgetOptions', 'label'))


def _make_col_info(col=None, **values):
    """
    Return a ColInfo() with the given fields, optionally copying values from the given column.

    Fields passed explicitly in `values` win; every other field is read from `col` when `col` is
    truthy, and is None otherwise.
    """
    for key in ColInfo._fields:
        values.setdefault(key, getattr(col, key) if col else None)
    return ColInfo(**values)


def _make_sum_col_info(col):
    """Return a ColInfo() for the sum formula column for column col."""
    return _make_col_info(col=col, isFormula=True,
                          formula='SUM($group.%s)' % col.colId)


def _get_colinfo_dict(col_info, with_id=False):
    """
    Return a dict suitable to use with AddColumn or AddTable (when with_id=True) actions.

    Fields whose value is None are omitted. The column id is never included under 'colId'; when
    with_id is true it is included under the key 'id' instead.
    """
    col_values = {k: v for k, v in six.iteritems(col_info._asdict())
                  if v is not None and k != 'colId'}
    if with_id:
        col_values['id'] = col_info.colId
    return col_values


def _copy_widget_options(options):
    """
    Copies widgetOptions for a summary group-by column (omitting conditional formatting rules).

    `options` is expected to be a JSON-encoded object. It is returned unchanged when it is empty,
    when it is not valid JSON, or when it decodes to anything other than a JSON object (there is
    then no "rulesOptions" key to strip).
    """
    if not options:
        return options
    try:
        parsed = json.loads(options)
    except ValueError:
        # widgetOptions are not always a valid json value (especially in tests)
        return options
    if not isinstance(parsed, dict):
        # Valid JSON, but not an object (e.g. a list or a bare number): calling .items() on it
        # would raise AttributeError, so treat it like the invalid-JSON case.
        return options
    return json.dumps({k: v for k, v in parsed.items() if k != "rulesOptions"})


# To generate code, we need to know for each summary table, what its source table is. It would be
# easy if we had access to metadata records, but (at least for now) we generate all code based on
# schema only. So we encode the source table name inside of the summary table name.
#
# The encoding includes the length of the source table name, to avoid the possibility of ambiguity
# between the second summary table for "Foo", and the first summary table for "Foo2".
#
# Note that it means we need to rename summary tables when the source table is renamed.
def encode_summary_table_name(source_table_name):
    """
    Build a summary table name from which the source table name can be reliably recovered.

    The length of the source name is embedded in the result, so decoding keeps working even if a
    suffix is appended to the returned name.
    """
    return "GristSummary_%d_%s" % (len(source_table_name), source_table_name)


_summary_re = re.compile(r'GristSummary_(\d+)_')


def decode_summary_table_name(summary_table_name):
    """
    Recover the source table name from a summary table name.

    Returns None when the name does not start with the summary prefix, or when it is too short
    to hold a source name of the encoded length.
    """
    match = _summary_re.match(summary_table_name)
    if not match:
        return None
    name_len = int(match.group(1))
    name_start = match.end(0)
    candidate = summary_table_name[name_start : name_start + name_len]
    # A shorter slice means the encoded length runs past the end of the string.
    if len(candidate) != name_len:
        return None
    return candidate


def _group_colinfo(source_table):
    """Build the ColInfo() of the 'group' column, which every summary table must contain."""
    return _make_col_info(
        colId='group',
        type='RefList:%s' % source_table.tableId,
        isFormula=True,
        formula='table.getSummarySourceGroup(rec)',
    )


def _update_sort_spec(sort_spec, old_table, new_table):
    """
    Translate a sort spec from old_table's columns to new_table's columns.

    The sort spec is a JSON string encoding a list of column specs. Each spec's column ref is
    mapped from old_table to the same-named column of new_table; specs whose column has no
    counterpart are dropped. Returns the new JSON string, or an empty string if anything fails.
    """
    colid_by_old_ref = {c.id: c.colId for c in old_table.columns}
    new_ref_by_colid = {c.colId: c.id for c in new_table.columns}

    def remap(col_spec):
        # Map the old colRef to the new one via the column id; 0 marks a column that is gone,
        # and such specs are filtered out below.
        old_ref = sort_specs.col_ref(col_spec)
        new_ref = new_ref_by_colid.get(colid_by_old_ref.get(old_ref), 0)
        return sort_specs.swap_col_ref(col_spec, new_ref)

    try:
        kept_specs = []
        for old_spec in json.loads(sort_spec):
            new_spec = remap(old_spec)
            if sort_specs.col_ref(new_spec):
                kept_specs.append(new_spec)
        return json.dumps(kept_specs, separators=(',', ':'))
    except Exception:
        log.warn("update_summary_section: can't parse sortColRefs JSON; clearing sortColRefs")
        return ''


def summary_groupby_col_type(source_type):
    """
    Return the type of a summary group-by column, given the type of its source column.

    List types are 'flattened' to their non-list counterpart: 'ChoiceList' becomes 'Choice' and
    'RefList:' becomes 'Ref:'. Any other type is returned unchanged.
    """
    if source_type == 'ChoiceList':
        return 'Choice'
    return source_type.replace('RefList:', 'Ref:')


class SummaryActions(object):
    """Operations on summary tables and the view sections showing them."""

    def __init__(self, useractions, docmodel):
        self.useractions = useractions
        self.docmodel = docmodel

    def _get_or_add_columns(self, table, all_colinfo):
        """
        Yield one column record of `table` for each ColInfo in all_colinfo.

        An existing column is reused when its colId, type and formula all match; otherwise a new
        column is added to the table and its record is yielded.
        """
        existing = {c.colId: c for c in table.columns}
        for info in all_colinfo:
            col = existing.get(info.colId)
            if col and col.type == info.type and col.formula == info.formula:
                yield col
                continue
            added = self.useractions.doAddColumn(
                table.tableId, info.colId, _get_colinfo_dict(info, with_id=False))
            yield self.docmodel.columns.table.get_record(added['colRef'])

    def _get_or_create_summary(self, source_table, source_groupby_columns, formula_colinfo):
        """
        Find the summary table of source_table grouped by source_groupby_columns, or create it.

        source_groupby_columns are the source columns to group by (their id, type, visibleCol and
        widgetOptions are read here); formula_colinfo is a list of ColInfo objects for the
        formula columns the summary table should contain.

        Returns the tuple (summary_table, groupby_columns, formula_columns).
        """
        key = tuple(sorted(int(c) for c in source_groupby_columns))

        groupby_colinfo = []
        for src_col in source_groupby_columns:
            groupby_colinfo.append(_make_col_info(
                col=src_col,
                isFormula=False,
                formula='',
                widgetOptions=_copy_widget_options(src_col.widgetOptions),
                type=summary_groupby_col_type(src_col.type),
            ))

        summary_table = next(
            (t for t in source_table.summaryTables if t.summaryKey == key), None)
        created = False
        if not summary_table:
            added = self.useractions.doAddTable(
                encode_summary_table_name(source_table.tableId),
                [_get_colinfo_dict(ci, with_id=True) for ci in groupby_colinfo + formula_colinfo],
                summarySourceTableRef=source_table.id)
            summary_table = self.docmodel.tables.table.get_record(added['id'])
            created = True
            # For a freshly created table, the _get_or_add_columns() calls below are expected to
            # only return existing columns and add none. (The table may hold extra columns, e.g.
            # 'manualSort', at least in theory.)

        groupby_columns = list(self._get_or_add_columns(summary_table, groupby_colinfo))
        formula_columns = list(self._get_or_add_columns(summary_table, formula_colinfo))

        if created:
            # Link every group-by column of the new table to its source column.
            self.docmodel.update(
                groupby_columns,
                summarySourceCol=[c.id for c in source_groupby_columns],
                visibleCol=[c.visibleCol for c in source_groupby_columns])
            for groupby_col in groupby_columns:
                self.useractions.maybe_copy_display_formula(
                    groupby_col.summarySourceCol, groupby_col)
            assert summary_table.summaryKey == key

        return (summary_table, groupby_columns, formula_columns)

    def update_summary_section(self, view_section, source_table, source_groupby_columns):
        """
        Point view_section at the summary of source_table grouped by source_groupby_columns.

        The summary table is found or created; the section's fields are kept, re-pointed, removed
        or added to match it, and the section's sortColRefs is adjusted to the new columns.
        """
        source_groupby_colset = set(source_groupby_columns)
        groupby_colids = {c.colId for c in source_groupby_columns}

        prev_fields = list(view_section.fields)

        # Sort the existing fields into those we keep and those we delete.
        prev_group_fields = []
        formula_fields = []
        delete_fields = []
        missing_colinfo = []
        for field in prev_fields:
            source_col = field.colRef.summarySourceCol
            # Records implement __hash__, so they can be looked up in sets.
            if source_col in source_groupby_colset:
                prev_group_fields.append(field)
            elif field.colRef.isFormula and field.colRef.colId not in groupby_colids:
                formula_fields.append(field)
            else:
                # When a numeric column is removed from the group-by columns, it has to come back
                # as a sum formula column.
                self._append_sister_column_if_any(missing_colinfo, source_table, source_col)
                delete_fields.append(field)

        # ColInfo for every formula column we keep, plus 'group' if the section does not show it.
        formula_colinfo = [_make_col_info(f.colRef) for f in formula_fields]
        have_group_col = any(f.colRef.colId == 'group' for f in formula_fields)
        if not have_group_col:
            formula_colinfo.append(_group_colinfo(source_table))

        # Column records for all the columns the section should have.
        summary_table, groupby_columns, formula_columns = self._get_or_create_summary(
            source_table, source_groupby_columns, formula_colinfo)

        if not have_group_col:
            # The "group" column was only added for the lookup above; drop it again so that the
            # lists line up with the fields.
            formula_colinfo.pop()
            formula_columns.pop()

        # Keep the original table; it is needed later to adjust the sort spec (sortColRefs).
        orig_table = view_section.tableRef

        # viewSection.tableRef is unset before the fields are updated, and set to the right value
        # afterwards. Undo reverses the operations and so yields the same sequence (unset, update
        # fields, set). Client-side code relies on this to avoid dealing with inconsistent view
        # sections while fields are being updated.
        self.docmodel.update([view_section], tableRef=0)

        # Drop the fields that are no longer relevant.
        self.docmodel.remove(delete_fields)

        # Add the missing sum columns.
        for info in missing_colinfo:
            added_col = self.useractions.AddColumn(
                summary_table.tableId, info.colId, _get_colinfo_dict(info, with_id=False))
            # The AddColumn user action did not add a field, since the view section does not
            # point to the new table yet; add the field by hand.
            self.docmodel.add(view_section.fields, colRef=[added_col['colRef']])

        # Re-point the formula fields and the reused group-by fields to the new columns.
        source_col_map = dict(zip(source_groupby_columns, groupby_columns))
        prev_group_columns = [
            source_col_map[f.colRef.summarySourceCol] for f in prev_group_fields]
        self.docmodel.update(
            formula_fields + prev_group_fields,
            colRef=[c.id for c in formula_columns + prev_group_columns])

        # Create fields for the newly added group-by columns; this also covers group-by columns
        # that had no field before. They go right after the last existing group-by field.
        new_group_columns = [c for c in groupby_columns if c not in prev_group_columns]
        if prev_group_fields:
            insert_pos = prev_group_fields[-1].parentPos
        else:
            insert_pos = None
        new_group_fields = self.docmodel.insert_after(
            view_section.fields, insert_pos, colRef=[c.id for c in new_group_columns])

        # Reorder the group-by fields, if needed, to follow the requested order.
        group_col_to_field = {f.colRef: f for f in prev_group_fields + new_group_fields}
        group_fields = [group_col_to_field[c] for c in groupby_columns]
        group_positions = [f.parentPos for f in group_fields]
        sorted_positions = sorted(group_positions)
        if sorted_positions != group_positions:
            self.docmodel.update(group_fields, parentPos=sorted_positions)

        update_args = {}
        if view_section.sortColRefs:
            # Make the sort spec refer to the new columns.
            update_args['sortColRefs'] = _update_sort_spec(
                view_section.sortColRefs, orig_table, summary_table)

        # Finally, point the section to the new table.
        self.docmodel.update([view_section], tableRef=summary_table.id, **update_args)

    def _find_sister_column(self, source_table, col_id):
        """Return a formula column named col_id from a summary table of source_table, or None."""
        for summary_table in source_table.summaryTables:
            found = self.docmodel.columns.lookupOne(
                parentId=summary_table.id, colId=col_id, isFormula=True)
            if found:
                return found
        return None

    def _append_sister_column_if_any(self, all_colinfo, source_table, col):
        """
        Append to all_colinfo the column info to use for `col` in a summary table, if any.

        A same-named formula column found in a summary table of source_table is copied; failing
        that, a column of type 'Int' or 'Numeric' gets a sum column. Otherwise nothing is added.
        """
        sister = self._find_sister_column(source_table, col.colId)
        if sister:
            all_colinfo.append(_make_col_info(col=sister))
            return
        if col.type in ('Int', 'Numeric'):
            all_colinfo.append(_make_sum_col_info(col))

    def _create_summary_colinfo(self, source_table, source_groupby_columns):
        """Pick automatically the list of formula columns to include in a summary table."""
        # The 'group' column defines the group of records that map to a summary line.
        all_colinfo = [_group_colinfo(source_table)]

        # Each remaining visible source column gets a copy of a same-named formula column from
        # another summary table if there is one, or else a sum column if it is numeric.
        groupby_col_ids = {c.colId for c in source_groupby_columns}
        for col in source_table.columns:
            if col.colId in groupby_col_ids:
                continue
            if col.colId == 'group':
                continue
            if not is_visible_column(col.colId):
                continue
            self._append_sister_column_if_any(all_colinfo, source_table, col)

        # Add a default 'count' column with the number of records in the group, unless another
        # 'count' was already added (preferred as presumably more useful). It goes right after
        # 'group', which makes it the first of the visible formula columns.
        has_count = any(c.colId == 'count' for c in all_colinfo)
        if not has_count:
            count_info = _make_col_info(
                colId='count', type='Int', isFormula=True, formula='len($group)')
            all_colinfo.insert(1, count_info)
        return all_colinfo

    def create_new_summary_section(self, source_table, source_groupby_columns, view, section_type):
        """
        Add to `view` a section of type section_type showing the summary of source_table grouped
        by source_groupby_columns, and return the new section.
        """
        formula_colinfo = list(self._create_summary_colinfo(source_table, source_groupby_columns))
        summary_table, groupby_columns, formula_columns = self._get_or_create_summary(
            source_table, source_groupby_columns, formula_colinfo)

        section = self.docmodel.add(
            view.viewSections, tableRef=summary_table.id, parentKey=section_type)[0]
        # Show every column except the 'group' helper column.
        shown_col_refs = [
            c.id for c in groupby_columns + formula_columns if c.colId != "group"]
        self.docmodel.add(section.fields, colRef=shown_col_refs)
        return section

    def detach_summary_section(self, view_section):
        """
        Create a real table equivalent to the given summary section, and update the section to
        show the new table instead of the summary.
        """
        source_table_id = view_section.tableRef.summarySourceTable.tableId

        # The columns needed for the new table are those shown by the section's fields.
        fields = view_section.fields
        field_col_recs = [f.colRef for f in fields]

        # Column info for every column other than 'group'.
        col_info = [_make_col_info(col=c) for c in field_col_recs if c.colId != 'group']

        # The 'group' column is the one column that differs from the original: it looks up the
        # source records by the values of the group-by columns.
        lookup_args = []
        for col in field_col_recs:
            source_col = col.summarySourceCol
            if not source_col:
                continue
            if source_col.type.startswith(('ChoiceList', 'RefList:')):
                value_expr = 'CONTAINS($%s)' % col.colId
            else:
                value_expr = '$%s' % col.colId
            lookup_args.append('%s=%s' % (source_col.colId, value_expr))
        group_args = ', '.join(lookup_args)

        col_info.append(_make_col_info(
            colId='group',
            type='RefList:%s' % source_table_id,
            isFormula=True,
            formula='%s.lookupRecords(%s)' % (source_table_id, group_args)))

        # Create the new table.
        res = self.useractions.AddTable(
            None, [_get_colinfo_dict(ci, with_id=True) for ci in col_info])
        new_table = self.docmodel.tables.table.get_record(res["id"])

        # Keep the original table; it is needed later e.g. to adjust the sort spec (sortColRefs).
        orig_table = view_section.tableRef

        # Populate the new table.
        old_data = self.useractions._engine.fetch_table(orig_table.tableId, formulas=False)
        self.useractions.ReplaceTableData(new_table.tableId, old_data.row_ids, old_data.columns)

        # Unset viewSection.tableRef before updating the fields, to avoid inconsistencies (the
        # same approach as in update_summary_section()).
        self.docmodel.update([view_section], tableRef=0)

        # Re-point all fields to the columns of the new table.
        new_col_dict = {c.colId: c.id for c in new_table.columns}
        self.docmodel.update(fields, colRef=[new_col_dict[c.colId] for c in field_col_recs])

        # If the section is sorted, make the sort spec refer to the new columns.
        update_args = {}
        if view_section.sortColRefs:
            update_args['sortColRefs'] = _update_sort_spec(
                view_section.sortColRefs, orig_table, new_table)

        # Point the section to the new table.
        self.docmodel.update([view_section], tableRef=new_table.id, **update_args)