mirror of
https://github.com/gristlabs/grist-core.git
synced 2026-03-02 04:09:24 +00:00
(core) move data engine code to core
Summary: this moves sandbox/grist to core, and adds a requirements.txt file for reconstructing the content of sandbox/thirdparty. Test Plan: existing tests pass. Tested core functionality manually. Tested docker build manually. Reviewers: dsagal Reviewed By: dsagal Differential Revision: https://phab.getgrist.com/D2563
This commit is contained in:
319
sandbox/grist/summary.py
Normal file
319
sandbox/grist/summary.py
Normal file
@@ -0,0 +1,319 @@
|
||||
from collections import namedtuple
|
||||
import json
|
||||
import re
|
||||
import logger
|
||||
log = logger.Logger(__name__, logger.INFO)
|
||||
|
||||
ColInfo = namedtuple('ColInfo', ('colId', 'type', 'isFormula', 'formula',
|
||||
'widgetOptions', 'label'))
|
||||
|
||||
|
||||
def _make_col_info(col=None, **values):
|
||||
"""Return a ColInfo() with the given fields, optionally copying values from the given column."""
|
||||
for key in ColInfo._fields:
|
||||
values.setdefault(key, getattr(col, key) if col else None)
|
||||
return ColInfo(**values)
|
||||
|
||||
|
||||
def _get_colinfo_dict(col_info, with_id=False):
|
||||
"""Return a dict suitable to use with AddColumn or AddTable (when with_id=True) actions."""
|
||||
col_values = {k: v for k, v in col_info._asdict().iteritems() if v is not None and k != 'colId'}
|
||||
if with_id:
|
||||
col_values['id'] = col_info.colId
|
||||
return col_values
|
||||
|
||||
|
||||
# To generate code, we need to know for each summary table, what its source table is. It would be
|
||||
# easy if we had access to metadata records, but (at least for now) we generate all code based on
|
||||
# schema only. So we encode the source table name inside of the summary table name.
|
||||
#
|
||||
# The encoding includes the length of the source table name, to avoid the possibility of ambiguity
|
||||
# between the second summary table for "Foo", and the first summary table for "Foo2".
|
||||
#
|
||||
# Note that it means we need to rename summary tables when the source table is renamed.
|
||||
|
||||
def encode_summary_table_name(source_table_name):
|
||||
"""
|
||||
Create a summary table name that reliably encodes the source table name. It can be decoded even
|
||||
if a suffix is added to the returned name.
|
||||
"""
|
||||
return "GristSummary_%d_%s" % (len(source_table_name), source_table_name)
|
||||
|
||||
|
||||
_summary_re = re.compile(r'GristSummary_(\d+)_')
|
||||
|
||||
def decode_summary_table_name(summary_table_name):
|
||||
"""
|
||||
Extract the name of the source table from the summary table name.
|
||||
"""
|
||||
m = _summary_re.match(summary_table_name)
|
||||
if m:
|
||||
start = m.end(0)
|
||||
length = int(m.group(1))
|
||||
source_name = summary_table_name[start : start + length]
|
||||
if len(source_name) == length:
|
||||
return source_name
|
||||
return None
|
||||
|
||||
def _group_colinfo(source_table):
|
||||
"""Returns ColInfo() for the 'group' column that must be present in every summary table."""
|
||||
return _make_col_info(colId='group', type='RefList:%s' % source_table.tableId,
|
||||
isFormula=True, formula='table.getSummarySourceGroup(rec)')
|
||||
|
||||
|
||||
def _update_sort_spec(sort_spec, old_table, new_table):
|
||||
"""
|
||||
Replace column references in the sort spec (which is a JSON string encoding a list of column
|
||||
refs, negated for descending) with references to the new table. Returns the new JSON string,
|
||||
or empty string in case of a problem.
|
||||
"""
|
||||
old_cols_map = {c.id: c.colId for c in old_table.columns}
|
||||
new_cols_map = {c.colId: c.id for c in new_table.columns}
|
||||
|
||||
# When adjusting, we take a possibly negated old colRef, and produce a new colRef.
|
||||
# If anything is gone, we return 0, which will be excluded from the new sort spec.
|
||||
def adjust(col_spec):
|
||||
sign = 1 if col_spec >= 0 else -1
|
||||
return sign * new_cols_map.get(old_cols_map.get(abs(col_spec)), 0)
|
||||
|
||||
try:
|
||||
old_sort_spec = json.loads(sort_spec)
|
||||
new_sort_spec = filter(None, [adjust(col_spec) for col_spec in old_sort_spec])
|
||||
return json.dumps(new_sort_spec, separators=(',', ':'))
|
||||
except Exception, e:
|
||||
log.warn("update_summary_section: can't parse sortColRefs JSON; clearing sortColRefs")
|
||||
return ''
|
||||
|
||||
|
||||
class SummaryActions(object):
|
||||
|
||||
def __init__(self, useractions, docmodel):
|
||||
self.useractions = useractions
|
||||
self.docmodel = docmodel
|
||||
|
||||
def _get_or_add_columns(self, table, all_colinfo):
|
||||
"""
|
||||
Given a table record and a list of ColInfo objects, generates a list of corresponding column
|
||||
records in the table, creating appropriate columns if they don't yet exist.
|
||||
"""
|
||||
prior = {c.colId: c for c in table.columns}
|
||||
for ci in all_colinfo:
|
||||
col = prior.get(ci.colId)
|
||||
if col and col.type == ci.type and col.formula == ci.formula:
|
||||
yield col
|
||||
else:
|
||||
result = self.useractions.doAddColumn(table.tableId, ci.colId,
|
||||
_get_colinfo_dict(ci, with_id=False))
|
||||
yield self.docmodel.columns.table.get_record(result['colRef'])
|
||||
|
||||
def _get_or_create_summary(self, source_table, source_groupby_columns, formula_colinfo):
|
||||
"""
|
||||
Finds a summary table or creates a new one, based on source_table, grouped by the columns
|
||||
in groupby_colinfo, and containing formulas in formula_colinfo. Source_table should be a
|
||||
Record from _grist_Tables, and other arguments should be lists of ColInfo objects.
|
||||
Returns the tuple (summary_table, groupby_columns, formula_columns).
|
||||
"""
|
||||
key = tuple(sorted(int(c) for c in source_groupby_columns))
|
||||
|
||||
groupby_colinfo = [_make_col_info(col=c, isFormula=False, formula='')
|
||||
for c in source_groupby_columns]
|
||||
summary_table = next((t for t in source_table.summaryTables if t.summaryKey == key), None)
|
||||
created = False
|
||||
if not summary_table:
|
||||
result = self.useractions.doAddTable(
|
||||
encode_summary_table_name(source_table.tableId),
|
||||
[_get_colinfo_dict(ci, with_id=True) for ci in groupby_colinfo + formula_colinfo],
|
||||
summarySourceTableRef=source_table.id)
|
||||
summary_table = self.docmodel.tables.table.get_record(result['id'])
|
||||
created = True
|
||||
# Note that in this case, _get_or_add_columns() below should not add any new columns,
|
||||
# but only return existing ones. (The table may contain extra columns, e.g. 'manualSort',
|
||||
# at least in theory.)
|
||||
|
||||
groupby_columns = list(self._get_or_add_columns(summary_table, groupby_colinfo))
|
||||
formula_columns = list(self._get_or_add_columns(summary_table, formula_colinfo))
|
||||
|
||||
if created:
|
||||
# Set the summarySourceCol field for all the group-by columns in the table.
|
||||
self.docmodel.update(groupby_columns,
|
||||
summarySourceCol=[c.id for c in source_groupby_columns])
|
||||
assert summary_table.summaryKey == key
|
||||
|
||||
return (summary_table, groupby_columns, formula_columns)
|
||||
|
||||
|
||||
def update_summary_section(self, view_section, source_table, source_groupby_columns):
|
||||
source_groupby_colset = set(source_groupby_columns)
|
||||
groupby_colids = {c.colId for c in source_groupby_columns}
|
||||
prev_fields = list(view_section.fields)
|
||||
|
||||
# Go through fields figuring out which ones we'll keep.
|
||||
prev_group_fields, formula_fields, delete_fields = [], [], []
|
||||
for field in prev_fields:
|
||||
# Records implement __hash__, so we can look them up in sets.
|
||||
if field.colRef.summarySourceCol in source_groupby_colset:
|
||||
prev_group_fields.append(field)
|
||||
elif field.colRef.isFormula and field.colRef.colId not in groupby_colids:
|
||||
formula_fields.append(field)
|
||||
else:
|
||||
delete_fields.append(field)
|
||||
|
||||
# Prepare ColInfo for all columns we want to keep.
|
||||
formula_colinfo = [_make_col_info(f.colRef) for f in formula_fields]
|
||||
|
||||
have_group_col = any(f.colRef.colId == 'group' for f in formula_fields)
|
||||
if not have_group_col:
|
||||
formula_colinfo.append(_group_colinfo(source_table))
|
||||
|
||||
# Get column records for all the columns we should have in our section.
|
||||
summary_table, groupby_columns, formula_columns = self._get_or_create_summary(
|
||||
source_table, source_groupby_columns, formula_colinfo)
|
||||
|
||||
if not have_group_col:
|
||||
# We've added the "group" column; now restore the lists to match what we want in fields.
|
||||
formula_colinfo.pop()
|
||||
formula_columns.pop()
|
||||
|
||||
# Remember the original table, which we need later to adjust the sort spec (sortColRefs).
|
||||
orig_table = view_section.tableRef
|
||||
|
||||
# This line is a bit hard to explain: we unset viewSection.tableRef before updating all the
|
||||
# fields, and then set it to the correct value. Note how undo will reverse the operations, and
|
||||
# produce the same sequence (unset, update fields, set). Client-side code relies on this to
|
||||
# avoid having to deal with inconsistent view sections while fields are being updated.
|
||||
self.docmodel.update([view_section], tableRef=0)
|
||||
|
||||
# Delete fields no longer relevant.
|
||||
self.docmodel.remove(delete_fields)
|
||||
|
||||
# Update fields for all formula fields and reused group-by fields to point to new columns.
|
||||
source_col_map = dict(zip(source_groupby_columns, groupby_columns))
|
||||
prev_group_columns = [source_col_map[f.colRef.summarySourceCol] for f in prev_group_fields]
|
||||
self.docmodel.update(formula_fields + prev_group_fields,
|
||||
colRef=[c.id for c in formula_columns + prev_group_columns])
|
||||
|
||||
# Finally, we need to create fields for newly-added group-by columns. If there were missing
|
||||
# fields for any group-by columns before, they'll be created now.
|
||||
new_group_columns = [c for c in groupby_columns if c not in prev_group_columns]
|
||||
|
||||
# Insert these after the last existing group-by field.
|
||||
insert_pos = prev_group_fields[-1].parentPos if prev_group_fields else None
|
||||
new_group_fields = self.docmodel.insert_after(view_section.fields, insert_pos,
|
||||
colRef=[c.id for c in new_group_columns])
|
||||
|
||||
# Reorder the group-by fields if needed, to match the order requested.
|
||||
group_col_to_field = {f.colRef: f for f in prev_group_fields + new_group_fields}
|
||||
group_fields = [group_col_to_field[c] for c in groupby_columns]
|
||||
group_positions = [field.parentPos for field in group_fields]
|
||||
sorted_positions = sorted(group_positions)
|
||||
if sorted_positions != group_positions:
|
||||
self.docmodel.update(group_fields, parentPos=sorted_positions)
|
||||
|
||||
update_args = {}
|
||||
if view_section.sortColRefs:
|
||||
# Fix the sortSpec to refer to the new columns.
|
||||
update_args['sortColRefs'] = _update_sort_spec(
|
||||
view_section.sortColRefs, orig_table, summary_table)
|
||||
|
||||
# Finally update the section to point to the new table.
|
||||
self.docmodel.update([view_section], tableRef=summary_table.id, **update_args)
|
||||
|
||||
|
||||
def _find_sister_column(self, source_table, col_id):
|
||||
"""Returns a summary formula column for source_table with the given col_id, or None."""
|
||||
for t in source_table.summaryTables:
|
||||
c = self.docmodel.columns.lookupOne(parentId=t.id, colId=col_id, isFormula=True)
|
||||
if c:
|
||||
return c
|
||||
return None
|
||||
|
||||
|
||||
def _create_summary_colinfo(self, source_table, source_groupby_columns):
|
||||
"""Come up automatically with a list of columns to include into a summary table."""
|
||||
# Column 'group' defines the group of records that map to this summary line.
|
||||
all_colinfo = [_group_colinfo(source_table)]
|
||||
|
||||
# For every column in the source data, if there is a same-named formula column in another
|
||||
# summary table, use it here; otherwise if it's a numerical column, automatically add a
|
||||
# same-named column with the sum of the values in the group.
|
||||
groupby_col_ids = {c.colId for c in source_groupby_columns}
|
||||
for col in source_table.columns:
|
||||
if col.colId in groupby_col_ids or col.colId == 'group':
|
||||
continue
|
||||
c = self._find_sister_column(source_table, col.colId)
|
||||
if c:
|
||||
all_colinfo.append(_make_col_info(col=c))
|
||||
elif col.type in ('Int', 'Numeric'):
|
||||
all_colinfo.append(_make_col_info(col=col, isFormula=True,
|
||||
formula='SUM($group.%s)' % col.colId))
|
||||
|
||||
# Add a default 'count' column for the number of records in the group, unless a different
|
||||
# 'count' was already added (which we would then prefer as presumably more useful). We add the
|
||||
# default 'count' right after 'group', to make it the first of the visible formula columns.
|
||||
if not any(c.colId == 'count' for c in all_colinfo):
|
||||
all_colinfo.insert(1, _make_col_info(colId='count', type='Int',
|
||||
isFormula=True, formula='len($group)'))
|
||||
return all_colinfo
|
||||
|
||||
|
||||
def create_new_summary_section(self, source_table, source_groupby_columns, view, section_type):
|
||||
formula_colinfo = list(self._create_summary_colinfo(source_table, source_groupby_columns))
|
||||
summary_table, groupby_columns, formula_columns = self._get_or_create_summary(
|
||||
source_table, source_groupby_columns, formula_colinfo)
|
||||
|
||||
section = self.docmodel.add(view.viewSections, tableRef=summary_table.id,
|
||||
parentKey=section_type)[0]
|
||||
self.docmodel.add(section.fields,
|
||||
colRef=[c.id for c in groupby_columns + formula_columns
|
||||
if c.colId != "group"])
|
||||
return section
|
||||
|
||||
|
||||
def detach_summary_section(self, view_section):
|
||||
"""
|
||||
Create a real table equivalent to the given summary section, and update the section to show
|
||||
the new table instead of the summary.
|
||||
"""
|
||||
source_table_id = view_section.tableRef.summarySourceTable.tableId
|
||||
|
||||
# Get a list of columns that we need for the new table.
|
||||
fields = view_section.fields
|
||||
field_col_recs = [f.colRef for f in fields]
|
||||
|
||||
# Prepare the column info for each column.
|
||||
col_info = [_make_col_info(col=c) for c in field_col_recs if c.colId != 'group']
|
||||
|
||||
# Prepare the 'group' column, which is that one column that's different from the original.
|
||||
group_args = ', '.join('%s=$%s' % (c.summarySourceCol.colId, c.colId)
|
||||
for c in field_col_recs if c.summarySourceCol)
|
||||
col_info.append(_make_col_info(colId='group', type='RefList:%s' % source_table_id,
|
||||
isFormula=True,
|
||||
formula='%s.lookupRecords(%s)' % (source_table_id, group_args)))
|
||||
|
||||
# Create the new table.
|
||||
res = self.useractions.AddTable(None, [_get_colinfo_dict(ci, with_id=True) for ci in col_info])
|
||||
new_table = self.docmodel.tables.table.get_record(res["id"])
|
||||
|
||||
# Remember the original table, which we need later e.g. to adjust the sort spec (sortColRefs).
|
||||
orig_table = view_section.tableRef
|
||||
|
||||
# Populate the new table.
|
||||
old_data = self.useractions._engine.fetch_table(orig_table.tableId, formulas=False)
|
||||
self.useractions.ReplaceTableData(new_table.tableId, old_data.row_ids, old_data.columns)
|
||||
|
||||
# Unset viewSection.tableRef before updating the fields, to avoid having inconsistencies. (See
|
||||
# longer explanation in update_summary_section().)
|
||||
self.docmodel.update([view_section], tableRef=0)
|
||||
|
||||
# Update all fields to point to new columns.
|
||||
new_col_dict = {c.colId: c.id for c in new_table.columns}
|
||||
self.docmodel.update(fields, colRef=[new_col_dict[c.colId] for c in field_col_recs])
|
||||
|
||||
# If the section is sorted, fix the sortSpec to refer to the new columns.
|
||||
update_args = {}
|
||||
if view_section.sortColRefs:
|
||||
update_args['sortColRefs'] = _update_sort_spec(
|
||||
view_section.sortColRefs, orig_table, new_table)
|
||||
|
||||
# Update the section to point to the new table.
|
||||
self.docmodel.update([view_section], tableRef=new_table.id, **update_args)
|
||||
Reference in New Issue
Block a user