mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
(core) CONTAINS() and summarising by ChoiceList columns with flattening
Summary: Added CONTAINS 'function' which can be used in lookups. Changed LookupMapColumn._row_key_map to use right=set so one row can have many keys when CONTAINS is used. Use CONTAINS to implement group column in summary table, while helper column in source table can reference and create multiple rows in summary table, especially when summarising by ChoiceList columns. Use itertools.product to generate all combinations of lookup keys and groupby values. Cleanup. Test Plan: Added python unit tests. Reviewers: dsagal. Reviewed By: dsagal. Subscribers: paulfitz, dsagal. Differential Revision: https://phab.getgrist.com/D2900
This commit is contained in:
parent
693f2f6325
commit
f7a9638992
@ -35,7 +35,7 @@ class AutocompleteContext(object):
|
|||||||
# TODO It would be nice to include builtin functions too, but getargspec doesn't work there.
|
# TODO It would be nice to include builtin functions too, but getargspec doesn't work there.
|
||||||
self._functions = {}
|
self._functions = {}
|
||||||
for key, value in six.iteritems(self._context):
|
for key, value in six.iteritems(self._context):
|
||||||
if value and callable(value):
|
if value and callable(value) and not isinstance(value, type):
|
||||||
argspec = inspect.formatargspec(*inspect.getargspec(value))
|
argspec = inspect.formatargspec(*inspect.getargspec(value))
|
||||||
self._functions[key] = Completion(key, argspec, is_grist_func(value))
|
self._functions[key] = Completion(key, argspec, is_grist_func(value))
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# pylint: disable=redefined-builtin, line-too-long
|
# pylint: disable=redefined-builtin, line-too-long
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict, namedtuple
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import six
|
import six
|
||||||
@ -150,3 +150,25 @@ def VLOOKUP(table, **field_value_pairs):
|
|||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
return table.lookupOne(**field_value_pairs)
|
return table.lookupOne(**field_value_pairs)
|
||||||
|
|
||||||
|
class CONTAINS(namedtuple("CONTAINS", "value")):
  """
  Use this marker with `Table.lookupRecords` to find records
  where a column contains the given value, e.g:

      MoviesTable.lookupRecords(genre=CONTAINS("Drama"))

  will return records in `MoviesTable` where the column `genre`
  is a list or other container such as `["Comedy", "Drama"]`,
  i.e. `"Drama" in $genre`.

  Note that the column being looked up (e.g. `genre`)
  must have values of a container type such as list, tuple, or set.
  In particular the values mustn't be strings, e.g. `"Comedy-Drama"` won't match
  even though `"Drama" in "Comedy-Drama"` is `True` in Python.
  It also won't match substrings within container elements, e.g. `["Comedy-Drama"]`.
  """
  # While users should apply this marker to values in queries, internally
  # the marker is moved to the column ID so that the LookupMapColumn knows how to
  # update its index correctly for that column.

  # A marker is a plain one-field tuple; empty __slots__ keeps instances from
  # growing a per-instance __dict__ (the standard idiom for namedtuple subclasses).
  __slots__ = ()
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
import itertools
|
||||||
|
from abc import abstractmethod
|
||||||
|
|
||||||
import six
|
import six
|
||||||
|
|
||||||
import column
|
import column
|
||||||
@ -6,6 +9,7 @@ import records
|
|||||||
import relation
|
import relation
|
||||||
import twowaymap
|
import twowaymap
|
||||||
import usertypes
|
import usertypes
|
||||||
|
from functions.lookup import CONTAINS
|
||||||
|
|
||||||
import logger
|
import logger
|
||||||
log = logger.Logger(__name__, logger.INFO)
|
log = logger.Logger(__name__, logger.INFO)
|
||||||
@ -21,7 +25,7 @@ def _extract(cell_value):
|
|||||||
return cell_value
|
return cell_value
|
||||||
|
|
||||||
|
|
||||||
class LookupMapColumn(column.BaseColumn):
|
class BaseLookupMapColumn(column.BaseColumn):
|
||||||
"""
|
"""
|
||||||
Conceptually a LookupMapColumn is associated with a table ("target table") and maintains for
|
Conceptually a LookupMapColumn is associated with a table ("target table") and maintains for
|
||||||
each row a key (which is a tuple of values from the named columns), which is fast to look up.
|
each row a key (which is a tuple of values from the named columns), which is fast to look up.
|
||||||
@ -39,15 +43,17 @@ class LookupMapColumn(column.BaseColumn):
|
|||||||
def __init__(self, table, col_id, col_ids_tuple):
|
def __init__(self, table, col_id, col_ids_tuple):
|
||||||
# Note that self._recalc_rec_method is passed in as the formula's "method".
|
# Note that self._recalc_rec_method is passed in as the formula's "method".
|
||||||
col_info = column.ColInfo(usertypes.Any(), is_formula=True, method=self._recalc_rec_method)
|
col_info = column.ColInfo(usertypes.Any(), is_formula=True, method=self._recalc_rec_method)
|
||||||
super(LookupMapColumn, self).__init__(table, col_id, col_info)
|
super(BaseLookupMapColumn, self).__init__(table, col_id, col_info)
|
||||||
|
|
||||||
self._col_ids_tuple = col_ids_tuple
|
self._col_ids_tuple = col_ids_tuple
|
||||||
self._engine = table._engine
|
self._engine = table._engine
|
||||||
|
|
||||||
# Two-way map between rowIds of the target table (on the left) and key tuples (on the right).
|
# Two-way map between rowIds of the target table (on the left) and key tuples (on the right).
|
||||||
# Multiple rows can map to the same key. The map is populated by engine's _recompute when this
|
# Multiple rows can naturally map to the same key.
|
||||||
|
# Multiple keys can map to the same row if CONTAINS() is used
|
||||||
|
# The map is populated by engine's _recompute when this
|
||||||
# node is brought up-to-date.
|
# node is brought up-to-date.
|
||||||
self._row_key_map = twowaymap.TwoWayMap(left=set, right="single")
|
self._row_key_map = self._make_row_key_map()
|
||||||
self._engine.invalidate_column(self)
|
self._engine.invalidate_column(self)
|
||||||
|
|
||||||
# Map of referring Node to _LookupRelation. Different tables may do lookups using this
|
# Map of referring Node to _LookupRelation. Different tables may do lookups using this
|
||||||
@ -55,6 +61,11 @@ class LookupMapColumn(column.BaseColumn):
|
|||||||
# between referring rows and the lookup keys. This map stores these relations.
|
# between referring rows and the lookup keys. This map stores these relations.
|
||||||
self._lookup_relations = {}
|
self._lookup_relations = {}
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _make_row_key_map(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
def _recalc_rec_method(self, rec, table):
|
def _recalc_rec_method(self, rec, table):
|
||||||
"""
|
"""
|
||||||
LookupMapColumn acts as a formula column, and this method is the "formula" called whenever
|
LookupMapColumn acts as a formula column, and this method is the "formula" called whenever
|
||||||
@ -62,27 +73,21 @@ class LookupMapColumn(column.BaseColumn):
|
|||||||
cause the LookupMapColumn to be invalidated for the corresponding rows, and brought up to date
|
cause the LookupMapColumn to be invalidated for the corresponding rows, and brought up to date
|
||||||
during formula recomputation by calling this method. It should take O(1) time per affected row.
|
during formula recomputation by calling this method. It should take O(1) time per affected row.
|
||||||
"""
|
"""
|
||||||
old_key = self._row_key_map.lookup_left(rec._row_id)
|
raise NotImplementedError
|
||||||
|
|
||||||
# Note that getattr(rec, col_id) is what creates the correct dependency, as well as ensures
|
@abstractmethod
|
||||||
# that the columns used to index by are brought up-to-date (in case they are formula columns).
|
def _get_keys(self, target_row_id):
|
||||||
new_key = tuple(_extract(rec._get_col(_col_id)) for _col_id in self._col_ids_tuple)
|
"""
|
||||||
|
Get the keys associated with the given target row id.
|
||||||
try:
|
"""
|
||||||
self._row_key_map.insert(rec._row_id, new_key)
|
raise NotImplementedError
|
||||||
except TypeError:
|
|
||||||
# If key is not hashable, ignore it, just remove the old_key then.
|
|
||||||
self._row_key_map.remove(rec._row_id, old_key)
|
|
||||||
new_key = None
|
|
||||||
|
|
||||||
# It's OK if None is one of the values, since None will just never be found as a key.
|
|
||||||
self._invalidate_affected({old_key, new_key})
|
|
||||||
|
|
||||||
def unset(self, row_id):
|
def unset(self, row_id):
|
||||||
# This is called on record removal, and is necessary to deal with removed records.
|
# This is called on record removal, and is necessary to deal with removed records.
|
||||||
old_key = self._row_key_map.lookup_left(row_id)
|
old_keys = self._get_keys(row_id)
|
||||||
|
for old_key in old_keys:
|
||||||
self._row_key_map.remove(row_id, old_key)
|
self._row_key_map.remove(row_id, old_key)
|
||||||
self._invalidate_affected({old_key})
|
self._invalidate_affected(old_keys)
|
||||||
|
|
||||||
def _invalidate_affected(self, affected_keys):
|
def _invalidate_affected(self, affected_keys):
|
||||||
# For each known relation, figure out which referring rows are affected, and invalidate them.
|
# For each known relation, figure out which referring rows are affected, and invalidate them.
|
||||||
@ -129,12 +134,6 @@ class LookupMapColumn(column.BaseColumn):
|
|||||||
|
|
||||||
return row_ids, rel
|
return row_ids, rel
|
||||||
|
|
||||||
def _get_key(self, target_row_id):
|
|
||||||
"""
|
|
||||||
Helper used by _LookupRelation to get the key associated with the given target row id.
|
|
||||||
"""
|
|
||||||
return self._row_key_map.lookup_left(target_row_id)
|
|
||||||
|
|
||||||
# Override various column methods, since LookupMapColumn doesn't care to store any values. To
|
# Override various column methods, since LookupMapColumn doesn't care to store any values. To
|
||||||
# outside code, it looks like a column of None's.
|
# outside code, it looks like a column of None's.
|
||||||
def raw_get(self, value):
|
def raw_get(self, value):
|
||||||
@ -146,6 +145,83 @@ class LookupMapColumn(column.BaseColumn):
|
|||||||
def set(self, row_id, value):
|
def set(self, row_id, value):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# For performance, prefer SimpleLookupMapColumn when no CONTAINS is used
|
||||||
|
# in lookups, although the two implementations should be equivalent
|
||||||
|
# See also table._add_update_summary_col
|
||||||
|
|
||||||
|
class SimpleLookupMapColumn(BaseLookupMapColumn):
  """
  Lookup map in which every target row is indexed under exactly one key tuple,
  built from the values of the columns named in self._col_ids_tuple.
  """

  def _make_row_key_map(self):
    # Many rows may share one key (left=set), but a row has a single key (right="single").
    return twowaymap.TwoWayMap(left=set, right="single")

  def _recalc_rec_method(self, rec, table):
    """
    Re-index the row behind `rec` under its current key, and invalidate lookups that
    used either the previous key or the new one.
    """
    previous_key = self._row_key_map.lookup_left(rec._row_id)

    # Reading each column through rec._get_col() is what registers the dependency on it,
    # and also makes sure the indexed columns are up-to-date if they are formula columns.
    current_key = tuple(
      _extract(rec._get_col(indexed_col)) for indexed_col in self._col_ids_tuple
    )

    try:
      self._row_key_map.insert(rec._row_id, current_key)
    except TypeError:
      # An unhashable key can't be indexed: drop the previous entry and treat the row
      # as having no key at all.
      self._row_key_map.remove(rec._row_id, previous_key)
      current_key = None

    # A None in the set is harmless, since None is never found as a key.
    self._invalidate_affected({previous_key, current_key})

  def _get_keys(self, target_row_id):
    """
    Return a one-element set holding the key currently mapped to the given target row id.
    """
    only_key = self._row_key_map.lookup_left(target_row_id)
    return {only_key}
|
||||||
|
|
||||||
|
|
||||||
|
class ContainsLookupMapColumn(BaseLookupMapColumn):
  """
  Lookup map used when at least one looked-up column is wrapped in CONTAINS().
  A single target row may then be indexed under several key tuples at once.
  """

  def _make_row_key_map(self):
    # Sets on both sides: many rows per key, and many keys per row.
    return twowaymap.TwoWayMap(left=set, right=set)

  def _recalc_rec_method(self, rec, table):
    """
    Re-index the row behind `rec`: it gets one key for every combination of values
    taken from the columns looked up with CONTAINS(), and lookups that used any key
    that was added or removed are invalidated.
    """
    per_column_values = []
    for col_id in self._col_ids_tuple:
      # Going through _get_col is what registers the dependency on the column, and
      # makes sure the indexed column is up-to-date if it is a formula column.
      cell = rec._get_col(extract_column_id(col_id))

      if not isinstance(col_id, CONTAINS):
        # An ordinary column contributes its cell value as the only candidate.
        candidates = [cell]
      elif isinstance(cell, (six.binary_type, six.text_type)):
        # A CONTAINS() column must hold a container; never iterate over the characters
        # of a string. No candidates means this row produces no keys in this call.
        candidates = []
      else:
        candidates = cell

      try:
        # Only the distinct values matter.
        distinct = set(candidates)
      except TypeError:
        distinct = []

      per_column_values.append([_extract(item) for item in distinct])

    new_keys = set(itertools.product(*per_column_values))

    row_id = rec._row_id
    old_keys = self._get_keys(row_id)
    stale_keys = old_keys - new_keys
    fresh_keys = new_keys - old_keys

    for stale_key in stale_keys:
      self._row_key_map.remove(row_id, stale_key)

    for fresh_key in fresh_keys:
      self._row_key_map.insert(row_id, fresh_key)

    # Every key that was either dropped or added needs its referring rows invalidated.
    self._invalidate_affected(stale_keys | fresh_keys)

  def _get_keys(self, target_row_id):
    """
    Return a fresh set of the keys currently mapped to the given target row id.
    """
    # lookup_left hands back the set stored inside the map, which may be modified later,
    # so callers get their own copy.
    stored_keys = self._row_key_map.lookup_left(target_row_id, ())
    return set(stored_keys)
|
||||||
|
|
||||||
|
|
||||||
#----------------------------------------------------------------------
|
#----------------------------------------------------------------------
|
||||||
|
|
||||||
@ -174,11 +250,14 @@ class _LookupRelation(relation.Relation):
|
|||||||
def get_affected_rows(self, target_row_ids):
|
def get_affected_rows(self, target_row_ids):
|
||||||
if target_row_ids == depend.ALL_ROWS:
|
if target_row_ids == depend.ALL_ROWS:
|
||||||
return depend.ALL_ROWS
|
return depend.ALL_ROWS
|
||||||
# Each target row (result of a lookup by key) is associated with a key, and all rows that
|
# Each target row (result of a lookup by key)
|
||||||
|
# is associated with a set of keys, and all rows that
|
||||||
# looked up an affected key are affected by a change to any associated row. We remember which
|
# looked up an affected key are affected by a change to any associated row. We remember which
|
||||||
# rows looked up which key in self._row_key_map, so that when some target row changes to a new
|
# rows looked up which key in self._row_key_map, so that when some target row changes to a new
|
||||||
# key, we can know which referring rows need to be recomputed.
|
# key, we can know which referring rows need to be recomputed.
|
||||||
return self.get_affected_rows_by_keys({ self._lookup_map._get_key(r) for r in target_row_ids })
|
return self.get_affected_rows_by_keys(
|
||||||
|
set().union(*[self._lookup_map._get_keys(r) for r in target_row_ids])
|
||||||
|
)
|
||||||
|
|
||||||
def get_affected_rows_by_keys(self, keys):
|
def get_affected_rows_by_keys(self, keys):
|
||||||
"""
|
"""
|
||||||
@ -218,3 +297,10 @@ class _LookupRelation(relation.Relation):
|
|||||||
# lookup map can get cleaned up.
|
# lookup map can get cleaned up.
|
||||||
self._row_key_map.clear()
|
self._row_key_map.clear()
|
||||||
self._lookup_map._delete_relation(self._referring_node)
|
self._lookup_map._delete_relation(self._referring_node)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_column_id(c):
  """
  Return the plain column id for `c`: the wrapped `.value` when `c` is a CONTAINS
  marker, otherwise `c` itself unchanged.
  """
  return c.value if isinstance(c, CONTAINS) else c
|
||||||
|
@ -121,7 +121,10 @@ class SummaryActions(object):
|
|||||||
"""
|
"""
|
||||||
key = tuple(sorted(int(c) for c in source_groupby_columns))
|
key = tuple(sorted(int(c) for c in source_groupby_columns))
|
||||||
|
|
||||||
groupby_colinfo = [_make_col_info(col=c, isFormula=False, formula='')
|
groupby_colinfo = [_make_col_info(col=c,
|
||||||
|
isFormula=False,
|
||||||
|
formula='',
|
||||||
|
type='Choice' if c.type == 'ChoiceList' else c.type)
|
||||||
for c in source_groupby_columns]
|
for c in source_groupby_columns]
|
||||||
summary_table = next((t for t in source_table.summaryTables if t.summaryKey == key), None)
|
summary_table = next((t for t in source_table.summaryTables if t.summaryKey == key), None)
|
||||||
created = False
|
created = False
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import collections
|
import collections
|
||||||
|
import itertools
|
||||||
import types
|
import types
|
||||||
|
|
||||||
import six
|
import six
|
||||||
@ -7,12 +8,12 @@ from six.moves import xrange
|
|||||||
import column
|
import column
|
||||||
import depend
|
import depend
|
||||||
import docmodel
|
import docmodel
|
||||||
|
import logger
|
||||||
import lookup
|
import lookup
|
||||||
import records
|
import records
|
||||||
import relation as relation_module # "relation" is used too much as a variable name below.
|
import relation as relation_module # "relation" is used too much as a variable name below.
|
||||||
import usertypes
|
import usertypes
|
||||||
|
|
||||||
import logger
|
|
||||||
log = logger.Logger(__name__, logger.INFO)
|
log = logger.Logger(__name__, logger.INFO)
|
||||||
|
|
||||||
|
|
||||||
@ -205,6 +206,11 @@ class Table(object):
|
|||||||
# For a summary table, the name of the special helper column auto-added to the source table.
|
# For a summary table, the name of the special helper column auto-added to the source table.
|
||||||
self._summary_helper_col_id = None
|
self._summary_helper_col_id = None
|
||||||
|
|
||||||
|
# For a summary table, True in the common case where every source record belongs
|
||||||
|
# to just one group in the summary table, False if grouping by list columns
|
||||||
|
# which are 'flattened' so source records may appear in multiple groups
|
||||||
|
self._summary_simple = None
|
||||||
|
|
||||||
# Add Record and RecordSet subclasses which fill in this table as the first argument
|
# Add Record and RecordSet subclasses which fill in this table as the first argument
|
||||||
class Record(records.Record):
|
class Record(records.Record):
|
||||||
def __init__(inner_self, *args, **kwargs): # pylint: disable=no-self-argument
|
def __init__(inner_self, *args, **kwargs): # pylint: disable=no-self-argument
|
||||||
@ -254,20 +260,84 @@ class Table(object):
|
|||||||
if summary_src not in self._engine.tables:
|
if summary_src not in self._engine.tables:
|
||||||
self._summary_source_table = None
|
self._summary_source_table = None
|
||||||
self._summary_helper_col_id = None
|
self._summary_helper_col_id = None
|
||||||
|
self._summary_simple = None
|
||||||
else:
|
else:
|
||||||
self._summary_source_table = self._engine.tables[summary_src]
|
self._summary_source_table = self._engine.tables[summary_src]
|
||||||
self._summary_helper_col_id = "#summary#%s" % self.table_id
|
self._summary_helper_col_id = "#summary#%s" % self.table_id
|
||||||
# Figure out the group-by columns: these are all the non-formula columns.
|
# Figure out the group-by columns: these are all the non-formula columns.
|
||||||
groupby_cols = tuple(sorted(col_id for (col_id, col_model) in col_items
|
groupby_cols = tuple(sorted(col_id for (col_id, col_model) in col_items
|
||||||
if not isinstance(col_model, types.FunctionType)))
|
if not isinstance(col_model, types.FunctionType)))
|
||||||
|
self._summary_simple = not any(
|
||||||
|
isinstance(
|
||||||
|
self._summary_source_table.all_columns.get(group_col),
|
||||||
|
column.ChoiceListColumn
|
||||||
|
)
|
||||||
|
for group_col in groupby_cols
|
||||||
|
)
|
||||||
# Add the special helper column to the source table.
|
# Add the special helper column to the source table.
|
||||||
self._summary_source_table._add_update_summary_col(self, groupby_cols)
|
self._summary_source_table._add_update_summary_col(self, groupby_cols)
|
||||||
|
|
||||||
def _add_update_summary_col(self, summary_table, groupby_cols):
|
def _add_update_summary_col(self, summary_table, groupby_cols):
|
||||||
# TODO: things need to be removed also from summary_cols when a summary table is deleted.
|
# TODO: things need to be removed also from summary_cols when a summary table is deleted.
|
||||||
|
|
||||||
|
# Grouping by list columns is significantly more complex and this comes with a
|
||||||
|
# performance cost, so in the common case we use the simpler older implementation
|
||||||
|
# In particular _updateSummary returns (possibly creating) just one reference
|
||||||
|
# instead of a list, which getSummarySourceGroup looks up directly instead
|
||||||
|
# of using CONTAINS, which in turn allows using SimpleLookupMapColumn
|
||||||
|
# instead of the similarly slower and more complicated ContainsLookupMapColumn
|
||||||
|
# All of these branches should be interchangeable and produce equivalent results
|
||||||
|
# when no list columns or CONTAINS are involved,
|
||||||
|
# especially since we need to be able to summarise by a combination of list and non-list
|
||||||
|
# columns or lookupRecords with a combination of CONTAINS and normal values,
|
||||||
|
# these are just performance optimisations
|
||||||
|
if summary_table._summary_simple:
|
||||||
@usertypes.formulaType(usertypes.Reference(summary_table.table_id))
|
@usertypes.formulaType(usertypes.Reference(summary_table.table_id))
|
||||||
def _updateSummary(rec, table): # pylint: disable=unused-argument
|
def _updateSummary(rec, table): # pylint: disable=unused-argument
|
||||||
return summary_table.lookupOrAddDerived(**{c: getattr(rec, c) for c in groupby_cols})
|
return summary_table.lookupOrAddDerived(**{c: getattr(rec, c) for c in groupby_cols})
|
||||||
|
else:
|
||||||
|
@usertypes.formulaType(usertypes.ReferenceList(summary_table.table_id))
|
||||||
|
def _updateSummary(rec, table): # pylint: disable=unused-argument
|
||||||
|
# Create a row in the summary table for every combination of values in
|
||||||
|
# ChoiceList columns
|
||||||
|
lookup_values = []
|
||||||
|
for group_col in groupby_cols:
|
||||||
|
lookup_value = getattr(rec, group_col)
|
||||||
|
if isinstance(self.all_columns[group_col], column.ChoiceListColumn):
|
||||||
|
# Check that ChoiceList cells have appropriate types.
|
||||||
|
# Don't iterate over characters of a string.
|
||||||
|
if isinstance(lookup_value, (six.binary_type, six.text_type)):
|
||||||
|
return []
|
||||||
|
try:
|
||||||
|
# We only care about the unique choices
|
||||||
|
lookup_value = set(lookup_value)
|
||||||
|
except TypeError:
|
||||||
|
return []
|
||||||
|
else:
|
||||||
|
lookup_value = [lookup_value]
|
||||||
|
lookup_values.append(lookup_value)
|
||||||
|
|
||||||
|
result = []
|
||||||
|
values_to_add = {}
|
||||||
|
new_row_ids = []
|
||||||
|
|
||||||
|
for values_tuple in sorted(itertools.product(*lookup_values)):
|
||||||
|
values_dict = dict(zip(groupby_cols, values_tuple))
|
||||||
|
row_id = summary_table.lookup_one_record(**values_dict)._row_id
|
||||||
|
if row_id:
|
||||||
|
result.append(row_id)
|
||||||
|
else:
|
||||||
|
for col, value in six.iteritems(values_dict):
|
||||||
|
values_to_add.setdefault(col, []).append(value)
|
||||||
|
new_row_ids.append(None)
|
||||||
|
|
||||||
|
if new_row_ids and not self._engine.is_triggered_by_table_action(summary_table.table_id):
|
||||||
|
result += self._engine.user_actions.BulkAddRecord(
|
||||||
|
summary_table.table_id, new_row_ids, values_to_add
|
||||||
|
)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
_updateSummary.is_private = True
|
_updateSummary.is_private = True
|
||||||
col_id = summary_table._summary_helper_col_id
|
col_id = summary_table._summary_helper_col_id
|
||||||
col_obj = self._create_or_update_col(col_id, _updateSummary)
|
col_obj = self._create_or_update_col(col_id, _updateSummary)
|
||||||
@ -343,8 +413,20 @@ class Table(object):
|
|||||||
"""
|
"""
|
||||||
# The tuple of keys used determines the LookupMap we need.
|
# The tuple of keys used determines the LookupMap we need.
|
||||||
sort_by = kwargs.pop('sort_by', None)
|
sort_by = kwargs.pop('sort_by', None)
|
||||||
col_ids = tuple(sorted(kwargs))
|
key = []
|
||||||
key = tuple(kwargs[c] for c in col_ids)
|
col_ids = []
|
||||||
|
for col_id in sorted(kwargs):
|
||||||
|
value = kwargs[col_id]
|
||||||
|
if isinstance(value, lookup.CONTAINS):
|
||||||
|
value = value.value
|
||||||
|
# While users should use CONTAINS on lookup values,
|
||||||
|
# the marker is moved to col_id so that the LookupMapColumn knows how to
|
||||||
|
# update its index correctly for that column.
|
||||||
|
col_id = lookup.CONTAINS(col_id)
|
||||||
|
key.append(value)
|
||||||
|
col_ids.append(col_id)
|
||||||
|
col_ids = tuple(col_ids)
|
||||||
|
key = tuple(key)
|
||||||
|
|
||||||
lookup_map = self._get_lookup_map(col_ids)
|
lookup_map = self._get_lookup_map(col_ids)
|
||||||
row_id_set, rel = lookup_map.do_lookup(key)
|
row_id_set, rel = lookup_map.do_lookup(key)
|
||||||
@ -365,14 +447,19 @@ class Table(object):
|
|||||||
"""
|
"""
|
||||||
# LookupMapColumn is a Node, so identified by (table_id, col_id) pair, so we make up a col_id
|
# LookupMapColumn is a Node, so identified by (table_id, col_id) pair, so we make up a col_id
|
||||||
# to identify this lookup object uniquely in this Table.
|
# to identify this lookup object uniquely in this Table.
|
||||||
lookup_col_id = "#lookup#" + ":".join(col_ids_tuple)
|
lookup_col_id = "#lookup#" + ":".join(map(str, col_ids_tuple))
|
||||||
lmap = self._special_cols.get(lookup_col_id)
|
lmap = self._special_cols.get(lookup_col_id)
|
||||||
if not lmap:
|
if not lmap:
|
||||||
# Check that the table actually has all the columns we are looking up.
|
# Check that the table actually has all the columns we are looking up.
|
||||||
for c in col_ids_tuple:
|
for c in col_ids_tuple:
|
||||||
|
c = lookup.extract_column_id(c)
|
||||||
if not self.has_column(c):
|
if not self.has_column(c):
|
||||||
raise KeyError("Table %s has no column %s" % (self.table_id, c))
|
raise KeyError("Table %s has no column %s" % (self.table_id, c))
|
||||||
lmap = lookup.LookupMapColumn(self, lookup_col_id, col_ids_tuple)
|
if any(isinstance(col_id, lookup.CONTAINS) for col_id in col_ids_tuple):
|
||||||
|
column_class = lookup.ContainsLookupMapColumn
|
||||||
|
else:
|
||||||
|
column_class = lookup.SimpleLookupMapColumn
|
||||||
|
lmap = column_class(self, lookup_col_id, col_ids_tuple)
|
||||||
self._special_cols[lookup_col_id] = lmap
|
self._special_cols[lookup_col_id] = lmap
|
||||||
self.all_columns[lookup_col_id] = lmap
|
self.all_columns[lookup_col_id] = lmap
|
||||||
return lmap
|
return lmap
|
||||||
@ -389,8 +476,17 @@ class Table(object):
|
|||||||
return record
|
return record
|
||||||
|
|
||||||
def getSummarySourceGroup(self, rec):
|
def getSummarySourceGroup(self, rec):
|
||||||
return (self._summary_source_table.lookup_records(**{self._summary_helper_col_id: int(rec)})
|
if self._summary_source_table:
|
||||||
if self._summary_source_table else None)
|
# See comment in _add_update_summary_col.
|
||||||
|
# self._summary_simple determines whether
|
||||||
|
# the column named self._summary_helper_col_id is a single reference
|
||||||
|
# or a reference list.
|
||||||
|
lookup_value = rec if self._summary_simple else lookup.CONTAINS(rec)
|
||||||
|
return self._summary_source_table.lookup_records(**{
|
||||||
|
self._summary_helper_col_id: lookup_value
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
def get(self, **kwargs):
|
def get(self, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
@ -706,3 +706,43 @@ return ",".join(str(r.id) for r in Students.lookupRecords(firstName=fn, lastName
|
|||||||
[5, "Eureka", SchoolsRec(1), "New York" ],
|
[5, "Eureka", SchoolsRec(1), "New York" ],
|
||||||
[6, "Yale", SchoolsRec(3), "New Haven" ],
|
[6, "Yale", SchoolsRec(3), "New Haven" ],
|
||||||
])
|
])
|
||||||
|
|
||||||
|
def test_contains(self):
  """
  Check lookupRecords() with CONTAINS() against ChoiceList columns: a single CONTAINS
  lookup on each list column, CONTAINS on both list columns at once, and CONTAINS
  combined with an ordinary equality match on a text column.
  """
  sample = testutil.parse_test_sample({
    "SCHEMA": [
      [1, "Source", [
        [11, "choicelist1", "ChoiceList", False, "", "choicelist1", ""],
        [12, "choicelist2", "ChoiceList", False, "", "choicelist2", ""],
        [13, "text1", "Text", False, "", "text1", ""],
        # The label previously read "text1", copy-pasted from the column above.
        [14, "text2", "Text", False, "", "text2", ""],
        [15, "contains1", "RefList:Source", True,
         "Source.lookupRecords(choicelist1=CONTAINS($text1))",
         "contains1", ""],
        [16, "contains2", "RefList:Source", True,
         "Source.lookupRecords(choicelist2=CONTAINS($text2))",
         "contains2", ""],
        [17, "contains_both", "RefList:Source", True,
         "Source.lookupRecords(choicelist1=CONTAINS($text1), choicelist2=CONTAINS($text2))",
         "contains_both", ""],
        # This column previously reused id 17, clashing with "contains_both".
        [18, "combined", "RefList:Source", True,
         "Source.lookupRecords(choicelist1=CONTAINS($text1), text2='x')",
         "combined", ""],
      ]]
    ],
    "DATA": {
      "Source": [
        ["id", "choicelist1", "text1", "choicelist2", "text2"],
        [101, ["a"], "a", ["x"], "y"],
        [102, ["b"], "b", ["y"], "x"],
        [103, ["a", "b"], "c", ["x", "y"], "c"],
      ]
    }
  })
  self.load_sample(sample)

  # Row 103 looks up "c", which appears in no list, so all of its lookups are empty.
  # "combined" only matches row 102: it is the only row with text2 == 'x', and its
  # choicelist1 contains "b", the value row 102 looks up.
  self.assertTableData("Source", cols="subset", data=[
    ["id", "contains1", "contains2", "contains_both", "combined"],
    [101, [101, 103], [102, 103], [103], []],
    [102, [102, 103], [101, 103], [103], [102]],
    [103, [], [], [], []],
  ])
|
||||||
|
267
sandbox/grist/test_summary_choicelist.py
Normal file
267
sandbox/grist/test_summary_choicelist.py
Normal file
@ -0,0 +1,267 @@
|
|||||||
|
"""
|
||||||
|
Test of Summary tables grouped by ChoiceList columns.
|
||||||
|
"""
|
||||||
|
import column
|
||||||
|
import logger
|
||||||
|
import lookup
|
||||||
|
import testutil
|
||||||
|
from test_engine import EngineTestCase, Table, Column
|
||||||
|
|
||||||
|
log = logger.Logger(__name__, logger.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
class TestSummaryChoiceList(EngineTestCase):
  """
  Exercises summary tables whose group-by columns include ChoiceList columns.

  The expectations below show that a source record with several choices is listed in the group
  of one summary row per choice (or per combination of choices when grouping by more than one
  ChoiceList column).
  """

  # Sample document: a single "Source" table with one Text column and two ChoiceList columns,
  # holding one record (id 21).
  sample = testutil.parse_test_sample({
    "SCHEMA": [
      [1, "Source", [
        [10, "other", "Text", False, "", "other", ""],
        [11, "choices1", "ChoiceList", False, "", "choices1", ""],
        [12, "choices2", "ChoiceList", False, "", "choices2", ""],
      ]]
    ],
    "DATA": {
      "Source": [
        ["id", "choices1", "choices2", "other"],
        [21, ["a", "b"], ["c", "d"], "foo"],
      ]
    }
  })

  # Expected metadata of the "Source" table before any summary table exists.
  starting_table = Table(
    1, "Source", primaryViewId=0, summarySourceTable=0,
    columns=[
      Column(10, "other", "Text", isFormula=False, formula="", summarySourceCol=0),
      Column(11, "choices1", "ChoiceList", isFormula=False, formula="", summarySourceCol=0),
      Column(12, "choices2", "ChoiceList", isFormula=False, formula="", summarySourceCol=0),
    ],
  )

  # ----------------------------------------------------------------------

  def test_create_view_section(self):
    """
    Create four summary sections over "Source" and follow them through data changes.

    Checks the generated table/column metadata, the summarized data, the helper columns added
    to the source table, and how groups and counts react to updating, removing and adding
    source records.
    """
    summary1 = 'GristSummary_6_Source'
    summary2 = 'GristSummary_6_Source2'
    summary3 = 'GristSummary_6_Source3'
    summary4 = 'GristSummary_6_Source4'

    def expected_summary(table_ref, table_id, first_col_ref, key_cols):
      # Expected metadata for one summary table: the group-by columns (numbered consecutively
      # from first_col_ref), followed by the "group" and "count" formula columns.
      cols = [
        Column(first_col_ref + offset, col_id, col_type,
               isFormula=False, formula="", summarySourceCol=source_ref)
        for offset, (col_id, col_type, source_ref) in enumerate(key_cols)
      ]
      group_ref = first_col_ref + len(key_cols)
      cols.append(Column(group_ref, "group", "RefList:Source", isFormula=True,
                         summarySourceCol=0, formula="table.getSummarySourceGroup(rec)"))
      cols.append(Column(group_ref + 1, "count", "Int", isFormula=True,
                         summarySourceCol=0, formula="len($group)"))
      return Table(table_ref, table_id, primaryViewId=0, summarySourceTable=1, columns=cols)

    def check_summary1(*rows):
      # Full-data check of the summary grouped by "choices1" only.
      self.assertTableData(summary1, data=[["id", "choices1", "group", "count"]] + list(rows))

    def check_summary2(*rows):
      # Full-data check of the summary grouped by both ChoiceList columns.
      self.assertTableData(
        summary2,
        data=[["id", "choices1", "choices2", "group", "count"]] + list(rows),
      )

    self.load_sample(self.sample)

    # Only the source table exists at first, and there are no views.
    self.assertTables([self.starting_table])
    self.assertViews([])

    # Create the summary sections one after another, grouped by: "choices1"; both ChoiceList
    # columns; the non-choicelist column "other"; and "other" together with "choices1".
    for group_by in ([11], [11, 12], [10], [10, 11]):
      self.apply_user_action(["CreateViewSection", 1, 0, "record", group_by])

    # In the summary tables a ChoiceList source column is expected as a plain Choice column.
    choices1_key = ("choices1", "Choice", 11)
    choices2_key = ("choices2", "Choice", 12)
    other_key = ("other", "Text", 10)

    self.assertTables([
      self.starting_table,
      expected_summary(2, summary1, 13, [choices1_key]),
      expected_summary(3, summary2, 16, [choices1_key, choices2_key]),
      expected_summary(4, summary3, 20, [other_key]),
      expected_summary(5, summary4, 23, [other_key, choices1_key]),
    ])

    # Verify the summarized data: record 21 shows up once per choice / combination of choices.
    check_summary1(
      [1, "a", [21], 1],
      [2, "b", [21], 1],
    )

    check_summary2(
      [1, "a", "c", [21], 1],
      [2, "a", "d", [21], 1],
      [3, "b", "c", [21], 1],
      [4, "b", "d", [21], 1],
    )

    self.assertTableData(summary3, data=[
      ["id", "other", "group", "count"],
      [1, "foo", [21], 1],
    ])

    self.assertTableData(summary4, data=[
      ["id", "other", "choices1", "group", "count"],
      [1, "foo", "a", [21], 1],
      [2, "foo", "b", [21], 1],
    ])

    # Verify the optimisation works for the table without choicelists: only the summary grouped
    # purely by "other" is flagged as simple; the source table itself has no flag (None).
    tables = self.engine.tables
    for table_id, expected_simple in [
        ("Source", None),
        (summary1, False),
        (summary2, False),
        (summary3, True),  # simple summary and lookup
        (summary4, False),
    ]:
      self.assertIs(tables[table_id]._summary_simple, expected_simple)

    # Helper columns added to the source table, by name and column class.
    special_col_types = {}
    for col_name, col_obj in self.engine.tables["Source"]._special_cols.items():
      special_col_types[col_name] = type(col_obj)

    self.assertEqual(
      special_col_types,
      {
        '#summary#GristSummary_6_Source': column.ReferenceListColumn,
        "#lookup#CONTAINS(value='#summary#GristSummary_6_Source')":
          lookup.ContainsLookupMapColumn,
        '#summary#GristSummary_6_Source2': column.ReferenceListColumn,
        "#lookup#CONTAINS(value='#summary#GristSummary_6_Source2')":
          lookup.ContainsLookupMapColumn,

        # simple summary and lookup
        '#summary#GristSummary_6_Source3': column.ReferenceColumn,
        '#lookup##summary#GristSummary_6_Source3': lookup.SimpleLookupMapColumn,

        '#summary#GristSummary_6_Source4': column.ReferenceListColumn,
        "#lookup#CONTAINS(value='#summary#GristSummary_6_Source4')":
          lookup.ContainsLookupMapColumn,
      }
    )

    # Remove 'b' from choices1. The list is sent with a leading "L" marker, which is not part
    # of the stored value asserted below.
    self.update_record("Source", 21, choices1=["L", "a"])

    self.assertTableData('Source', data=[
      ["id", "choices1", "choices2", "other"],
      [21, ["a"], ["c", "d"], "foo"],
    ])

    # The summary rows for 'b' remain, but their groups are now empty.
    check_summary1(
      [1, "a", [21], 1],
      [2, "b", [], 0],
    )

    check_summary2(
      [1, "a", "c", [21], 1],
      [2, "a", "d", [21], 1],
      [3, "b", "c", [], 0],
      [4, "b", "d", [], 0],
    )

    # Add 'e' to choices2
    self.update_record("Source", 21, choices2=["L", "c", "d", "e"])

    # The summary grouped only by choices1 is unaffected.
    check_summary1(
      [1, "a", [21], 1],
      [2, "b", [], 0],
    )

    # A new row appears for the ('a', 'e') combination.
    check_summary2(
      [1, "a", "c", [21], 1],
      [2, "a", "d", [21], 1],
      [3, "b", "c", [], 0],
      [4, "b", "d", [], 0],
      [5, "a", "e", [21], 1],
    )

    # Remove record from source
    self.remove_record("Source", 21)

    # Every summary row is still present, but all groups are empty now.
    check_summary1(
      [1, "a", [], 0],
      [2, "b", [], 0],
    )

    check_summary2(
      [1, "a", "c", [], 0],
      [2, "a", "d", [], 0],
      [3, "b", "c", [], 0],
      [4, "b", "d", [], 0],
      [5, "a", "e", [], 0],
    )

    # Make rows with every combination of {a,b,ab} and {c,d,cd}: ids 101-109, with choices1
    # varying fastest.
    choice_pairs = [
      (first, second)
      for second in (["c"], ["d"], ["c", "d"])
      for first in (["a"], ["b"], ["a", "b"])
    ]
    new_rows = [
      [101 + index, ["L"] + first, ["L"] + second]
      for index, (first, second) in enumerate(choice_pairs)
    ]
    self.add_records('Source', ["id", "choices1", "choices2"], new_rows)

    self.assertTableData('Source', cols="subset", data=[
      ["id", "choices1", "choices2"],
      [101, ["a"], ["c"]],
      [102, ["b"], ["c"]],
      [103, ["a", "b"], ["c"]],
      [104, ["a"], ["d"]],
      [105, ["b"], ["d"]],
      [106, ["a", "b"], ["d"]],
      [107, ["a"], ["c", "d"]],
      [108, ["b"], ["c", "d"]],
      [109, ["a", "b"], ["c", "d"]],
    ])

    # Summary tables now have an even distribution of combinations
    check_summary1(
      [1, "a", [101, 103, 104, 106, 107, 109], 6],
      [2, "b", [102, 103, 105, 106, 108, 109], 6],
    )

    check_summary2(
      [1, "a", "c", [101, 103, 107, 109], 4],
      [2, "a", "d", [104, 106, 107, 109], 4],
      [3, "b", "c", [102, 103, 108, 109], 4],
      [4, "b", "d", [105, 106, 108, 109], 4],
      [5, "a", "e", [], 0],
    )
|
Loading…
Reference in New Issue
Block a user