mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
1c89d08ea3
Summary: Adds some special handling to summary table and lookup logic: - Source rows with empty choicelists/reflists get a corresponding summary row with an empty string/reference when grouping by that column, instead of excluding them from any group - Adds a new `QueryOperation` 'empty' in the client which is used in `LinkingState`, `QuerySet`, and `recursiveMoveToCursorPos` to match empty lists in source tables against falsy values in linked summary tables. - Adds a new parameter `match_empty` to the Python `CONTAINS` function so that regular formulas can implement the same behaviour as summary tables. See https://grist.slack.com/archives/C0234CPPXPA/p1654030490932119 - Uses the new `match_empty` argument in the formula generated for the `group` column when detaching a summary table. Test Plan: Updated and extended Python and nbrowser tests of summary tables grouped by choicelists to test for new behaviour with empty lists. Reviewers: georgegevoian Reviewed By: georgegevoian Differential Revision: https://phab.getgrist.com/D3471
314 lines
12 KiB
Python
314 lines
12 KiB
Python
import itertools
|
|
from abc import abstractmethod
|
|
|
|
import six
|
|
|
|
import column
|
|
import depend
|
|
import records
|
|
import relation
|
|
import twowaymap
|
|
import usertypes
|
|
from functions.lookup import _Contains
|
|
|
|
import logger
|
|
log = logger.Logger(__name__, logger.INFO)
|
|
|
|
|
|
def _extract(cell_value):
    """
    Normalize a cell value for use as part of a lookup key.

    A Record is replaced by its row id, which is what allows lookups to work with reference
    columns. Any other value is returned unchanged.
    """
    return cell_value._row_id if isinstance(cell_value, records.Record) else cell_value
|
|
|
|
|
|
class BaseLookupMapColumn(column.BaseColumn):
    """
    Conceptually a LookupMapColumn is associated with a table ("target table") and maintains
    for each row a key (which is a tuple of values from the named columns), which is fast to
    look up. The lookup is generally performed in a formula in a different table ("referring
    table").

    LookupMapColumn is similar to a FormulaColumn in that it needs to do some computation
    whenever one of its dependencies changes: namely, it needs to update the index.

    Although it acts as a column, a LookupMapColumn isn't included among its table's columns,
    and doesn't have a column id.

    Compared to a relational database, LookupMapColumn is analogous to a database index.

    Subclasses supply the index container (_make_row_key_map), the per-row index update
    (_recalc_rec_method) and the row -> keys accessor (_get_keys).
    """
    # NOTE(review): @abstractmethod is only enforced at instantiation time when the class
    # hierarchy uses an ABC metaclass; whether column.BaseColumn does is not visible here.
    # The abstract methods below raise NotImplementedError regardless.

    def __init__(self, table, col_id, col_ids_tuple):
        """
        `table` is the target table (its `_engine` is used for invalidation and dependency
        tracking), `col_id` is passed through to the base column constructor, and
        `col_ids_tuple` names the columns whose values form each lookup key.
        """
        # Note that self._recalc_rec_method is passed in as the formula's "method".
        col_info = column.ColInfo(usertypes.Any(), is_formula=True, method=self._recalc_rec_method)
        super(BaseLookupMapColumn, self).__init__(table, col_id, col_info)

        self._col_ids_tuple = col_ids_tuple
        self._engine = table._engine

        # Two-way map between rowIds of the target table (on the left) and key tuples (on the
        # right). Multiple rows can naturally map to the same key.
        # Multiple keys can map to the same row if CONTAINS() is used.
        # The map is populated by engine's _recompute when this node is brought up-to-date.
        self._row_key_map = self._make_row_key_map()
        self._engine.invalidate_column(self)

        # Map of referring Node to _LookupRelation. Different tables may do lookups using this
        # LookupMapColumn, and that creates a dependency from other Nodes to us, with a
        # relation between referring rows and the lookup keys. This map stores these relations.
        self._lookup_relations = {}

    @abstractmethod
    def _make_row_key_map(self):
        """
        Create and return the two-way map used to index target row ids against key tuples.
        """
        raise NotImplementedError

    @abstractmethod
    def _recalc_rec_method(self, rec, table):
        """
        LookupMapColumn acts as a formula column, and this method is the "formula" called
        whenever a dependency changes. If LookupMapColumn indexes columns (A,B), then a change
        to A or B would cause the LookupMapColumn to be invalidated for the corresponding rows,
        and brought up to date during formula recomputation by calling this method. It should
        take O(1) time per affected row.
        """
        raise NotImplementedError

    @abstractmethod
    def _get_keys(self, target_row_id):
        """
        Get the keys associated with the given target row id.
        """
        raise NotImplementedError

    def unset(self, row_id):
        """
        Drop every index entry for row_id and invalidate the referring rows that had looked
        up any of the dropped keys.
        """
        # This is called on record removal, and is necessary to deal with removed records.
        old_keys = self._get_keys(row_id)
        for old_key in old_keys:
            self._row_key_map.remove(row_id, old_key)
        self._invalidate_affected(old_keys)

    def _invalidate_affected(self, affected_keys):
        """
        Invalidate, in every referring node, the rows that looked up any of affected_keys.
        """
        # For each known relation, figure out which referring rows are affected, and
        # invalidate them. The engine will notice that there have been more invalidations,
        # and recompute things again.
        for node, rel in six.iteritems(self._lookup_relations):
            affected_rows = rel.get_affected_rows_by_keys(affected_keys)
            self._engine.invalidate_records(node.table_id, affected_rows, col_ids=(node.col_id,))

    def _get_relation(self, referring_node):
        """
        Helper which returns an existing or new _LookupRelation object for the given referring
        Node.
        """
        rel = self._lookup_relations.get(referring_node)
        # Compare against None explicitly: only a missing entry should create a new relation.
        # A truthiness test would also replace an existing relation that evaluated as falsy,
        # discarding the lookups it had recorded.
        if rel is None:
            rel = _LookupRelation(self, referring_node)
            self._lookup_relations[referring_node] = rel
        return rel

    def _delete_relation(self, referring_node):
        """
        Forget the relation for referring_node; once no relations remain, ask the engine to
        clean up this lookup map.
        """
        self._lookup_relations.pop(referring_node, None)
        if not self._lookup_relations:
            self._engine.mark_lookupmap_for_cleanup(self)

    def _do_fast_empty_lookup(self):
        """
        Simplified version of do_lookup for a lookup column with no key columns
        to make Table._num_rows as fast as possible.
        """
        return self._row_key_map.lookup_right((), default=())

    def do_lookup(self, key):
        """
        Looks up key in the lookup map and returns a tuple with two elements: the set of
        matching records (as a set object, not ordered), and the Relation object for those
        records, relating the current frame to the returned records. Returns an empty set if
        no records match.

        Record values within key are replaced by their row ids before the lookup. The returned
        relation is None when the engine has no current node.
        """
        key = tuple(_extract(val) for val in key)
        engine = self._engine
        if engine._current_node:
            # Remember that the row being computed looked up this key, so that it can be
            # invalidated when the set of rows matching the key changes.
            rel = self._get_relation(engine._current_node)
            rel._add_lookup(engine._current_row_id, key)
        else:
            rel = None

        # The _use_node call both brings LookupMapColumn up-to-date, and creates a dependency
        # on it. Relation of None isn't valid, but it happens to be unused when there is no
        # current_frame.
        engine._use_node(self.node, rel)

        row_ids = self._row_key_map.lookup_right(key, set())
        return row_ids, rel

    # Override various column methods, since LookupMapColumn doesn't care to store any values.
    # To outside code, it looks like a column of None's.
    def raw_get(self, value):
        return None

    def convert(self, value):
        return None

    def get_cell_value(self, row_id):
        return None

    def set(self, row_id, value):
        pass
|
|
|
|
# For performance, prefer SimpleLookupMapColumn when no CONTAINS is used
|
|
# in lookups, although the two implementations should be equivalent
|
|
# See also table._add_update_summary_col
|
|
|
|
class SimpleLookupMapColumn(BaseLookupMapColumn):
    """
    Lookup index in which every target row is stored under a single key tuple.
    """

    def _make_row_key_map(self):
        # right="single": lookup_left() yields one key per row rather than a collection,
        # which is how the methods below consume it.
        return twowaymap.TwoWayMap(left=set, right="single")

    def _recalc_rec_method(self, rec, table):
        """
        Re-index a single row: store it under its freshly computed key and invalidate the
        referring rows that looked up either the previous or the current key.
        """
        row_id = rec._row_id
        previous_key = self._row_key_map.lookup_left(row_id)

        # Reading each indexed column through rec._get_col() is what creates the correct
        # dependency, and also ensures the indexed columns are brought up-to-date first (in
        # case they are formula columns).
        current_key = tuple(
            _extract(rec._get_col(indexed_col_id)) for indexed_col_id in self._col_ids_tuple
        )

        try:
            self._row_key_map.insert(row_id, current_key)
        except TypeError:
            # An unhashable key cannot be indexed: ignore it and only drop the previous key.
            self._row_key_map.remove(row_id, previous_key)
            current_key = None

        # A None among these is harmless, since None is never found as a key.
        self._invalidate_affected({previous_key, current_key})

    def _get_keys(self, target_row_id):
        """
        Return a one-element set holding the key currently stored for target_row_id.
        """
        stored_key = self._row_key_map.lookup_left(target_row_id)
        return {stored_key}
|
|
|
|
|
|
class ContainsLookupMapColumn(BaseLookupMapColumn):
    """
    Lookup index in which a target row may be stored under several key tuples at once, as
    needed for key columns looked up with CONTAINS().
    """

    def _make_row_key_map(self):
        return twowaymap.TwoWayMap(left=set, right=set)

    def _recalc_rec_method(self, rec, table):
        """
        Re-index a single row under every combination of values of its key columns, and
        invalidate the referring rows that looked up any key that was added or removed.
        """
        # One list of candidate values per key column; the row's keys are their product.
        per_column_values = []
        for col_spec in self._col_ids_tuple:
            # Note that _get_col is what creates the correct dependency, as well as ensures
            # that the indexed columns are brought up-to-date (in case they are formula
            # columns).
            cell = rec._get_col(extract_column_id(col_spec))

            if not isinstance(col_spec, _Contains):
                # Plain column: the cell value is the only candidate.
                candidates = [cell]
            elif isinstance(cell, (six.binary_type, six.text_type)):
                # A CONTAINS() target must not be iterated character by character; an empty
                # list means this row contributes no keys.
                candidates = []
            elif not cell and col_spec.match_empty != _Contains.no_match_empty:
                # Empty cell with match_empty requested: file the row under that value.
                candidates = [col_spec.match_empty]
            else:
                # Use the cell's own elements as the candidates.
                candidates = cell

            try:
                # Only the unique values matter.
                distinct = set(candidates)
            except TypeError:
                distinct = []

            per_column_values.append([_extract(item) for item in distinct])

        current_keys = set(itertools.product(*per_column_values))

        row_id = rec._row_id
        previous_keys = self._get_keys(row_id)

        for stale_key in previous_keys - current_keys:
            self._row_key_map.remove(row_id, stale_key)

        for fresh_key in current_keys - previous_keys:
            self._row_key_map.insert(row_id, fresh_key)

        # Invalidate exactly the keys that were either inserted or removed.
        self._invalidate_affected(current_keys ^ previous_keys)

    def _get_keys(self, target_row_id):
        """
        Return a copy of the set of keys currently stored for target_row_id.
        """
        # Copy, because the map hands back the set it actually stores, which may be modified.
        stored_keys = self._row_key_map.lookup_left(target_row_id, ())
        return set(stored_keys)
|
|
|
|
|
|
#----------------------------------------------------------------------
|
|
|
|
class _LookupRelation(relation.Relation):
    """
    _LookupRelation maintains a mapping between rows of a table doing a lookup and the keys
    those rows looked up, from which the rows returned by the lookup can be related back to
    the referring rows. Lookups are implemented using a LookupMapColumn, and a _LookupRelation
    works in conjunction with its LookupMapColumn.

    _LookupRelation objects are created and owned by LookupMapColumn, and should not be
    created directly by other code.
    """

    def __init__(self, lookup_map, referring_node):
        super(_LookupRelation, self).__init__(referring_node.table_id, lookup_map.table_id)
        self._lookup_map = lookup_map
        self._referring_node = referring_node

        # Referring rows <-> looked-up keys. Several rows may share a key, AND one row may
        # have several keys (if a formula does multiple lookup calls).
        self._row_key_map = twowaymap.TwoWayMap(left=set, right=set)

    def __str__(self):
        return "_LookupRelation(%s->%s)" % (self._referring_node, self.target_table)

    def get_affected_rows(self, target_row_ids):
        """
        Return the referring rows affected by a change to the given target rows, or
        depend.ALL_ROWS when that is what was passed in.
        """
        if target_row_ids == depend.ALL_ROWS:
            return depend.ALL_ROWS

        # Each target row (result of a lookup by key) is associated with a set of keys, and
        # all rows that looked up an affected key are affected by a change to any associated
        # row. self._row_key_map remembers which rows looked up which key, so that when some
        # target row changes to a new key, we know which referring rows need to be recomputed.
        affected_keys = set()
        for target_row_id in target_row_ids:
            affected_keys.update(self._lookup_map._get_keys(target_row_id))
        return self.get_affected_rows_by_keys(affected_keys)

    def get_affected_rows_by_keys(self, keys):
        """
        Return the set of referring rows that looked up any of the given keys. LookupMapColumn
        uses this to find out which rows got affected when a target row changed to have a
        different key. Keys can be any iterable. A key of None is allowed and affects nothing.
        """
        referring_rows = set()
        for key in keys:
            if key is None:
                continue
            referring_rows.update(self._row_key_map.lookup_right(key, default=()))
        return referring_rows

    def _add_lookup(self, referring_row_id, key):
        """
        Helper used by LookupMapColumn to record that key was looked up while computing the
        given referring_row_id.
        """
        self._row_key_map.insert(referring_row_id, key)

    def reset_rows(self, referring_rows):
        """
        Called when starting to compute a formula, so that the mappings for the given
        referring_rows can be cleared, as they are about to be rebuilt.
        """
        if referring_rows == depend.ALL_ROWS:
            self._row_key_map.clear()
            return

        for referring_row_id in referring_rows:
            self._row_key_map.remove_left(referring_row_id)

    def reset_all(self):
        """
        Called when the dependency using this relation is reset, and this relation is no
        longer used.
        """
        self._row_key_map.clear()
        # Also detach from the LookupMapColumn. Once all relations are gone, the lookup map
        # can get cleaned up.
        self._lookup_map._delete_relation(self._referring_node)
|
|
|
|
|
|
def extract_column_id(c):
    """
    Return the column id named by a lookup key spec: the `value` wrapped by a _Contains
    marker, or the spec itself when it is not a _Contains.
    """
    return c.value if isinstance(c, _Contains) else c
|