gristlabs_grist-core/sandbox/grist/lookup.py
Paul Fitzpatrick b82eec714a (core) move data engine code to core
Summary:
this moves sandbox/grist to core, and adds a requirements.txt
file for reconstructing the content of sandbox/thirdparty.

Test Plan:
existing tests pass.
Tested core functionality manually.  Tested docker build manually.

Reviewers: dsagal

Reviewed By: dsagal

Differential Revision: https://phab.getgrist.com/D2563
2020-07-29 08:57:25 -04:00

217 lines
8.9 KiB
Python

import column
import depend
import records
import relation
import twowaymap
import usertypes
import logger
log = logger.Logger(__name__, logger.INFO)
def _extract(cell_value):
"""
When cell_value is a Record, returns its rowId. Otherwise returns the value unchanged.
This is to allow lookups to work with reference columns.
"""
if isinstance(cell_value, records.Record):
return cell_value._row_id
return cell_value
class LookupMapColumn(column.BaseColumn):
"""
Conceptually a LookupMapColumn is associated with a table ("target table") and maintains for
each row a key (which is a tuple of values from the named columns), which is fast to look up.
The lookup is generally performed in a formula in a different table ("referring table").
LookupMapColumn is similar to a FormulaColumn in that it needs to do some computation whenever
one of its dependencies changes: namely, it needs to update the index.
Although it acts as a column, a LookupMapColumn isn't included among its table's columns, and
doesn't have a column id.
Compared to relational database, LookupMapColumn is analogous to a database index.
"""
def __init__(self, table, col_id, col_ids_tuple):
# Note that self._recalc_rec_method is passed in as the formula's "method".
col_info = column.ColInfo(usertypes.Any(), is_formula=True, method=self._recalc_rec_method)
super(LookupMapColumn, self).__init__(table, col_id, col_info)
self._col_ids_tuple = col_ids_tuple
self._engine = table._engine
# Two-way map between rowIds of the target table (on the left) and key tuples (on the right).
# Multiple rows can map to the same key. The map is populated by engine's _recompute when this
# node is brought up-to-date.
self._row_key_map = twowaymap.TwoWayMap(left=set, right="single")
self._engine.invalidate_column(self)
# Map of referring Node to _LookupRelation. Different tables may do lookups using this
# LookupMapColumn, and that creates a dependency from other Nodes to us, with a relation
# between referring rows and the lookup keys. This map stores these relations.
self._lookup_relations = {}
def _recalc_rec_method(self, rec, table):
"""
LookupMapColumn acts as a formula column, and this method is the "formula" called whenever
a dependency changes. If LookupMapColumn indexes columns (A,B), then a change to A or B would
cause the LookupMapColumn to be invalidated for the corresponding rows, and brought up to date
during formula recomputation by calling this method. It shold take O(1) time per affected row.
"""
old_key = self._row_key_map.lookup_left(rec._row_id)
# Note that getattr(rec, col_id) is what creates the correct dependency, as well as ensures
# that the columns used to index by are brought up-to-date (in case they are formula columns).
new_key = tuple(_extract(rec._get_col(_col_id)) for _col_id in self._col_ids_tuple)
try:
self._row_key_map.insert(rec._row_id, new_key)
except TypeError:
# If key is not hashable, ignore it, just remove the old_key then.
self._row_key_map.remove(rec._row_id, old_key)
new_key = None
# It's OK if None is one of the values, since None will just never be found as a key.
self._invalidate_affected({old_key, new_key})
def unset(self, row_id):
# This is called on record removal, and is necessary to deal with removed records.
old_key = self._row_key_map.lookup_left(row_id)
self._row_key_map.remove(row_id, old_key)
self._invalidate_affected({old_key})
def _invalidate_affected(self, affected_keys):
# For each known relation, figure out which referring rows are affected, and invalidate them.
# The engine will notice that there have been more invalidations, and recompute things again.
for node, rel in self._lookup_relations.iteritems():
affected_rows = rel.get_affected_rows_by_keys(affected_keys)
self._engine.invalidate_records(node.table_id, affected_rows, col_ids=(node.col_id,))
def _get_relation(self, referring_node):
"""
Helper which returns an existing or new _LookupRelation object for the given referring Node.
"""
rel = self._lookup_relations.get(referring_node)
if not rel:
rel = _LookupRelation(self, referring_node)
self._lookup_relations[referring_node] = rel
return rel
def _delete_relation(self, referring_node):
self._lookup_relations.pop(referring_node, None)
if not self._lookup_relations:
self._engine.mark_lookupmap_for_cleanup(self)
def do_lookup(self, key):
"""
Looks up key in the lookup map and returns a tuple with two elements: the set of matching
records (as a set object, not ordered), and the Relation object for those records, relating
the current frame to the returned records. Returns an empty set if no records match.
"""
key = tuple(_extract(val) for val in key)
current_frame = self._engine.get_current_frame()
if current_frame:
rel = self._get_relation(current_frame.node)
rel._add_lookup(current_frame.current_row_id, key)
else:
rel = None
# The _use_node call both brings LookupMapColumn up-to-date, and creates a dependency on it.
# Relation of None isn't valid, but it happens to be unused when there is no current_frame.
row_ids = self._row_key_map.lookup_right(key, set())
self._engine._use_node(self.node, rel, row_ids)
if not row_ids:
row_ids = self._row_key_map.lookup_right(key, set())
return row_ids, rel
def _get_key(self, target_row_id):
"""
Helper used by _LookupRelation to get the key associated with the given target row id.
"""
return self._row_key_map.lookup_left(target_row_id)
# Override various column methods, since LookupMapColumn doesn't care to store any values. To
# outside code, it looks like a column of None's.
def raw_get(self, value):
return None
def convert(self, value):
return None
def get_cell_value(self, row_id):
return None
def set(self, row_id, value):
pass
#----------------------------------------------------------------------
class _LookupRelation(relation.Relation):
"""
_LookupRelation maintains a mapping between rows of a table doing a lookup to the rows getting
returned from the lookup. Lookups are implemented using a LookupMapColumn, and a _LookupRelation
with in conjunction with its LookupMapColumn.
_LookupRelation are created and owned by LookupMapColumn, and should not be created directly by
other code.
"""
def __init__(self, lookup_map, referring_node):
super(_LookupRelation, self).__init__(referring_node.table_id, lookup_map.table_id)
self._lookup_map = lookup_map
self._referring_node = referring_node
# Maps referring rows to keys, where multiple rows may map to the same key AND one row may
# map to multiple keys (if a formula does multiple lookup calls).
self._row_key_map = twowaymap.TwoWayMap(left=set, right=set)
def __str__(self):
return "_LookupRelation(%s->%s)" % (self._referring_node, self.target_table)
def get_affected_rows(self, target_row_ids):
# Each target row (result of a lookup by key) is associated with a key, and all rows that
# looked up an affected key are affected by a change to any associated row. We remember which
# rows looked up which key in self._row_key_map, so that when some target row changes to a new
# key, we can know which referring rows need to be recomputed.
return self.get_affected_rows_by_keys({ self._lookup_map._get_key(r) for r in target_row_ids })
def get_affected_rows_by_keys(self, keys):
"""
This is used by LookupMapColumn to know which rows got affected when a target row changed to
have a different key. Keys can be any iterable. A key of None is allowed and affects nothing.
"""
affected_rows = set()
for key in keys:
if key is not None:
affected_rows.update(self._row_key_map.lookup_right(key, default=()))
return affected_rows
def _add_lookup(self, referring_row_id, key):
"""
Helper used by LookupMapColumn to store the fact that the given key was looked up in the
process of computing the given referring_row_id.
"""
self._row_key_map.insert(referring_row_id, key)
def reset_rows(self, referring_rows):
"""
Called when starting to compute a formula, so that mappings for the given referring_rows can
be cleared as they are about to be rebuilt.
"""
# Clear out references from referring_rows.
if referring_rows == depend.ALL_ROWS:
self._row_key_map.clear()
else:
for row_id in referring_rows:
self._row_key_map.remove_left(row_id)
def reset_all(self):
"""
Called when the dependency using this relation is reset, and this relation is no longer used.
"""
# In this case also, remove it from the LookupMapColumn. Once all relations are gone, the
# lookup map can get cleaned up.
self._row_key_map.clear()
self._lookup_map._delete_relation(self._referring_node)