gristlabs_grist-core/sandbox/grist/lookup.py

import column
import depend
import records
import relation
import twowaymap
import usertypes

import logger
log = logger.Logger(__name__, logger.INFO)


def _extract(cell_value):
  """
  Normalizes a cell value for use as part of a lookup key: a Record is replaced by its rowId,
  and any other value is passed through untouched. This is what lets lookups work with
  reference columns.
  """
  return cell_value._row_id if isinstance(cell_value, records.Record) else cell_value


class LookupMapColumn(column.BaseColumn):
  """
  Conceptually a LookupMapColumn is associated with a table ("target table") and maintains for
  each row a key (which is a tuple of values from the named columns), which is fast to look up.
  The lookup is generally performed in a formula in a different table ("referring table").

  LookupMapColumn is similar to a FormulaColumn in that it needs to do some computation whenever
  one of its dependencies changes: namely, it needs to update the index.

  Although it acts as a column, a LookupMapColumn isn't included among its table's columns, and
  doesn't have a column id.

  Compared to a relational database, LookupMapColumn is analogous to a database index.
  """

  def __init__(self, table, col_id, col_ids_tuple):
    """
    Creates the lookup map for `table`. `col_id` is passed through to the BaseColumn constructor,
    and `col_ids_tuple` lists the ids of the columns whose values make up each row's key.
    """
    # Note that self._recalc_rec_method is passed in as the formula's "method".
    col_info = column.ColInfo(usertypes.Any(), is_formula=True, method=self._recalc_rec_method)
    super(LookupMapColumn, self).__init__(table, col_id, col_info)

    self._col_ids_tuple = col_ids_tuple
    self._engine = table._engine

    # Two-way map between rowIds of the target table (on the left) and key tuples (on the right).
    # Multiple rows can map to the same key. The map is populated by engine's _recompute when this
    # node is brought up-to-date.
    self._row_key_map = twowaymap.TwoWayMap(left=set, right="single")
    self._engine.invalidate_column(self)

    # Map of referring Node to _LookupRelation. Different tables may do lookups using this
    # LookupMapColumn, and that creates a dependency from other Nodes to us, with a relation
    # between referring rows and the lookup keys. This map stores these relations.
    self._lookup_relations = {}

  def _recalc_rec_method(self, rec, table):
    """
    LookupMapColumn acts as a formula column, and this method is the "formula" called whenever
    a dependency changes. If LookupMapColumn indexes columns (A,B), then a change to A or B would
    cause the LookupMapColumn to be invalidated for the corresponding rows, and brought up to date
    during formula recomputation by calling this method. It should take O(1) time per affected
    row.

    The `table` argument is part of the formula-method signature and is not used here.
    """
    old_key = self._row_key_map.lookup_left(rec._row_id)

    # Note that rec._get_col(col_id) is what creates the correct dependency, as well as ensures
    # that the columns used to index by are brought up-to-date (in case they are formula columns).
    new_key = tuple(_extract(rec._get_col(_col_id)) for _col_id in self._col_ids_tuple)

    try:
      self._row_key_map.insert(rec._row_id, new_key)
    except TypeError:
      # If key is not hashable, ignore it, just remove the old_key then.
      self._row_key_map.remove(rec._row_id, old_key)
      new_key = None

    # It's OK if None is one of the values, since None will just never be found as a key.
    self._invalidate_affected({old_key, new_key})

  def unset(self, row_id):
    """
    Removes row_id from the index and invalidates the referring rows that looked up its key.
    """
    # This is called on record removal, and is necessary to deal with removed records.
    old_key = self._row_key_map.lookup_left(row_id)
    self._row_key_map.remove(row_id, old_key)
    self._invalidate_affected({old_key})

  def _invalidate_affected(self, affected_keys):
    """
    Invalidates, in every referring node, the rows that looked up any of affected_keys.
    """
    # For each known relation, figure out which referring rows are affected, and invalidate them.
    # The engine will notice that there have been more invalidations, and recompute things again.
    # Note: items() rather than iteritems(), since the latter only exists in Python 2.
    for node, rel in self._lookup_relations.items():
      affected_rows = rel.get_affected_rows_by_keys(affected_keys)
      self._engine.invalidate_records(node.table_id, affected_rows, col_ids=(node.col_id,))

  def _get_relation(self, referring_node):
    """
    Helper which returns an existing or new _LookupRelation object for the given referring Node.
    """
    rel = self._lookup_relations.get(referring_node)
    # Compare against None explicitly: only a missing entry should cause a new relation to be
    # created, regardless of how a relation object evaluates in a boolean context.
    if rel is None:
      rel = _LookupRelation(self, referring_node)
      self._lookup_relations[referring_node] = rel
    return rel

  def _delete_relation(self, referring_node):
    """
    Forgets the relation for referring_node (if any). Once no relations remain, asks the engine
    to mark this lookup map for cleanup.
    """
    self._lookup_relations.pop(referring_node, None)
    if not self._lookup_relations:
      self._engine.mark_lookupmap_for_cleanup(self)

  def do_lookup(self, key):
    """
    Looks up key in the lookup map and returns a tuple with two elements: the set of row ids of
    the matching target-table rows (as a set object, not ordered), and the Relation object
    relating the current frame to the returned rows. Returns an empty set if no rows match. The
    returned relation is None when the engine has no current frame.
    """
    key = tuple(_extract(val) for val in key)
    current_frame = self._engine.get_current_frame()
    if current_frame:
      # Remember that the row being computed looked up this key, so that it can be invalidated
      # when the set of rows matching the key changes.
      rel = self._get_relation(current_frame.node)
      rel._add_lookup(current_frame.current_row_id, key)
    else:
      rel = None

    # The lookup is done first so that its result can be passed to _use_node.
    row_ids = self._row_key_map.lookup_right(key, set())

    # The _use_node call both brings LookupMapColumn up-to-date, and creates a dependency on it.
    # Relation of None isn't valid, but it happens to be unused when there is no current_frame.
    self._engine._use_node(self.node, rel, row_ids)

    # If nothing was found before, look again now that the map has been brought up-to-date.
    if not row_ids:
      row_ids = self._row_key_map.lookup_right(key, set())

    return row_ids, rel

  def _get_key(self, target_row_id):
    """
    Helper used by _LookupRelation to get the key associated with the given target row id.
    """
    return self._row_key_map.lookup_left(target_row_id)

  # Override various column methods, since LookupMapColumn doesn't care to store any values. To
  # outside code, it looks like a column of None's.
  def raw_get(self, value):
    return None
  def convert(self, value):
    return None
  def get_cell_value(self, row_id):
    return None
  def set(self, row_id, value):
    pass


#----------------------------------------------------------------------

class _LookupRelation(relation.Relation):
  """
  _LookupRelation maintains a mapping between rows of a table doing a lookup to the rows getting
  returned from the lookup. Lookups are implemented using a LookupMapColumn, and a
  _LookupRelation works in conjunction with its LookupMapColumn.

  _LookupRelation objects are created and owned by LookupMapColumn, and should not be created
  directly by other code.
  """

  def __init__(self, lookup_map, referring_node):
    """
    Sets up the relation from referring_node's table to lookup_map's table.
    """
    super(_LookupRelation, self).__init__(referring_node.table_id, lookup_map.table_id)
    self._referring_node = referring_node
    self._lookup_map = lookup_map

    # Two-way map from referring row ids (left) to looked-up keys (right). It is many-to-many:
    # several rows may look up the same key, and a single row may look up several keys (when its
    # formula makes multiple lookup calls).
    self._row_key_map = twowaymap.TwoWayMap(left=set, right=set)

  def __str__(self):
    description = (self._referring_node, self.target_table)
    return "_LookupRelation(%s->%s)" % description

  def get_affected_rows(self, target_row_ids):
    """
    Returns the set of referring rows affected by a change to the given target rows.
    """
    # Every target row has a key in the lookup map, and a change to a target row affects all the
    # referring rows that looked up that key. Since self._row_key_map records which rows looked
    # up which key, translating the target rows to their keys is enough to find those rows.
    get_key = self._lookup_map._get_key
    keys = set()
    for target_row_id in target_row_ids:
      keys.add(get_key(target_row_id))
    return self.get_affected_rows_by_keys(keys)

  def get_affected_rows_by_keys(self, keys):
    """
    Returns the set of referring rows that looked up any of the given keys. LookupMapColumn uses
    this to find the rows affected when a target row's key changes. Keys can be any iterable; a
    key of None is allowed and contributes nothing.
    """
    lookup_right = self._row_key_map.lookup_right
    matches = [lookup_right(key, default=()) for key in keys if key is not None]
    return set().union(*matches)

  def _add_lookup(self, referring_row_id, key):
    """
    Records that key was looked up while computing referring_row_id. Used by LookupMapColumn.
    """
    self._row_key_map.insert(referring_row_id, key)

  def reset_rows(self, referring_rows):
    """
    Forgets the lookups recorded for referring_rows. Called when starting to compute a formula,
    since the mappings for those rows are about to be rebuilt.
    """
    if referring_rows == depend.ALL_ROWS:
      # Every row is being recomputed, so drop the whole mapping at once.
      self._row_key_map.clear()
      return

    remove_left = self._row_key_map.remove_left
    for row_id in referring_rows:
      remove_left(row_id)

  def reset_all(self):
    """
    Called when the dependency using this relation is reset, and this relation is no longer used.
    """
    self._row_key_map.clear()
    # Also detach from the LookupMapColumn: once all of its relations are gone, the lookup map
    # itself can get cleaned up.
    self._lookup_map._delete_relation(self._referring_node)