(core) Implement PREVIOUS/NEXT/RANK and lookupRecords().find.* methods.

Summary:
- `lookupRecords()` now allows efficient search in sorted results, with
  the syntax  `lookupRecords(..., order_by="-Date").find.le($Date)`. This will find the record with the nearest date that's <= `$Date`.
- The `find.*` methods are `le`, `lt`, `ge`, `gt`, and `eq`. All have O(log N) performance.
- `PREVIOUS(rec, group_by=..., order_by=...)` finds the previous record to rec, according to `group_by` / `order_by`, in amortized O(log N) time. For example, `PREVIOUS(rec, group_by="Account", order_by="Date")`.
- `PREVIOUS(rec, order_by=None)` finds the previous record in the full table, sorted by the `manualSort` column, to match the order visible in the unsorted table.
- `NEXT(...)` is just like `PREVIOUS(...)` but finds the next record.
- `RANK(rec, group_by=..., order_by=..., order="asc")` returns the rank of the record within the group, starting with 1. Order can be `"asc"` (default) or `"desc"`.
- The `order_by` argument in `lookupRecords` and in the new functions now supports tuples, as well as the "-" prefix to reverse order, e.g. `("Category", "-Date")`.
- New functions are only available in Python3, for a minor reason (to support keyword-only arguments for `group_by` and `order_by`) and also as a nudge to Python2 users to update.

- Includes fixes for several situations related to lookups that used to cause quadratic complexity.

Test Plan:
- New performance check that sorted lookups don't add quadratic complexity.
- Tests added for lookup find.* methods, and for PREVIOUS/NEXT/RANK.
- Tests added that renaming columns updates `order_by` and `group_by` arguments, and attributes on results (e.g. `PREVIOUS(...).ColId`) appropriately.
- Python3 tests can now produce verbose output when VERBOSE=1 and -v are given.

Reviewers: jarek, georgegevoian

Reviewed By: jarek, georgegevoian

Subscribers: paulfitz, jarek

Differential Revision: https://phab.getgrist.com/D4265
This commit is contained in:
Dmitry S 2024-07-16 23:43:53 -04:00
parent 063df75204
commit f0d0a07295
22 changed files with 2291 additions and 199 deletions

View File

@ -199,6 +199,8 @@ def infer(node):
_lookup_method_names = ('lookupOne', 'lookupRecords') _lookup_method_names = ('lookupOne', 'lookupRecords')
_prev_next_functions = ('PREVIOUS', 'NEXT', 'RANK')
_lookup_find_methods = ('lt', 'le', 'gt', 'ge', 'eq', 'previous', 'next')
def _is_table(node): def _is_table(node):
""" """
@ -323,6 +325,50 @@ class InferAllReference(InferenceTip):
yield astroid.bases.Instance(infer(node.expr)) yield astroid.bases.Instance(infer(node.expr))
class InferLookupFindResult(InferenceTip):
  """
  Inference helper: calls like `Table.lookupRecords(...).find.lt(...)` (and the other `find.*`
  methods) produce records of `Table`, so report that type for them.
  """
  node_class = astroid.nodes.Call

  @classmethod
  def filter(cls, node):
    method_attr = node.func
    if not isinstance(method_attr, astroid.nodes.Attribute):
      return False
    if method_attr.attrname not in _lookup_find_methods:
      return False
    find_attr = method_attr.expr
    if not (isinstance(find_attr, astroid.nodes.Attribute) and
            find_attr.attrname in ('find', '_find')):
      return False
    inferred = infer(find_attr.expr)
    return isinstance(inferred, astroid.bases.Instance) and _is_table(inferred._proxied)

  @classmethod
  def infer(cls, node, context=None):
    # A bit of fuzziness here: node.func.expr.expr is the result of lookupRecords(). It so happens
    # that at the moment it is already of type Instance(table), as if a single record rather than
    # a list, to support recognizing `.ColId` attributes. So we return the same type.
    yield infer(node.func.expr.expr)
class InferPrevNextResult(InferenceTip):
  """
  Inference helper: a call like PREVIOUS(rec, ...) or NEXT(rec, ...) returns a record of the same
  table as its first argument, so infer the result type from that argument.
  """
  node_class = astroid.nodes.Call

  @classmethod
  def filter(cls, node):
    func = node.func
    if not isinstance(func, astroid.nodes.Name):
      return False
    if func.name not in _prev_next_functions:
      return False
    # Only match calls that have at least one positional argument (the record).
    return bool(node.args)

  @classmethod
  def infer(cls, node, context=None):
    # The first positional argument is the record whose table determines the result type.
    yield infer(node.args[0])
class InferComprehensionBase(InferenceTip): class InferComprehensionBase(InferenceTip):
node_class = astroid.nodes.AssignName node_class = astroid.nodes.AssignName
reference_inference_class = None reference_inference_class = None
@ -397,7 +443,8 @@ def parse_grist_names(builder):
code_text = builder.get_text() code_text = builder.get_text()
with use_inferences(InferReferenceColumn, InferReferenceFormula, InferLookupReference, with use_inferences(InferReferenceColumn, InferReferenceFormula, InferLookupReference,
InferLookupComprehension, InferAllReference, InferAllComprehension): InferLookupComprehension, InferAllReference, InferAllComprehension,
InferLookupFindResult, InferPrevNextResult):
atok = asttokens.ASTText(code_text, tree=astroid.builder.parse(code_text)) atok = asttokens.ASTText(code_text, tree=astroid.builder.parse(code_text))
def make_tuple(start, end, table_id, col_id): def make_tuple(start, end, table_id, col_id):
@ -413,6 +460,13 @@ def parse_grist_names(builder):
return (in_value, in_patch.start, table_id, col_id) return (in_value, in_patch.start, table_id, col_id)
return None return None
# Helper for collecting column IDs mentioned in order_by/group_by parameters, so that
# those can be updated when a column is renamed.
def list_order_group_by_tuples(table_id, node):
  # Yields make_tuple(...) entries for each column ID string found inside the
  # order_by/group_by argument `node` (a string constant or tuple of them).
  for start, end, col_id in parse_order_group_by(atok, node):
    # Only produce a rename target when the source text at (start, end) matches the parsed
    # col_id exactly; this guards against offsets that don't line up with the code text.
    if code_text[start:end] == col_id:
      yield make_tuple(start, end, table_id, col_id)
parsed_names = [] parsed_names = []
for node in asttokens.util.walk(atok.tree, include_joined_str=True): for node in asttokens.util.walk(atok.tree, include_joined_str=True):
if isinstance(node, astroid.nodes.Name): if isinstance(node, astroid.nodes.Name):
@ -430,21 +484,53 @@ def parse_grist_names(builder):
start = end - len(node.attrname) start = end - len(node.attrname)
if code_text[start:end] == node.attrname: if code_text[start:end] == node.attrname:
parsed_names.append(make_tuple(start, end, cls.name, node.attrname)) parsed_names.append(make_tuple(start, end, cls.name, node.attrname))
elif isinstance(node, astroid.nodes.Keyword): elif isinstance(node, astroid.nodes.Keyword):
func = node.parent.func func = node.parent.func
if isinstance(func, astroid.nodes.Attribute) and func.attrname in _lookup_method_names: if isinstance(func, astroid.nodes.Attribute) and func.attrname in _lookup_method_names:
obj = infer(func.expr) obj = infer(func.expr)
if _is_table(obj) and node.arg is not None: # Skip **kwargs, which have arg value of None if _is_table(obj) and node.arg is not None: # Skip **kwargs, which have arg value of None
table_id = obj.name
start = atok.get_text_range(node)[0] start = atok.get_text_range(node)[0]
end = start + len(node.arg) end = start + len(node.arg)
if code_text[start:end] == node.arg: if node.arg == 'order_by':
parsed_names.append(make_tuple(start, end, obj.name, node.arg)) # Rename values in 'order_by' arguments to lookup methods.
parsed_names.extend(list_order_group_by_tuples(table_id, node.value))
elif code_text[start:end] == node.arg:
parsed_names.append(make_tuple(start, end, table_id, node.arg))
elif (isinstance(func, astroid.nodes.Name)
# Rename values in 'order_by' and 'group_by' arguments to PREVIOUS() and NEXT().
and func.name in _prev_next_functions
and node.arg in ('order_by', 'group_by')
and node.parent.args):
obj = infer(node.parent.args[0])
if isinstance(obj, astroid.bases.Instance):
cls = obj._proxied
if _is_table(cls):
table_id = cls.name
parsed_names.extend(list_order_group_by_tuples(table_id, node.value))
return [name for name in parsed_names if name] return [name for name in parsed_names if name]
code_filename = "usercode" code_filename = "usercode"
def parse_order_group_by(atok, node):
  """
  order_by and group_by parameters take the form of a column ID string, optionally prefixed by a
  "-", or a tuple of them. We parse out the list of (start, end, col_id) tuples for each column ID
  mentioned, to support automatic formula updates when a mentioned column is renamed.
  """
  if isinstance(node, astroid.nodes.Tuple):
    # Recurse into each tuple element and concatenate the results.
    results = []
    for element in node.elts:
      results.extend(parse_order_group_by(atok, element))
    return results
  if isinstance(node, astroid.nodes.Const) and isinstance(node.value, six.string_types):
    start, end = atok.get_text_range(node)
    col_id = node.value
    if col_id.startswith("-"):
      # Skip the opening quote plus the leading "-", and the closing quote.
      return [(start + 2, end - 1, col_id[1:])]
    # Skip the opening and closing quotes.
    return [(start + 1, end - 1, col_id)]
  # Anything else (e.g. None, or non-string constants) mentions no column IDs.
  return []
def save_to_linecache(source_code): def save_to_linecache(source_code):
""" """

View File

@ -1,3 +1,4 @@
import os
import sys import sys
import six import six
@ -16,3 +17,15 @@ class FakeStdStreams(object):
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
sys.stdout = self._orig_stdout sys.stdout = self._orig_stdout
sys.stderr = self._orig_stderr sys.stderr = self._orig_stderr
if os.environ.get('VERBOSE'):
  # When VERBOSE is set, leave stdout/stderr untouched, so that logging messages and print()
  # calls remain visible while debugging tests.
  class DummyFakeStdStreams(object):
    def __enter__(self):
      return None

    def __exit__(self, exc_type, exc_val, exc_tb):
      return None

  FakeStdStreams = DummyFakeStdStreams

View File

@ -1,4 +1,6 @@
# pylint: disable=wildcard-import # pylint: disable=wildcard-import, unused-argument
import six
from .date import * from .date import *
from .info import * from .info import *
from .logical import * from .logical import *
@ -8,5 +10,17 @@ from .stats import *
from .text import * from .text import *
from .schedule import * from .schedule import *
if six.PY3:
  # These new functions use Python3-specific syntax (keyword-only arguments), so they live in a
  # module that can only be imported under Python 3.
  from .prevnext import * # pylint: disable=import-error
else:
  # In Python2, only expose them to guide the user to upgrade. Each stub raises immediately;
  # the arguments here are regular (not keyword-only) since Python 2 lacks that syntax, which
  # is harmless because the stubs never use them.
  def PREVIOUS(rec, group_by=None, order_by=None):
    # Stub: always raises, prompting an engine upgrade.
    raise NotImplementedError("Update engine to Python3 to use PREVIOUS, NEXT, or RANK")
  def NEXT(rec, group_by=None, order_by=None):
    # Stub: always raises, prompting an engine upgrade.
    raise NotImplementedError("Update engine to Python3 to use PREVIOUS, NEXT, or RANK")
  def RANK(rec, group_by=None, order_by=None, order="asc"):
    # Stub: always raises, prompting an engine upgrade.
    raise NotImplementedError("Update engine to Python3 to use PREVIOUS, NEXT, or RANK")
# Export all uppercase names, for use with `from functions import *`. # Export all uppercase names, for use with `from functions import *`.
__all__ = [k for k in dir() if not k.startswith('_') and k.isupper()] __all__ = [k for k in dir() if not k.startswith('_') and k.isupper()]

View File

@ -0,0 +1,61 @@
def PREVIOUS(rec, *, group_by=(), order_by):
  """
  Returns the record that comes before `rec` when the table's records are grouped and ordered as
  specified. `group_by` and `order_by` each accept a column ID or a tuple of column IDs; IDs in
  `order_by` may start with "-" to sort that column in descending order.

  Examples:
  - `PREVIOUS(rec, order_by="Date")`: the previous record with the list of records sorted by the
    Date column.
  - `PREVIOUS(rec, order_by="-Date")`: the previous record with the list sorted by the Date
    column in descending order.
  - `PREVIOUS(rec, group_by="Account", order_by="Date")`: the previous record among those sharing
    rec's Account, sorted by Date.

  Ties in `order_by` values (e.g. equal Dates above) are broken by the records' relative position
  in views: internally the sort falls back to the special column `manualSort` and the row ID
  column `id`.

  Pass `order_by=None` to follow the unsorted order of the table (where rows may be rearranged by
  dragging them manually). For example, `PREVIOUS(rec, order_by=None)` returns the previous
  record in the unsorted list of records.

  Tuples of column IDs are accepted for both arguments, to match views sorted by multiple
  columns, e.g. `PREVIOUS(rec, group_by=("Account", "Year"), order_by=("Date", "-Amount"))`.
  """
  # Build the sorted lookup of rec's group, then step backwards from rec within it.
  sorted_records = _sorted_lookup(rec, group_by=group_by, order_by=order_by)
  return sorted_records._find.previous(rec)
def NEXT(rec, *, group_by=(), order_by):
  """
  Returns the record that comes after `rec` when the table's records are grouped and ordered as
  specified. See [`PREVIOUS`](#previous) for the meaning of `group_by` and `order_by`.
  """
  # Build the sorted lookup of rec's group, then step forwards from rec within it.
  sorted_records = _sorted_lookup(rec, group_by=group_by, order_by=order_by)
  return sorted_records._find.next(rec)
def RANK(rec, *, group_by=(), order_by, order="asc"):
  """
  Returns the 1-based position of `rec` among the records of its table, grouped and ordered
  according to `group_by` and `order_by`. See [`PREVIOUS`](#previous) for details of these
  parameters.

  `order` may be `"asc"` (the default) or `"desc"`: with `"asc"` the first record of the group in
  sorted order has rank 1; with `"desc"` the last one does.

  When `group_by` splits the table into several groups, ranks are computed per group, so each
  group has its own record with rank 1 (i.e. several records in the table may share a rank).

  For example, `RANK(rec, group_by="Year", order_by="Score", order="desc")` gives the rank of the
  current record (`rec`) by Score among the records of the same Year.
  """
  # Build the sorted lookup of rec's group, then ask for rec's position within it.
  sorted_records = _sorted_lookup(rec, group_by=group_by, order_by=order_by)
  return sorted_records._find.rank(rec, order=order)
def _sorted_lookup(rec, *, group_by, order_by):
if isinstance(group_by, str):
group_by = (group_by,)
return rec._table.lookup_records(**{c: getattr(rec, c) for c in group_by}, order_by=order_by)

View File

@ -495,11 +495,6 @@ def QUARTILE(data, quartile_number):
"""Returns a value nearest to a specified quartile of a dataset.""" """Returns a value nearest to a specified quartile of a dataset."""
raise NotImplementedError() raise NotImplementedError()
@unimplemented
def RANK(value, data, is_ascending=None):
"""Returns the rank of a specified value in a dataset."""
raise NotImplementedError()
@unimplemented @unimplemented
def RANK_AVG(value, data, is_ascending=None): def RANK_AVG(value, data, is_ascending=None):
"""Returns the rank of a specified value in a dataset. If there is more than one entry of the same value in the dataset, the average rank of the entries will be returned.""" """Returns the rank of a specified value in a dataset. If there is more than one entry of the same value in the dataset, the average rank of the entries will be returned."""

View File

@ -1,3 +1,46 @@
# Lookups are hard.
#
# Example to explain the relationship of various lookup helpers.
# Let's say we have this formula (notation [People.Rate] means a column "Rate" in table "People").
# [People.Rate] = Rates.lookupRecords(Email=$Email, sort_by="Date")
#
# Conceptually, a good representation is to think of a helper table "UniqueRateEmails", which
# contains a list of unique Email values in the table Rates. These are all the values that
# lookupRecords() can find.
#
# So conceptually, it helps to imagine a table with the following columns:
# [UniqueRateEmails.Email] = each Email in Rates
# [UniqueRateEmails.lookedUpRates] = {r.id for r in Rates if r.Email == $Email}
# -- this is the set of row_ids of all Rates with the email of this UniqueRateEmails row.
# [UniqueRateEmails.lookedUpRatesSorted] = sorted($lookedUpRates) # sorted by Date.
#
# We don't _actually_ create a helper table. (That would be a lot of overhead from all the extra
# tracking for recalculations.)
#
# We have two helper columns in the Rates table (the one in which we are looking up):
# [Rate.#lookup#Email] (LookupMapColumn)
# This is responsible to know which Rate rows correspond to which Emails (using a
# SimpleLookupMapping helper). For any email, it can produce the set of row_ids of Rate
# records.
#
# - It depends on [Rate.Email], so that changes to Email cause a recalculation.
# - When it gets recalculated, it
# - updates internal maps.
# - invalidates affected callers.
#
# [Rate.#lookup#Email#Date] (SortedLookupMapColumn)
# For each set of Rate results, this maintains a list of Rate row_ids sorted by Date.
#
# - It depends on [Rate.Date] so that changes to Date cause a recalculation.
# - When its do_lookup() is called, it creates
# - a dependency between the caller [People.Rate] and itself [Rate.#lookup#Email#Date]
# using a special _LookupRelation (which it keeps track of).
# - a dependency between the caller [People.Rate] and unsorted lookup [Rate.#lookup#Email]
# using another _LookupRelation (which [Rate.#lookup#Email] keeps track of).
# - When it gets recalculated, which means that order of the lookup result has changed:
# - it clears the cached sorted version of the lookup result
# - uses its _LookupRelations to invalidate affected callers.
import itertools import itertools
import logging import logging
from abc import abstractmethod from abc import abstractmethod
@ -8,24 +51,29 @@ import column
import depend import depend
import records import records
import relation import relation
from sort_key import make_sort_key
import twowaymap import twowaymap
from twowaymap import LookupSet
import usertypes import usertypes
from functions.lookup import _Contains from functions.lookup import _Contains
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def _extract(cell_value): class NoValueColumn(column.BaseColumn):
""" # Override various column methods, since (Sorted)LookupMapColumn doesn't care to store any
When cell_value is a Record, returns its rowId. Otherwise returns the value unchanged. # values. To outside code, it looks like a column of None's.
This is to allow lookups to work with reference columns. def raw_get(self, row_id):
""" return None
if isinstance(cell_value, records.Record): def convert(self, value_to_convert):
return cell_value._row_id return None
return cell_value def get_cell_value(self, row_id, restore=False):
return None
def set(self, row_id, value):
pass
class BaseLookupMapColumn(column.BaseColumn): class LookupMapColumn(NoValueColumn):
""" """
Conceptually a LookupMapColumn is associated with a table ("target table") and maintains for Conceptually a LookupMapColumn is associated with a table ("target table") and maintains for
each row a key (which is a tuple of values from the named columns), which is fast to look up. each row a key (which is a tuple of values from the named columns), which is fast to look up.
@ -43,128 +91,208 @@ class BaseLookupMapColumn(column.BaseColumn):
def __init__(self, table, col_id, col_ids_tuple): def __init__(self, table, col_id, col_ids_tuple):
# Note that self._recalc_rec_method is passed in as the formula's "method". # Note that self._recalc_rec_method is passed in as the formula's "method".
col_info = column.ColInfo(usertypes.Any(), is_formula=True, method=self._recalc_rec_method) col_info = column.ColInfo(usertypes.Any(), is_formula=True, method=self._recalc_rec_method)
super(BaseLookupMapColumn, self).__init__(table, col_id, col_info) super(LookupMapColumn, self).__init__(table, col_id, col_info)
self._col_ids_tuple = col_ids_tuple # For performance, prefer SimpleLookupMapping when no CONTAINS is used in lookups.
self._engine = table._engine if any(isinstance(col_id, _Contains) for col_id in col_ids_tuple):
self._mapping = ContainsLookupMapping(col_ids_tuple)
else:
self._mapping = SimpleLookupMapping(col_ids_tuple)
# Two-way map between rowIds of the target table (on the left) and key tuples (on the right). engine = table._engine
# Multiple rows can naturally map to the same key. engine.invalidate_column(self)
# Multiple keys can map to the same row if CONTAINS() is used self._relation_tracker = _RelationTracker(engine, self)
# The map is populated by engine's _recompute when this
# node is brought up-to-date.
self._row_key_map = self._make_row_key_map()
self._engine.invalidate_column(self)
# Map of referring Node to _LookupRelation. Different tables may do lookups using this def _recalc_rec_method(self, rec, _table):
# LookupMapColumn, and that creates a dependency from other Nodes to us, with a relation
# between referring rows and the lookup keys. This map stores these relations.
self._lookup_relations = {}
@abstractmethod
def _make_row_key_map(self):
raise NotImplementedError
@abstractmethod
def _recalc_rec_method(self, rec, table):
""" """
LookupMapColumn acts as a formula column, and this method is the "formula" called whenever LookupMapColumn acts as a formula column, and this method is the "formula" called whenever
a dependency changes. If LookupMapColumn indexes columns (A,B), then a change to A or B would a dependency changes. If LookupMapColumn indexes columns (A,B), then a change to A or B would
cause the LookupMapColumn to be invalidated for the corresponding rows, and brought up to date cause the LookupMapColumn to be invalidated for the corresponding rows, and brought up to date
during formula recomputation by calling this method. It shold take O(1) time per affected row. during formula recomputation by calling this method. It shold take O(1) time per affected row.
""" """
raise NotImplementedError affected_keys = self._mapping.update_record(rec)
self._relation_tracker.invalidate_affected_keys(affected_keys)
@abstractmethod
def _get_keys(self, target_row_id):
"""
Get the keys associated with the given target row id.
"""
raise NotImplementedError
def unset(self, row_id):
# This is called on record removal, and is necessary to deal with removed records.
old_keys = self._get_keys(row_id)
for old_key in old_keys:
self._row_key_map.remove(row_id, old_key)
self._invalidate_affected(old_keys)
def _invalidate_affected(self, affected_keys):
# For each known relation, figure out which referring rows are affected, and invalidate them.
# The engine will notice that there have been more invalidations, and recompute things again.
for node, rel in six.iteritems(self._lookup_relations):
affected_rows = rel.get_affected_rows_by_keys(affected_keys)
self._engine.invalidate_records(node.table_id, affected_rows, col_ids=(node.col_id,))
def _get_relation(self, referring_node):
"""
Helper which returns an existing or new _LookupRelation object for the given referring Node.
"""
rel = self._lookup_relations.get(referring_node)
if not rel:
rel = _LookupRelation(self, referring_node)
self._lookup_relations[referring_node] = rel
return rel
def _delete_relation(self, referring_node):
self._lookup_relations.pop(referring_node, None)
if not self._lookup_relations:
self._engine.mark_lookupmap_for_cleanup(self)
def _do_fast_empty_lookup(self): def _do_fast_empty_lookup(self):
""" """
Simplified version of do_lookup for a lookup column with no key columns Simplified version of do_lookup for a lookup column with no key columns
to make Table._num_rows as fast as possible. to make Table._num_rows as fast as possible.
""" """
return self._row_key_map.lookup_right((), default=()) return self._mapping.lookup_by_key((), default=())
def _do_fast_lookup(self, key):
key = tuple(_extract(val) for val in key)
return self._mapping.lookup_by_key(key, default=LookupSet())
@property
def sort_key(self):
return None
def do_lookup(self, key): def do_lookup(self, key):
""" """
Looks up key in the lookup map and returns a tuple with two elements: the set of matching Looks up key in the lookup map and returns a tuple with two elements: the list of matching
records (as a set object, not ordered), and the Relation object for those records, relating records (sorted), and the Relation object for those records, relating
the current frame to the returned records. Returns an empty set if no records match. the current frame to the returned records. Returns an empty set if no records match.
""" """
key = tuple(_extract(val) for val in key) key = tuple(_extract(val) for val in key)
engine = self._engine row_ids, rel = self._do_lookup_with_sort(key, (), None)
if engine._is_current_node_formula:
rel = self._get_relation(engine._current_node)
rel._add_lookup(engine._current_row_id, key)
else:
rel = None
# The _use_node call both brings LookupMapColumn up-to-date, and creates a dependency on it.
# Relation of None isn't valid, but it happens to be unused when there is no current_frame.
engine._use_node(self.node, rel)
row_ids = self._row_key_map.lookup_right(key, set())
return row_ids, rel return row_ids, rel
# Override various column methods, since LookupMapColumn doesn't care to store any values. To def _do_lookup_with_sort(self, key, sort_spec, sort_key):
# outside code, it looks like a column of None's. rel = self._relation_tracker.update_relation_from_current_node(key)
def raw_get(self, value): row_id_set = self._do_fast_lookup(key)
return None row_ids = row_id_set.sorted_versions.get(sort_spec)
def convert(self, value): if row_ids is None:
return None row_ids = sorted(row_id_set, key=sort_key)
def get_cell_value(self, row_id): row_id_set.sorted_versions[sort_spec] = row_ids
return None return row_ids, rel
def set(self, row_id, value):
pass
# For performance, prefer SimpleLookupMapColumn when no CONTAINS is used def _reset_sorted_versions(self, rec, sort_spec):
# in lookups, although the two implementations should be equivalent # For the lookup keys in rec, find the associated LookupSets, and clear the cached
# See also table._add_update_summary_col # .sorted_versions entry for the given sort_spec. Used when only sort-by columns change.
# Returns the set of affected keys.
new_keys = set(self._mapping.get_new_keys_iter(rec))
for key in new_keys:
row_ids = self._mapping.lookup_by_key(key, default=LookupSet())
row_ids.sorted_versions.pop(sort_spec, None)
return new_keys
class SimpleLookupMapColumn(BaseLookupMapColumn): def unset(self, row_id):
# This is called on record removal, and is necessary to deal with removed records.
affected_keys = self._mapping.remove_row_id(row_id)
self._relation_tracker.invalidate_affected_keys(affected_keys)
def _get_keys(self, row_id):
# For _LookupRelation to know which keys are affected when the given looked-up row_id changes.
return self._mapping.get_mapped_keys(row_id)
#----------------------------------------------------------------------
class SortedLookupMapColumn(NoValueColumn):
  """
  A SortedLookupMapColumn is associated with a LookupMapColumn and a set of columns used for
  sorting. It lives in the table containing the looked-up data. It is like a FormulaColumn in that
  it has a method triggered for a record whenever any of the sort columns change for that record.
  This method, in turn, invalidates lookups using the relations maintained by the LookupMapColumn.
  """
  def __init__(self, table, col_id, lookup_col, sort_spec):
    # table: the table containing the looked-up data.
    # lookup_col: the LookupMapColumn whose results this column keeps sorted.
    # sort_spec: tuple of column IDs, each optionally prefixed with "-" for descending order.
    # Before creating the helper column, check that all dependencies are actually valid col_ids.
    sort_col_ids = [(c[1:] if c.startswith('-') else c) for c in sort_spec]
    for c in sort_col_ids:
      if not table.has_column(c):
        raise KeyError("Table %s has no column %s" % (table.table_id, c))

    # Note that different LookupSortHelperColumns may exist with the same sort_col_ids but
    # different sort_keys because they could differ in order of columns and ASC/DESC flags.
    col_info = column.ColInfo(usertypes.Any(), is_formula=True, method=self._recalc_rec_method)
    super(SortedLookupMapColumn, self).__init__(table, col_id, col_info)
    self._lookup_col = lookup_col
    self._sort_spec = sort_spec
    self._sort_col_ids = sort_col_ids
    self._sort_key = make_sort_key(table, sort_spec)

    self._engine = table._engine
    # Mark this column as needing a recompute, so the engine brings it up to date when used.
    self._engine.invalidate_column(self)
    # Tracks the _LookupRelations between referring nodes and this column.
    self._relation_tracker = _RelationTracker(self._engine, self)

  @property
  def sort_key(self):
    # The comparison key function built from sort_spec, used to sort lookup results.
    return self._sort_key

  def do_lookup(self, key):
    """
    Looks up key in the lookup map and returns a tuple with two elements: the list of matching
    records (sorted), and the Relation object for those records, relating
    the current frame to the returned records. Returns an empty set if no records match.
    """
    # Replace any Record values in the key with their row IDs, so reference columns work.
    key = tuple(_extract(val) for val in key)
    # Record a dependency between the calling formula and this column for this key.
    self._relation_tracker.update_relation_from_current_node(key)
    # Delegate to the LookupMapColumn, which caches the sorted result per sort_spec.
    row_ids, rel = self._lookup_col._do_lookup_with_sort(key, self._sort_spec, self._sort_key)
    return row_ids, rel

  def _recalc_rec_method(self, rec, _table):
    # Acts as this helper column's "formula": invoked when a sort column changes for rec.
    # Create dependencies on all the sort columns.
    for col_id in self._sort_col_ids:
      getattr(rec, col_id)

    # The sort order may have changed: drop the cached sorted results for rec's keys, and
    # invalidate the formulas that depend on those lookups.
    affected_keys = self._lookup_col._reset_sorted_versions(rec, self._sort_spec)
    self._relation_tracker.invalidate_affected_keys(affected_keys)

  def _get_keys(self, row_id):
    # For _LookupRelation to know which keys are affected when the given looked-up row_id changes.
    return self._lookup_col._get_keys(row_id)
#----------------------------------------------------------------------
class BaseLookupMapping(object):
def __init__(self, col_ids_tuple):
self._col_ids_tuple = col_ids_tuple
# Two-way map between rowIds of the target table (on the left) and key tuples (on the right).
# Multiple rows can naturally map to the same key.
# A single row can map to multiple keys when CONTAINS() is used.
self._row_key_map = self._make_row_key_map()
@abstractmethod
def _make_row_key_map(self): def _make_row_key_map(self):
return twowaymap.TwoWayMap(left=set, right="single") raise NotImplementedError
def _recalc_rec_method(self, rec, table): @abstractmethod
old_key = self._row_key_map.lookup_left(rec._row_id) def get_mapped_keys(self, row_id):
"""
Get the set of keys associated with the given target row id, as stored in our mapping.
"""
raise NotImplementedError
@abstractmethod
def get_new_keys_iter(self, rec):
"""
Returns an iterator over the current value of all keys represented by the given record.
Typically, it's just one key, but when list-type columns are involved, then could be several.
"""
raise NotImplementedError
@abstractmethod
def update_record(self, rec):
"""
Update the mapping to reflect the current value of all keys represented by the given record,
and return all the affected keys, i.e. the set of all the keys that changed (old and new).
"""
raise NotImplementedError
def remove_row_id(self, row_id):
old_keys = self.get_mapped_keys(row_id)
for old_key in old_keys:
self._row_key_map.remove(row_id, old_key)
return old_keys
def lookup_by_key(self, key, default=None):
return self._row_key_map.lookup_right(key, default=default)
class SimpleLookupMapping(BaseLookupMapping):
def _make_row_key_map(self):
return twowaymap.TwoWayMap(left=LookupSet, right="single")
def _get_mapped_key(self, row_id):
return self._row_key_map.lookup_left(row_id)
def get_mapped_keys(self, row_id):
return {self._get_mapped_key(row_id)}
def get_new_keys_iter(self, rec):
# Note that getattr(rec, _col_id) is what creates the correct dependency, as well as ensures # Note that getattr(rec, _col_id) is what creates the correct dependency, as well as ensures
# that the columns used to index by are brought up-to-date (in case they are formula columns). # that the columns used to index by are brought up-to-date (in case they are formula columns).
new_key = tuple(_extract(getattr(rec, _col_id)) for _col_id in self._col_ids_tuple) return [tuple(_extract(getattr(rec, _col_id)) for _col_id in self._col_ids_tuple)]
def update_record(self, rec):
old_key = self._get_mapped_key(rec._row_id)
new_key = self.get_new_keys_iter(rec)[0]
if new_key == old_key:
return set()
try: try:
self._row_key_map.insert(rec._row_id, new_key) self._row_key_map.insert(rec._row_id, new_key)
except TypeError: except TypeError:
@ -172,18 +300,20 @@ class SimpleLookupMapColumn(BaseLookupMapColumn):
self._row_key_map.remove(rec._row_id, old_key) self._row_key_map.remove(rec._row_id, old_key)
new_key = None new_key = None
# It's OK if None is one of the values, since None will just never be found as a key. # Both keys are affected when present.
self._invalidate_affected({old_key, new_key}) return {k for k in (old_key, new_key) if k is not None}
def _get_keys(self, target_row_id):
return {self._row_key_map.lookup_left(target_row_id)}
class ContainsLookupMapColumn(BaseLookupMapColumn): class ContainsLookupMapping(BaseLookupMapping):
def _make_row_key_map(self): def _make_row_key_map(self):
return twowaymap.TwoWayMap(left=set, right=set) return twowaymap.TwoWayMap(left=LookupSet, right=set)
def _recalc_rec_method(self, rec, table): def get_mapped_keys(self, row_id):
# Need to copy the return value since it's the actual set
# stored in the map and may be modified
return set(self._row_key_map.lookup_left(row_id, ()))
def get_new_keys_iter(self, rec):
# Create a key in the index for every combination of values in columns # Create a key in the index for every combination of values in columns
# looked up with CONTAINS() # looked up with CONTAINS()
new_keys_groups = [] new_keys_groups = []
@ -211,27 +341,79 @@ class ContainsLookupMapColumn(BaseLookupMapColumn):
new_keys_groups.append([_extract(v) for v in group]) new_keys_groups.append([_extract(v) for v in group])
new_keys = set(itertools.product(*new_keys_groups)) return itertools.product(*new_keys_groups)
def update_record(self, rec):
new_keys = set(self.get_new_keys_iter(rec))
row_id = rec._row_id row_id = rec._row_id
old_keys = self._get_keys(row_id) old_keys = self.get_mapped_keys(row_id)
for old_key in old_keys - new_keys: for old_key in old_keys - new_keys:
self._row_key_map.remove(row_id, old_key) self._row_key_map.remove(row_id, old_key)
for new_key in new_keys - old_keys: for new_key in new_keys - old_keys:
self._row_key_map.insert(row_id, new_key) self._row_key_map.insert(row_id, new_key)
# Invalidate all keys which were either inserted or removed # Affected keys are those that were either newly inserted or newly removed.
self._invalidate_affected(new_keys ^ old_keys) return new_keys ^ old_keys
def _get_keys(self, target_row_id):
# Need to copy the return value since it's the actual set
# stored in the map and may be modified
return set(self._row_key_map.lookup_left(target_row_id, ()))
#---------------------------------------------------------------------- #----------------------------------------------------------------------
class _RelationTracker(object):
"""
Helper used by (Sorted)LookupMapColumn to keep track of the _LookupRelations between referring
nodes and that column.
"""
def __init__(self, engine, lookup_map):
self._engine = engine
self._lookup_map = lookup_map
# Map of referring Node to _LookupRelation. Different tables may do lookups using a
# (Sorted)LookupMapColumn, and that creates a dependency from other Nodes to us, with a
# relation between referring rows and the lookup keys. This map stores these relations.
self._lookup_relations = {}
def update_relation_from_current_node(self, key):
"""
Looks up key in the lookup map and returns a tuple with two elements: the list of matching
records (sorted), and the Relation object for those records, relating
the current frame to the returned records. Returns an empty set if no records match.
"""
engine = self._engine
if engine._is_current_node_formula:
rel = self._get_relation(engine._current_node)
rel._add_lookup(engine._current_row_id, key)
else:
rel = None
# The _use_node call brings the _lookup_map column up-to-date, and creates a dependency on it.
# Relation of None isn't valid, but it happens to be unused when there is no current_frame.
engine._use_node(self._lookup_map.node, rel)
return rel
def invalidate_affected_keys(self, affected_keys):
# For each known relation, figure out which referring rows are affected, and invalidate them.
# The engine will notice that there have been more invalidations, and recompute things again.
for rel in six.itervalues(self._lookup_relations):
rel.invalidate_affected_keys(affected_keys, self._engine)
def _get_relation(self, referring_node):
"""
Helper which returns an existing or new _LookupRelation object for the given referring Node.
"""
rel = self._lookup_relations.get(referring_node)
if not rel:
rel = _LookupRelation(self._lookup_map, self, referring_node)
self._lookup_relations[referring_node] = rel
return rel
def _delete_relation(self, referring_node):
self._lookup_relations.pop(referring_node, None)
if not self._lookup_relations:
self._engine.mark_lookupmap_for_cleanup(self._lookup_map)
class _LookupRelation(relation.Relation): class _LookupRelation(relation.Relation):
""" """
_LookupRelation maintains a mapping between rows of a table doing a lookup to the rows getting _LookupRelation maintains a mapping between rows of a table doing a lookup to the rows getting
@ -242,15 +424,21 @@ class _LookupRelation(relation.Relation):
other code. other code.
""" """
def __init__(self, lookup_map, referring_node): def __init__(self, lookup_map, relation_tracker, referring_node):
super(_LookupRelation, self).__init__(referring_node.table_id, lookup_map.table_id) super(_LookupRelation, self).__init__(referring_node.table_id, lookup_map.table_id)
self._lookup_map = lookup_map self._lookup_map = lookup_map
self._relation_tracker = relation_tracker
self._referring_node = referring_node self._referring_node = referring_node
# Maps referring rows to keys, where multiple rows may map to the same key AND one row may # Maps referring rows to keys, where multiple rows may map to the same key AND one row may
# map to multiple keys (if a formula does multiple lookup calls). # map to multiple keys (if a formula does multiple lookup calls).
self._row_key_map = twowaymap.TwoWayMap(left=set, right=set) self._row_key_map = twowaymap.TwoWayMap(left=set, right=set)
# This is for an optimization. We may invalidate the same key many times (including O(N)
# times), which will lead to invalidating the same O(N) records over and over, resulting in
# O(N^2) work. By remembering the keys we invalidated, we can avoid that waste.
self._invalidated_keys_cache = set()
def __str__(self): def __str__(self):
return "_LookupRelation(%s->%s)" % (self._referring_node, self.target_table) return "_LookupRelation(%s->%s)" % (self._referring_node, self.target_table)
@ -266,6 +454,13 @@ class _LookupRelation(relation.Relation):
set().union(*[self._lookup_map._get_keys(r) for r in target_row_ids]) set().union(*[self._lookup_map._get_keys(r) for r in target_row_ids])
) )
  def invalidate_affected_keys(self, affected_keys, engine):
    """
    Invalidates the referring rows that depend on any of affected_keys. Keys already present in
    _invalidated_keys_cache are skipped, so repeated invalidations of the same keys don't turn
    into O(N^2) work; the cache is cleared whenever this relation changes.
    """
    affected_rows = self.get_affected_rows_by_keys(affected_keys - self._invalidated_keys_cache)
    if affected_rows:
      node = self._referring_node
      engine.invalidate_records(node.table_id, affected_rows, col_ids=(node.col_id,))
    self._invalidated_keys_cache.update(affected_keys)
def get_affected_rows_by_keys(self, keys): def get_affected_rows_by_keys(self, keys):
""" """
This is used by LookupMapColumn to know which rows got affected when a target row changed to This is used by LookupMapColumn to know which rows got affected when a target row changed to
@ -283,6 +478,7 @@ class _LookupRelation(relation.Relation):
process of computing the given referring_row_id. process of computing the given referring_row_id.
""" """
self._row_key_map.insert(referring_row_id, key) self._row_key_map.insert(referring_row_id, key)
self._reset_invalidated_keys_cache()
def reset_rows(self, referring_rows): def reset_rows(self, referring_rows):
""" """
@ -295,6 +491,7 @@ class _LookupRelation(relation.Relation):
else: else:
for row_id in referring_rows: for row_id in referring_rows:
self._row_key_map.remove_left(row_id) self._row_key_map.remove_left(row_id)
self._reset_invalidated_keys_cache()
def reset_all(self): def reset_all(self):
""" """
@ -303,7 +500,15 @@ class _LookupRelation(relation.Relation):
# In this case also, remove it from the LookupMapColumn. Once all relations are gone, the # In this case also, remove it from the LookupMapColumn. Once all relations are gone, the
# lookup map can get cleaned up. # lookup map can get cleaned up.
self._row_key_map.clear() self._row_key_map.clear()
self._lookup_map._delete_relation(self._referring_node) self._relation_tracker._delete_relation(self._referring_node)
self._reset_invalidated_keys_cache()
  def _reset_invalidated_keys_cache(self):
    # When the invalidations take effect (i.e. invalidated columns get recomputed), the engine
    # resets the relations for the affected rows. We use that, as well as any change to the
    # relation, as a signal to clear _invalidated_keys_cache. Its purpose is only to deduplicate
    # invalidations within a single round of updates from the helper (Sorted)LookupMapColumn.
    self._invalidated_keys_cache.clear()
def extract_column_id(c): def extract_column_id(c):
@ -311,3 +516,12 @@ def extract_column_id(c):
return c.value return c.value
else: else:
return c return c
def _extract(cell_value):
  """
  Returns cell_value._row_id when cell_value is a Record; otherwise returns the value unchanged.
  This is to allow lookups to work with reference columns.
  """
  return cell_value._row_id if isinstance(cell_value, records.Record) else cell_value

View File

@ -379,10 +379,11 @@ class RecordList(list):
Just like list but allows setting custom attributes, which we use for remembering _group_by and Just like list but allows setting custom attributes, which we use for remembering _group_by and
_sort_by attributes when storing RecordSet as usertypes.ReferenceList type. _sort_by attributes when storing RecordSet as usertypes.ReferenceList type.
""" """
def __init__(self, row_ids, group_by=None, sort_by=None): def __init__(self, row_ids, group_by=None, sort_by=None, sort_key=None):
list.__init__(self, row_ids) list.__init__(self, row_ids)
self._group_by = group_by self._group_by = group_by # None or a tuple of col_ids
self._sort_by = sort_by self._sort_by = sort_by # None or a tuple of col_ids, optionally prefixed with "-"
self._sort_key = sort_key # Comparator function (see sort_key.py)
def __repr__(self): def __repr__(self):
return "RecordList(%s, group_by=%r, sort_by=%r)" % ( return "RecordList(%s, group_by=%r, sort_by=%r)" % (

View File

@ -3,7 +3,11 @@ Implements the base classes for Record and RecordSet objects used to represent r
tables. Individual tables use derived versions of these, which add per-column properties. tables. Individual tables use derived versions of these, which add per-column properties.
""" """
from bisect import bisect_left, bisect_right
import functools import functools
import sys
import six
@functools.total_ordering @functools.total_ordering
class Record(object): class Record(object):
@ -134,14 +138,14 @@ class RecordSet(object):
""" """
# Slots are an optimization to avoid the need for a per-object __dict__. # Slots are an optimization to avoid the need for a per-object __dict__.
__slots__ = ('_row_ids', '_source_relation', '_group_by', '_sort_by') __slots__ = ('_row_ids', '_source_relation', '_group_by', '_sort_by', '_sort_key')
# Per-table derived classes override this and set it to the appropriate Table object. # Per-table derived classes override this and set it to the appropriate Table object.
_table = None _table = None
# Methods should be named with a leading underscore to avoid interfering with access to # Methods should be named with a leading underscore to avoid interfering with access to
# user-defined fields. # user-defined fields.
def __init__(self, row_ids, relation=None, group_by=None, sort_by=None): def __init__(self, row_ids, relation=None, group_by=None, sort_by=None, sort_key=None):
""" """
group_by may be a dictionary mapping column names to values that are all the same for the given group_by may be a dictionary mapping column names to values that are all the same for the given
RecordSet. sort_by may be the column name used for sorting this record set. Both are set by RecordSet. sort_by may be the column name used for sorting this record set. Both are set by
@ -149,9 +153,10 @@ class RecordSet(object):
""" """
self._row_ids = row_ids self._row_ids = row_ids
self._source_relation = relation or self._table._identity_relation self._source_relation = relation or self._table._identity_relation
# If row_ids is itself a RecordList, default to its _group_by and _sort_by properties. # If row_ids is itself a RecordList, default to its _group_by, _sort_by, _sort_key properties.
self._group_by = group_by or getattr(row_ids, '_group_by', None) self._group_by = group_by or getattr(row_ids, '_group_by', None)
self._sort_by = sort_by or getattr(row_ids, '_sort_by', None) self._sort_by = sort_by or getattr(row_ids, '_sort_by', None)
self._sort_key = sort_key or getattr(row_ids, '_sort_key', None)
def __len__(self): def __len__(self):
return len(self._row_ids) return len(self._row_ids)
@ -181,15 +186,13 @@ class RecordSet(object):
return False return False
def get_one(self): def get_one(self):
if not self._row_ids: # Pick the first record in the sorted order, or empty/sample record for empty RecordSet
# Default to the empty/sample record row_id = self._row_ids[0] if self._row_ids else 0
row_id = 0 return self._table.Record(row_id, self._source_relation)
elif self._sort_by:
# Pick the first record in the sorted order def __getitem__(self, index):
row_id = self._row_ids[0] # Allows subscripting a RecordSet as r[0] or r[-1].
else: row_id = self._row_ids[index]
# Pick the first record in the order of the underlying table, for backwards compatibility.
row_id = min(self._row_ids)
return self._table.Record(row_id, self._source_relation) return self._table.Record(row_id, self._source_relation)
def __getattr__(self, name): def __getattr__(self, name):
@ -198,11 +201,20 @@ class RecordSet(object):
def __repr__(self): def __repr__(self):
return "%s[%s]" % (self._table.table_id, self._row_ids) return "%s[%s]" % (self._table.table_id, self._row_ids)
def _at(self, index):
"""
Returns element of RecordSet at the given index when the index is valid and non-negative.
Otherwise returns the empty/sample record.
"""
row_id = self._row_ids[index] if (0 <= index < len(self._row_ids)) else 0
return self._table.Record(row_id, self._source_relation)
def _clone_with_relation(self, src_relation): def _clone_with_relation(self, src_relation):
return self._table.RecordSet(self._row_ids, return self._table.RecordSet(self._row_ids,
relation=src_relation.compose(self._source_relation), relation=src_relation.compose(self._source_relation),
group_by=self._group_by, group_by=self._group_by,
sort_by=self._sort_by) sort_by=self._sort_by,
sort_key=self._sort_key)
def _get_encodable_row_ids(self): def _get_encodable_row_ids(self):
""" """
@ -214,6 +226,134 @@ class RecordSet(object):
else: else:
return list(self._row_ids) return list(self._row_ids)
def _get_sort_key(self):
if not self._sort_key:
if self._sort_by:
raise ValueError("Sorted by %s but no sort_key" % (self._sort_by,))
raise ValueError("Can only use 'find' methods in a sorted reference list")
return self._sort_key
  def _to_local_row_id(self, item):
    """
    Converts item to a row ID of this RecordSet's table: plain ints pass through, and a Record
    belonging to the same table is converted to its row ID. Raises ValueError otherwise.
    """
    if isinstance(item, int):
      return item
    if isinstance(item, Record) and item._table == self._table:
      return int(item)
    raise ValueError("unexpected search item") # TODO: use a more descriptive error
  @property
  def find(self):
    """
    A set of methods for finding values in sorted set of records. For example:
    ```
    Transactions.lookupRecords(..., order_by="Date").find.lt($Date)
    Table.lookupRecords(..., order_by=("Foo", "Bar")).find.le(foo, bar)
    ```

    If the `find` method is shadowed by a same-named user column, you may use `_find` instead.

    The methods available are:

    - `lt`: (less than) find nearest record with sort values < the given values
    - `le`: (less than or equal to) find nearest record with sort values <= the given values
    - `gt`: (greater than) find nearest record with sort values > the given values
    - `ge`: (greater than or equal to) find nearest record with sort values >= the given values
    - `eq`: (equal to) find nearest record with sort values == the given values

    Example from https://templates.getgrist.com/5pHLanQNThxk/Payroll. Each person has a history of
    pay rates, in the Rates table. To find a rate applicable on a certain date, here is how you
    can do it old-style:
    ```
    # Get all the rates for the Person and Role in this row.
    rates = Rates.lookupRecords(Person=$Person, Role=$Role)

    # Pick out only those rates whose Rate_Start is on or before this row's Date.
    past_rates = [r for r in rates if r.Rate_Start <= $Date]

    # Select the latest of past_rates, i.e. maximum by Rate_Start.
    rate = max(past_rates, key=lambda r: r.Rate_Start)

    # Return the Hourly_Rate from the relevant Rates record.
    return rate.Hourly_Rate
    ```
    With the new methods, it is much simpler:
    ```
    rate = Rates.lookupRecords(Person=$Person, Role=$Role, order_by="Rate_Start").find.le($Date)
    return rate.Hourly_Rate
    ```
    Note that this is also much faster when there are many rates for the same Person and Role.
    """
    return FindOps(self)
  @property
  def _find(self):
    # Alias for the `find` property, for use when a user column named "find" shadows it.
    return FindOps(self)
  def _find_eq(self, *values):
    """
    Returns the first record whose sort-column values equal the given values, or the
    empty/sample record when there is no exact match.
    """
    # Find the first row whose sort key is >= the requested values (using the _min_row_id
    # sentinel so any real row with equal values sorts after the search key).
    found = self._bisect_find(bisect_left, 0, _min_row_id, values)
    if found:
      # 'found' means that we found a row that's greater-than-or-equal-to the values we are
      # looking for. To check if the row is actually "equal", it remains to check if it is
      # strictly greater than the passed-in values.
      key = self._get_sort_key()
      if key(found._row_id, values) < key(found._row_id):
        return self._table.Record(0, self._source_relation)
    return found
  def _bisect_index(self, bisect_func, search_row_id, search_values=None):
    """
    Applies bisect_func (bisect_left or bisect_right) to this RecordSet's sorted row_ids,
    searching for the sort key of search_row_id (or of search_values, when given). Returns the
    resulting insertion index.
    """
    key = self._get_sort_key()
    # Note that 'key' argument is only available from Python 3.10.
    return bisect_func(self._row_ids, key(search_row_id, search_values), key=key)
  def _bisect_find(self, bisect_func, shift, search_row_id, search_values=None):
    """
    Like _bisect_index, but returns the Record at position (index + shift), or the empty/sample
    record when that position is out of range (see _at).
    """
    i = self._bisect_index(bisect_func, search_row_id, search_values=search_values)
    return self._at(i + shift)
# Sentinel "row IDs" used when bisecting by values alone. They only participate in comparisons
# (as tie-breakers after equal values), so they just need to compare below/above any real row ID.
_min_row_id = -sys.float_info.max
_max_row_id = sys.float_info.max

if six.PY3:
  class FindOps(object):
    """
    Implements the find.* methods of RecordSet (see the `find` property), providing O(log N)
    search within sorted lookup results. Available on Python 3 only.
    """
    def __init__(self, record_set):
      self._rset = record_set

    def previous(self, row):
      # Returns the record just before `row` in the sorted order (empty record if none).
      row_id = self._rset._to_local_row_id(row)
      return self._rset._bisect_find(bisect_left, -1, row_id)

    def next(self, row):
      # Returns the record just after `row` in the sorted order (empty record if none).
      row_id = self._rset._to_local_row_id(row)
      return self._rset._bisect_find(bisect_right, 0, row_id)

    def rank(self, row, order="asc"):
      # Returns the 1-based position of `row` within the sorted RecordSet; with order="desc",
      # counts from the opposite end.
      row_id = self._rset._to_local_row_id(row)
      index = self._rset._bisect_index(bisect_left, row_id)
      if order == "asc":
        return index + 1
      elif order == "desc":
        return len(self._rset) - index
      else:
        raise ValueError("The 'order' parameter must be \"asc\" (default) or \"desc\"")

    def lt(self, *values):
      # Nearest record with sort values < the given values.
      return self._rset._bisect_find(bisect_left, -1, _min_row_id, values)

    def le(self, *values):
      # Nearest record with sort values <= the given values.
      return self._rset._bisect_find(bisect_right, -1, _max_row_id, values)

    def gt(self, *values):
      # Nearest record with sort values > the given values.
      return self._rset._bisect_find(bisect_right, 0, _max_row_id, values)

    def ge(self, *values):
      # Nearest record with sort values >= the given values.
      return self._rset._bisect_find(bisect_left, 0, _min_row_id, values)

    def eq(self, *values):
      # Record with sort values == the given values, or the empty record.
      return self._rset._find_eq(*values)
else:
  class FindOps(object):
    # On Python 2, find.* methods are unavailable; constructing FindOps raises immediately.
    def __init__(self, record_set):
      raise NotImplementedError("Update engine to Python3 to use lookupRecords().find")
def adjust_record(relation, value): def adjust_record(relation, value):

54
sandbox/grist/sort_key.py Normal file
View File

@ -0,0 +1,54 @@
from numbers import Number
def make_sort_key(table, sort_spec):
  """
  table: Table object from table.py
  sort_spec: tuple of column IDs, optionally prefixed by '-' to invert the sort order.
  Returns a key class for comparing row_ids, i.e. with the returned SortKey, the expression
  SortKey(r1) < SortKey(r2) is true iff r1 comes before r2 according to sort_spec.

  The returned SortKey also allows comparing values that aren't in the table:
  SortKey(row_id, (v1, v2, ...)) will act as if the values of the columns mentioned in
  sort_spec are v1, v2, etc.
  """
  spec_pairs = []
  for col_spec in sort_spec:
    if col_spec.startswith('-'):
      col_id, sign = col_spec[1:], -1
    else:
      col_id, sign = col_spec, 1
    spec_pairs.append((table.get_column(col_id), sign))

  def _fallback_rank(value):
    # Rank used when values are incomparable, to maintain order similar to Python2 (matches the
    # fallback logic in SafeSortKey in column.py):
    # - None is less than everything else
    # - Numbers are less than other types
    # - Other types are ordered by type name
    return (
      (0 if value is None else 1),
      (0 if isinstance(value, Number) else 1),
      type(value).__name__,
    )

  class SortKey(object):
    __slots__ = ("row_id", "values")

    def __init__(self, row_id, values=None):
      # When values are provided, row_id is not used for access but is used for comparison, so
      # must still be comparable to any valid row_id (e.g. must not be None). We use
      # +-sys.float_info.max in records.py for this.
      self.row_id = row_id
      self.values = values if values else tuple(c.get_cell_value(row_id) for (c, _) in spec_pairs)

    def __lt__(self, other):
      for (mine, theirs, (_col, sign)) in zip(self.values, other.values, spec_pairs):
        try:
          if mine < theirs:
            return sign == 1
          if theirs < mine:
            return sign == -1
        except TypeError:
          # Incomparable values: order by the type-based fallback rank instead.
          mine_rank = _fallback_rank(mine)
          theirs_rank = _fallback_rank(theirs)
          if mine_rank < theirs_rank:
            return sign == 1
          if theirs_rank < mine_rank:
            return sign == -1
      # All compared values are equal; fallback order is by ascending row_id.
      return self.row_id < other.row_id

  return SortKey

View File

@ -69,18 +69,32 @@ class UserTable(object):
most commonly a field in the current row (e.g. `$SomeField`) or a constant (e.g. a quoted string most commonly a field in the current row (e.g. `$SomeField`) or a constant (e.g. a quoted string
like `"Some Value"`) (examples below). like `"Some Value"`) (examples below).
You may set the optional `sort_by` parameter to the column ID by which to sort multiple matching
results, to determine which of them is returned. You can prefix the column ID with "-" to
reverse the order.
For example: For example:
``` ```
People.lookupRecords(Email=$Work_Email) People.lookupRecords(Email=$Work_Email)
People.lookupRecords(First_Name="George", Last_Name="Washington") People.lookupRecords(First_Name="George", Last_Name="Washington")
People.lookupRecords(Last_Name="Johnson", sort_by="First_Name")
Orders.lookupRecords(Customer=$id, sort_by="-OrderDate")
``` ```
You may set the optional `order_by` parameter to the column ID by which to sort the results.
You can prefix the column ID with "-" to reverse the order. You can also specify multiple
column IDs as a tuple (e.g. `order_by=("Account", "-Date")`).
For example:
```
Transactions.lookupRecords(Account=$Account, order_by="Date")
Transactions.lookupRecords(Account=$Account, order_by="-Date")
Transactions.lookupRecords(Active=True, order_by=("Account", "-Date"))
```
For records with equal `order_by` fields, the results are sorted according to how they appear
in views (which is determined by the special `manualSort` column). You may set `order_by=None`
to match the order of records in unsorted views.
By default, with no `order_by`, records are sorted by row ID, as if with `order_by="id"`.
For backward compatibility, `sort_by` may be used instead of `order_by`, but only allows a
single field, and falls back to row ID (rather than `manualSort`).
See [RecordSet](#recordset) for useful properties offered by the returned object. See [RecordSet](#recordset) for useful properties offered by the returned object.
See [CONTAINS](#contains) for an example utilizing `UserTable.lookupRecords` to find records See [CONTAINS](#contains) for an example utilizing `UserTable.lookupRecords` to find records
@ -92,27 +106,35 @@ class UserTable(object):
return self.table.lookup_records(**field_value_pairs) return self.table.lookup_records(**field_value_pairs)
def lookupOne(self, **field_value_pairs): def lookupOne(self, **field_value_pairs):
# pylint: disable=line-too-long
""" """
Name: lookupOne Name: lookupOne
Usage: UserTable.__lookupOne__(Field_In_Lookup_Table=value, ...) Usage: UserTable.__lookupOne__(Field_In_Lookup_Table=value, ...)
Returns a [Record](#record) matching the given field=value arguments. The value may be any Returns a [Record](#record) matching the given field=value arguments. The value may be any
expression, expression,
most commonly a field in the current row (e.g. `$SomeField`) or a constant (e.g. a quoted string most commonly a field in the current row (e.g. `$SomeField`) or a constant (e.g. a quoted string
like `"Some Value"`). If multiple records are found, the first match is returned. like `"Some Value"`).
You may set the optional `sort_by` parameter to the column ID by which to sort multiple matching
results, to determine which of them is returned. You can prefix the column ID with "-" to
reverse the order.
For example: For example:
``` ```
People.lookupOne(First_Name="Lewis", Last_Name="Carroll") People.lookupOne(First_Name="Lewis", Last_Name="Carroll")
People.lookupOne(Email=$Work_Email) People.lookupOne(Email=$Work_Email)
Tickets.lookupOne(Person=$id, sort_by="Date") # Find the first ticket for the person
Tickets.lookupOne(Person=$id, sort_by="-Date") # Find the last ticket for the person
``` ```
Learn more about [lookupOne](references-lookups.md#lookupone). Learn more about [lookupOne](references-lookups.md#lookupone).
If multiple records are found, the first match is returned. You may set the optional `order_by`
parameter to the column ID by which to sort the matches, to determine which of them is
returned as the first one. By default, the record with the lowest row ID is returned.
See [`lookupRecords`](#lookupRecords) for details of all available options and behavior of
`order_by` (and of its legacy alternative, `sort_by`).
For example:
```
Tasks.lookupOne(Project=$id, order_by="Priority") # Returns the Task with the smallest Priority.
Rates.lookupOne(Person=$id, order_by="-Date") # Returns the Rate with the latest Date.
```
""" """
return self.table.lookup_one_record(**field_value_pairs) return self.table.lookup_one_record(**field_value_pairs)
@ -176,7 +198,7 @@ class Table(object):
self._id_column = id_column self._id_column = id_column
def __contains__(self, row_id): def __contains__(self, row_id):
return row_id < self._id_column.size() and self._id_column.raw_get(row_id) > 0 return 0 < row_id < self._id_column.size() and self._id_column.raw_get(row_id) > 0
def __iter__(self): def __iter__(self):
for row_id in xrange(self._id_column.size()): for row_id in xrange(self._id_column.size()):
@ -500,6 +522,7 @@ class Table(object):
""" """
# The tuple of keys used determines the LookupMap we need. # The tuple of keys used determines the LookupMap we need.
sort_by = kwargs.pop('sort_by', None) sort_by = kwargs.pop('sort_by', None)
order_by = kwargs.pop('order_by', 'id') # For backward compatibility
key = [] key = []
col_ids = [] col_ids = []
for col_id in sorted(kwargs): for col_id in sorted(kwargs):
@ -520,21 +543,15 @@ class Table(object):
key = tuple(key) key = tuple(key)
lookup_map = self._get_lookup_map(col_ids) lookup_map = self._get_lookup_map(col_ids)
row_id_set, rel = lookup_map.do_lookup(key) sort_spec = make_sort_spec(order_by, sort_by, self.has_column('manualSort'))
if sort_by: if sort_spec:
if not isinstance(sort_by, six.string_types): sorted_lookup_map = self._get_sorted_lookup_map(lookup_map, sort_spec)
raise TypeError("sort_by must be a column ID (string)")
reverse = sort_by.startswith("-")
sort_col = sort_by.lstrip("-")
sort_col_obj = self.all_columns[sort_col]
row_ids = sorted(
row_id_set,
key=lambda r: column.SafeSortKey(self._get_col_obj_value(sort_col_obj, r, rel)),
reverse=reverse,
)
else: else:
row_ids = sorted(row_id_set) sorted_lookup_map = lookup_map
return self.RecordSet(row_ids, rel, group_by=kwargs, sort_by=sort_by)
row_ids, rel = sorted_lookup_map.do_lookup(key)
return self.RecordSet(row_ids, rel, group_by=kwargs, sort_by=sort_by,
sort_key=sorted_lookup_map.sort_key)
def lookup_one_record(self, **kwargs): def lookup_one_record(self, **kwargs):
return self.lookup_records(**kwargs).get_one() return self.lookup_records(**kwargs).get_one()
@ -555,14 +572,19 @@ class Table(object):
c = lookup.extract_column_id(c) c = lookup.extract_column_id(c)
if not self.has_column(c): if not self.has_column(c):
raise KeyError("Table %s has no column %s" % (self.table_id, c)) raise KeyError("Table %s has no column %s" % (self.table_id, c))
if any(isinstance(col_id, lookup._Contains) for col_id in col_ids_tuple): lmap = lookup.LookupMapColumn(self, lookup_col_id, col_ids_tuple)
column_class = lookup.ContainsLookupMapColumn
else:
column_class = lookup.SimpleLookupMapColumn
lmap = column_class(self, lookup_col_id, col_ids_tuple)
self._add_special_col(lmap) self._add_special_col(lmap)
return lmap return lmap
def _get_sorted_lookup_map(self, lookup_map, sort_spec):
helper_col_id = lookup_map.col_id + "#" + ":".join(sort_spec)
# Find or create a helper col for the given sort_spec.
helper_col = self._special_cols.get(helper_col_id)
if not helper_col:
helper_col = lookup.SortedLookupMapColumn(self, helper_col_id, lookup_map, sort_spec)
self._add_special_col(helper_col)
return helper_col
def delete_column(self, col_obj): def delete_column(self, col_obj):
assert col_obj.table_id == self.table_id assert col_obj.table_id == self.table_id
self._special_cols.pop(col_obj.col_id, None) self._special_cols.pop(col_obj.col_id, None)
@ -719,7 +741,40 @@ class Table(object):
setattr(self.RecordSet, col_obj.col_id, recordset_field) setattr(self.RecordSet, col_obj.col_id, recordset_field)
def _remove_field_from_record_classes(self, col_id): def _remove_field_from_record_classes(self, col_id):
if hasattr(self.Record, col_id): # Check if col_id is in the immediate dictionary of self.Record[Set]; if missing, or inherited
# from the base class (e.g. "find"), there is nothing to delete.
if col_id in self.Record.__dict__:
delattr(self.Record, col_id) delattr(self.Record, col_id)
if hasattr(self.RecordSet, col_id): if col_id in self.RecordSet.__dict__:
delattr(self.RecordSet, col_id) delattr(self.RecordSet, col_id)
def make_sort_spec(order_by, sort_by, has_manual_sort):
# Note that rowId is always an automatic fallback.
if sort_by:
if not isinstance(sort_by, six.string_types):
# pylint: disable=line-too-long
raise TypeError("sort_by must be a string column ID, with optional '-'; use order_by for tuples")
# No fallback to 'manualSort' here, for backward compatibility.
return (sort_by,)
if not isinstance(order_by, tuple):
# Suppot None and single-string specs (for a single column)
if isinstance(order_by, six.string_types):
order_by = (order_by,)
elif order_by is None:
order_by = ()
else:
raise TypeError("order_by must be a string column ID, with optional '-', or a tuple of them")
# Check if 'id' is mentioned explicitly. If so, then no fallback to 'manualSort', or anything
# else, since row IDs are unique. Also, drop the 'id' column itself because the row ID fallback
# is mandatory and automatic.
if 'id' in order_by:
return order_by[:order_by.index('id')]
# Fall back to manualSort, but only if it exists in the table and not yet mentioned in order_by.
if has_manual_sort and 'manualSort' not in order_by:
return order_by + ('manualSort',)
return order_by

View File

@ -2,6 +2,7 @@ import difflib
import functools import functools
import json import json
import logging import logging
import os
import sys import sys
import unittest import unittest
from collections import namedtuple from collections import namedtuple
@ -39,7 +40,7 @@ class EngineTestCase(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls._orig_log_level = logging.root.level cls._orig_log_level = logging.root.level
logging.root.setLevel(logging.WARNING) logging.root.setLevel(logging.DEBUG if os.environ.get('VERBOSE') else logging.WARNING)
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
@ -58,7 +59,8 @@ class EngineTestCase(unittest.TestCase):
def trace_call(col_obj, _rec): def trace_call(col_obj, _rec):
# Ignore formulas in metadata tables for simplicity. Such formulas are mostly private, and # Ignore formulas in metadata tables for simplicity. Such formulas are mostly private, and
# it would be annoying to fix tests every time we change them. # it would be annoying to fix tests every time we change them.
if not col_obj.table_id.startswith("_grist_"): # Also ignore negative row_ids, used as extra dependency nodes in lookups.
if not col_obj.table_id.startswith("_grist_") and _rec._row_id >= 0:
tmap = self.call_counts.setdefault(col_obj.table_id, {}) tmap = self.call_counts.setdefault(col_obj.table_id, {})
tmap[col_obj.col_id] = tmap.get(col_obj.col_id, 0) + 1 tmap[col_obj.col_id] = tmap.get(col_obj.col_id, 0) + 1
self.engine.formula_tracer = trace_call self.engine.formula_tracer = trace_call
@ -211,13 +213,23 @@ class EngineTestCase(unittest.TestCase):
def assertTableData(self, table_name, data=[], cols="all", rows="all", sort=None): def assertTableData(self, table_name, data=[], cols="all", rows="all", sort=None):
""" """
Verify some or all of the data in the table named `table_name`. Verify some or all of the data in the table named `table_name`.
- data: an array of rows, with first row containing column names starting with "id", and - data: one of
(1) an array of rows, with first row containing column names starting with "id", and
other rows also all starting with row_id. other rows also all starting with row_id.
(2) an array of dictionaries, mapping colIds to values
(3) an array of namedtuples, e.g. as returned by transpose_bulk_action().
- cols: may be "all" (default) to match all columns, or "subset" to match only those listed. - cols: may be "all" (default) to match all columns, or "subset" to match only those listed.
- rows: may be "all" (default) to match all rows, or "subset" to match only those listed, - rows: may be "all" (default) to match all rows, or "subset" to match only those listed,
or a function called with a Record to return whether to include it. or a function called with a Record to return whether to include it.
- sort: optionally a key function called with a Record, for sorting observed rows. - sort: optionally a key function called with a Record, for sorting observed rows.
""" """
if hasattr(data[0], '_asdict'): # namedtuple
data = [r._asdict() for r in data]
if isinstance(data[0], dict):
expected = testutil.table_data_from_row_dicts(table_name, data)
col_names = ['id'] + list(expected.columns)
else:
assert data[0][0] == 'id', "assertRecords requires 'id' as the first column" assert data[0][0] == 'id', "assertRecords requires 'id' as the first column"
col_names = data[0] col_names = data[0]
row_data = data[1:] row_data = data[1:]
@ -236,7 +248,7 @@ class EngineTestCase(unittest.TestCase):
if rows == "all": if rows == "all":
row_ids = list(table.row_ids) row_ids = list(table.row_ids)
elif rows == "subset": elif rows == "subset":
row_ids = [row[0] for row in row_data] row_ids = expected.row_ids
elif callable(rows): elif callable(rows):
row_ids = [r.id for r in table.user_table.all if rows(r)] row_ids = [r.id for r in table.user_table.all if rows(r)]
else: else:

View File

@ -0,0 +1,253 @@
import datetime
import logging
import unittest
import six
import moment
import objtypes
import testutil
import test_engine
log = logging.getLogger(__name__)
def D(year, month, day):
  """Return the timestamp for the given calendar date; shorthand for building test data."""
  as_date = datetime.date(year, month, day)
  return moment.date_to_ts(as_date)
class TestLookupFind(test_engine.EngineTestCase):
  """Tests for the sorted-lookup search methods: lookupRecords(...).find.{lt,le,gt,ge,eq}."""

  def do_setup(self):
    # Load a two-table fixture: Customers, and Purchases referencing Customers.
    # Purchases includes a manualSort column, and two formula columns (Prev/Cumul) that
    # individual tests may fill in or rely on.
    self.load_sample(testutil.parse_test_sample({
      "SCHEMA": [
        [1, "Customers", [
          [11, "Name", "Text", False, "", "", ""],
          [12, "MyDate", "Date", False, "", "", ""],
        ]],
        [2, "Purchases", [
          [20, "manualSort", "PositionNumber", False, "", "", ""],
          [21, "Customer", "Ref:Customers", False, "", "", ""],
          [22, "Date", "Date", False, "", "", ""],
          [24, "Category", "Text", False, "", "", ""],
          [25, "Amount", "Numeric", False, "", "", ""],
          [26, "Prev", "Ref:Purchases", True, "None", "", ""],  # To be filled
          [27, "Cumul", "Numeric", True, "$Prev.Cumul + $Amount", "", ""],
        ]],
      ],
      "DATA": {
        "Customers": [
          ["id", "Name", "MyDate"],
          [1, "Alice", D(2023,12,5)],
          [2, "Bob", D(2023,12,10)],
        ],
        "Purchases": [
          [ "id", "manualSort", "Customer", "Date", "Category", "Amount", ],
          [1, 1.0, 1, D(2023,12,1), "A", 10],
          [2, 2.0, 2, D(2023,12,4), "A", 17],
          [3, 3.0, 1, D(2023,12,3), "A", 20],
          [4, 4.0, 1, D(2023,12,9), "A", 40],
          [5, 5.0, 1, D(2023,12,2), "B", 80],
          [6, 6.0, 1, D(2023,12,6), "B", 160],
          [7, 7.0, 1, D(2023,12,7), "A", 320],
          [8, 8.0, 1, D(2023,12,5), "A", 640],
        ],
      }
    }))
  def do_test_lookup_find(self, find="find", ref_type_to_use=None):
    # Shared body for the find.* tests.
    # - find: attribute to use, "find" or "_find" (the latter for when a user column named
    #   "find" shadows the method).
    # - ref_type_to_use: when set, perform the lookup via an intermediate column of this type
    #   (e.g. "Any" or "RefList:Purchases") instead of inline in each formula.
    self.do_setup()
    if ref_type_to_use:
      self.add_column("Customers", "PurchasesByDate", type=ref_type_to_use,
                      formula="Purchases.lookupRecords(Customer=$id, sort_by='Date')")
      lookup = "$PurchasesByDate"
    else:
      lookup = "Purchases.lookupRecords(Customer=$id, sort_by='Date')"
    # One column per comparison method, each returning the nearest matching Purchase.
    self.add_column("Customers", "LTDate", type="Ref:Purchases",
                    formula="{}.{}.lt($MyDate)".format(lookup, find))
    self.add_column("Customers", "LEDate", type="Ref:Purchases",
                    formula="{}.{}.le($MyDate)".format(lookup, find))
    self.add_column("Customers", "GTDate", type="Ref:Purchases",
                    formula="{}.{}.gt($MyDate)".format(lookup, find))
    self.add_column("Customers", "GEDate", type="Ref:Purchases",
                    formula="{}.{}.ge($MyDate)".format(lookup, find))
    self.add_column("Customers", "EQDate", type="Ref:Purchases",
                    formula="{}.{}.eq($MyDate)".format(lookup, find))
    # Here's the purchase data sorted by Customer and Date
    # id   Customer    Date
    # 1,   1,          D(2023,12,1)
    # 5,   1,          D(2023,12,2)
    # 3,   1,          D(2023,12,3)
    # 8,   1,          D(2023,12,5)
    # 6,   1,          D(2023,12,6)
    # 7,   1,          D(2023,12,7)
    # 4,   1,          D(2023,12,9)
    # 2,   2,          D(2023,12,4)
    # pylint: disable=line-too-long
    self.assertTableData('Customers', cols="subset", data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,5), LTDate=3, LEDate=8, GTDate=6, GEDate=8, EQDate=8),
      dict(id=2, Name="Bob", MyDate=D(2023,12,10), LTDate=2, LEDate=2, GTDate=0, GEDate=0, EQDate=0),
    ])
    # Change Dates for Alice and Bob
    self.update_record('Customers', 1, MyDate=D(2023,12,4))
    self.update_record('Customers', 2, MyDate=D(2023,12,4))
    self.assertTableData('Customers', cols="subset", data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,4), LTDate=3, LEDate=3, GTDate=8, GEDate=8, EQDate=0),
      dict(id=2, Name="Bob", MyDate=D(2023,12,4), LTDate=0, LEDate=2, GTDate=0, GEDate=2, EQDate=2),
    ])
    # Change a Purchase from Alice to Bob, and remove a purchase for Alice
    self.update_record('Purchases', 5, Customer=2)
    self.remove_record('Purchases', 3)
    self.assertTableData('Customers', cols="subset", data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,4), LTDate=1, LEDate=1, GTDate=8, GEDate=8, EQDate=0),
      dict(id=2, Name="Bob", MyDate=D(2023,12,4), LTDate=5, LEDate=2, GTDate=0, GEDate=2, EQDate=2),
    ])
    # Another update to the lookup date for Bob.
    self.update_record('Customers', 2, MyDate=D(2023,1,1))
    self.assertTableData('Customers', cols="subset", data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,4), LTDate=1, LEDate=1, GTDate=8, GEDate=8, EQDate=0),
      dict(id=2, Name="Bob", MyDate=D(2023,1,1), LTDate=0, LEDate=0, GTDate=5, GEDate=5, EQDate=0),
    ])
  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_lookup_find(self):
    # Base case: lookupRecords(...).find.* via the normal "find" attribute.
    self.do_test_lookup_find()

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_lookup_underscore_find(self):
    # Repeat the previous test case with _find in place of find. Normally, we can use
    # lookupRecords(...).find.*, but if a column named "find" exists, it will shadow this method,
    # and lookupRecords(...)._find.* may be used instead (with an underscore). Check that it works.
    self.do_test_lookup_find(find="_find")

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_lookup_find_ref_any(self):
    # Same checks when the lookup result is stored in an intermediate column of type Any.
    self.do_test_lookup_find(ref_type_to_use='Any')

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_lookup_find_ref_reflist(self):
    # Same checks when the lookup result is stored in a RefList column.
    self.do_test_lookup_find(ref_type_to_use='RefList:Purchases')
  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_lookup_find_empty(self):
    # Check that find.* methods on an empty lookup result return the falsy record (id 0)
    # rather than raising, and start working once the result becomes non-empty.
    self.do_setup()
    # No purchase has Category 'C' initially, so this lookup is empty.
    self.add_column("Customers", "P", type='RefList:Purchases',
                    formula="Purchases.lookupRecords(Customer=$id, Category='C', sort_by='Date')")
    self.add_column("Customers", "LTDate", type="Ref:Purchases", formula="$P.find.lt($MyDate)")
    self.add_column("Customers", "LEDate", type="Ref:Purchases", formula="$P.find.le($MyDate)")
    self.add_column("Customers", "GTDate", type="Ref:Purchases", formula="$P.find.gt($MyDate)")
    self.add_column("Customers", "GEDate", type="Ref:Purchases", formula="$P.find.ge($MyDate)")
    self.add_column("Customers", "EQDate", type="Ref:Purchases", formula="$P.find.eq($MyDate)")
    # pylint: disable=line-too-long
    self.assertTableData('Customers', cols="subset", data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,5), LTDate=0, LEDate=0, GTDate=0, GEDate=0, EQDate=0),
      dict(id=2, Name="Bob", MyDate=D(2023,12,10), LTDate=0, LEDate=0, GTDate=0, GEDate=0, EQDate=0),
    ])
    # Check find.* results once the lookup result becomes non-empty.
    self.update_record('Purchases', 5, Category="C")
    self.assertTableData('Customers', cols="subset", data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,5), LTDate=5, LEDate=5, GTDate=0, GEDate=0, EQDate=0),
      dict(id=2, Name="Bob", MyDate=D(2023,12,10), LTDate=0, LEDate=0, GTDate=0, GEDate=0, EQDate=0),
    ])
  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_lookup_find_unsorted(self):
    # find.* requires a sorted lookupRecords() result; applying it to a plain list of records
    # (here, built from lookupOne) should produce a ValueError in the cell.
    self.do_setup()
    self.add_column("Customers", "P", type='RefList:Purchases',
                    formula="[Purchases.lookupOne(Customer=$id)]")
    self.add_column("Customers", "LTDate", type="Ref:Purchases", formula="$P.find.lt($MyDate)")
    err = objtypes.RaisedException(ValueError())
    self.assertTableData('Customers', cols="subset", data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,5), LTDate=err),
      dict(id=2, Name="Bob", MyDate=D(2023,12,10), LTDate=err),
    ])
  @unittest.skipUnless(six.PY2, "Python 2 only")
  def test_lookup_find_py2(self):
    # The find.* methods are Python3-only; on Python2 they should produce NotImplementedError
    # in the cell rather than silently misbehaving.
    self.do_setup()
    self.add_column("Customers", "LTDate", type="Ref:Purchases",
                    formula="Purchases.lookupRecords(Customer=$id, sort_by='Date').find.lt($MyDate)")
    err = objtypes.RaisedException(NotImplementedError())
    self.assertTableData('Customers', data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,5), LTDate=err),
      dict(id=2, Name="Bob", MyDate=D(2023,12,10), LTDate=err),
    ])
  def test_column_named_find(self):
    # Test that we can add a column named "find", use it, and remove it.
    # (Such a column shadows the lookup results' "find" attribute; see
    # test_lookup_underscore_find for the "_find" workaround.)
    self.do_setup()
    self.add_column("Customers", "find", type="Text")
    # Check that the column is usable.
    self.update_record("Customers", 1, find="Hello")
    self.assertTableData('Customers', cols="all", data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,5), find="Hello"),
      dict(id=2, Name="Bob", MyDate=D(2023,12,10), find=""),
    ])
    # Check that we can remove the column.
    self.remove_column("Customers", "find")
    self.assertTableData('Customers', cols="all", data=[
      dict(id=1, Name="Alice", MyDate=D(2023,12,5)),
      dict(id=2, Name="Bob", MyDate=D(2023,12,10)),
    ])
  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_rename_find_attrs(self):
    """
    Check that in formulas like Table.lookupRecords(...).find.lt(...).ColID, renames of ColID
    update the formula.
    """
    # Create a simple table (People) with a couple records.
    self.apply_user_action(["AddTable", "People", [
      dict(id="Name", type="Text")
    ]])
    self.add_record("People", Name="Alice")
    self.add_record("People", Name="Bob")
    # Create a separate table that does a lookup in the People table.
    # Covers attribute access after find.*, after _find.*, after PREVIOUS(), and after find.next().
    self.apply_user_action(["AddTable", "Test", [
      dict(id="Lookup1", type="Any", isFormula=True,
           formula="People.lookupRecords(order_by='Name').find.ge('B').Name"),
      dict(id="Lookup2", type="Any", isFormula=True,
           formula="People.lookupRecords(order_by='Name')._find.eq('Alice').Name"),
      dict(id="Lookup3", type="Any", isFormula=True,
           formula="r = People.lookupRecords(order_by='Name').find.ge('B')\n" +
                   "PREVIOUS(r, order_by=None).Name"),
      dict(id="Lookup4", type="Any", isFormula=True,
           formula="r = People.lookupRecords(order_by='Name').find.eq('Alice')\n" +
                   "People.lookupRecords(order_by='Name').find.next(r).Name")
    ]])
    self.add_record("Test")

    # Test that lookups return data as expected.
    self.assertTableData('Test', cols="subset", data=[
      dict(id=1, Lookup1="Bob", Lookup2="Alice", Lookup3="Alice", Lookup4="Bob")
    ])

    # Rename a column used for lookups or order_by. Lookup result shouldn't change.
    self.apply_user_action(["RenameColumn", "People", "Name", "FullName"])
    self.assertTableData('Test', cols="subset", data=[
      dict(id=1, Lookup1="Bob", Lookup2="Alice", Lookup3="Alice", Lookup4="Bob")
    ])
    # The stored formulas should have been rewritten to use the new column name everywhere,
    # including in order_by arguments and in attribute access on the find.* results.
    self.assertTableData('_grist_Tables_column', cols="subset", rows="subset", data=[
      dict(id=6, colId="Lookup3",
           formula="r = People.lookupRecords(order_by='FullName').find.ge('B')\n" +
                   "PREVIOUS(r, order_by=None).FullName"),
      dict(id=7, colId="Lookup4",
           formula="r = People.lookupRecords(order_by='FullName').find.eq('Alice')\n" +
                   "People.lookupRecords(order_by='FullName').find.next(r).FullName")
    ])

View File

@ -0,0 +1,115 @@
import math
import time
import testutil
import test_engine
class TestLookupPerformance(test_engine.EngineTestCase):
  """Performance checks that sorted lookups don't introduce quadratic complexity."""

  def test_non_quadratic(self):
    # This test measures performance which depends on other stuff running on the machine, which
    # makes it inherently flaky. But if it fails legitimately, it should fail every time. So we
    # run multiple times (3), and fail only if all of those times fail.
    for i in range(2):
      try:
        return self._do_test_non_quadratic()
      except Exception as e:
        # Report why this attempt failed, for easier debugging of flaky runs. (Previously the
        # caught exception was discarded, leaving only a bare "FAIL #N" message.)
        print("FAIL #%d: %s" % (i + 1, e))
    # Last attempt: let any exception propagate so that the test actually fails.
    self._do_test_non_quadratic()
  def _do_test_non_quadratic(self):
    # If the same lookupRecords is called by many cells, it should reuse calculations, not lead to
    # quadratic complexity. (Actually making use of the result would often still be O(N) in each
    # cell, but here we check that just doing the lookup is O(1) amortized.)

    # Table1 has columns: Date and Status, each will have just two distinct values.
    # We add a bunch of formulas that should take constant time outside of the lookup.

    # The way we test for quadratic complexity is by timing "BulkAddRecord" action that causes all
    # rows to recalculate for a geometrically growing sequence of row counts. Then we
    # log-transform the data and do linear regression on it. It should produce data that fits
    # closely a line of slope 1.

    self.setUp()    # Repeat setup because this test case gets called multiple times.
    self.load_sample(testutil.parse_test_sample({
      "SCHEMA": [
        [1, "Table1", [
          [1, "Date", "Date", False, "", "", ""],
          [2, "Status", "Text", False, "", "", ""],
          [3, "lookup_1a", "Any", True, "len(Table1.all)", "", ""],
          [4, "lookup_2a", "Any", True, "len(Table1.lookupRecords(order_by='-Date'))", "", ""],
          [5, "lookup_3a", "Any", True,
           "len(Table1.lookupRecords(Status=$Status, order_by=('-Date', '-id')))", "", ""],
          [6, "lookup_1b", "Any", True, "Table1.lookupOne().id", "", ""],
          # Keep one legacy sort_by example (it shares implementation, so should work similarly)
          [7, "lookup_2b", "Any", True, "Table1.lookupOne(sort_by='-Date').id", "", ""],
          [8, "lookup_3b", "Any", True,
           "Table1.lookupOne(Status=$Status, order_by=('-Date', '-id')).id", "", ""],
        ]]
      ],
      "DATA": {}
    }))

    num_records = 0

    def add_records(count):
      # Add `count` rows (cycling through 4 fixed Date/Status combinations), then spot-check
      # the lookup formulas on row 1 so that we know all rows got recalculated.
      assert count % 4 == 0, "Call add_records with multiples of 4 here"
      self.add_records("Table1", ["Date", "Status"], [
        [ "2024-01-01", "Green" ],
        [ "2024-01-01", "Green" ],
        [ "2024-02-01", "Blue" ],
        [ "2000-01-01", "Blue" ],
      ] * (count // 4))

      N = num_records + count
      self.assertTableData(
          "Table1", cols="subset", rows="subset", data=[
            ["id", "lookup_1a", "lookup_2a", "lookup_3a", "lookup_1b", "lookup_2b", "lookup_3b"],
            [1, N, N, N // 2, 1, 3, N - 2],
          ])
      return N

    # Add records in a geometric sequence, timing each batch, until ~2 seconds have elapsed.
    times = {}
    start_time = time.time()
    last_time = start_time
    count_add = 20
    while last_time < start_time + 2:   # Stop once we've spent 2 seconds
      add_time = time.time()
      num_records = add_records(count_add)
      last_time = time.time()
      times[num_records] = last_time - add_time
      count_add *= 2

    count_array = sorted(times.keys())
    times_array = [times[r] for r in count_array]

    # Perform linear regression on log-transformed data
    log_count_array = [math.log(x) for x in count_array]
    log_times_array = [math.log(x) for x in times_array]

    # Calculate slope and intercept using the least squares method.
    # Doing this manually so that it works in Python2 too.
    # Otherwise, we could just use statistics.linear_regression()
    n = len(log_count_array)
    sum_x = sum(log_count_array)
    sum_y = sum(log_times_array)
    sum_xx = sum(x * x for x in log_count_array)
    sum_xy = sum(x * y for x, y in zip(log_count_array, log_times_array))
    slope = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x)
    intercept = (sum_y - slope * sum_x) / n

    # Calculate R-squared
    mean_y = sum_y / n
    ss_tot = sum((y - mean_y) ** 2 for y in log_times_array)
    ss_res = sum((y - (slope * x + intercept)) ** 2
                 for x, y in zip(log_count_array, log_times_array))
    r_squared = 1 - (ss_res / ss_tot)

    # Check that the slope is close to 1. For log-transformed data, this means a linear
    # relationship (a quadratic term would make the slope 2).
    # In practice, we see slope even less 1 (because there is a non-trivial constant term), so we
    # can assert things a bit lower than 1: 0.86 to 1.04.
    err_msg = "Time is non-linear: slope {} R^2 {}".format(slope, r_squared)
    self.assertAlmostEqual(slope, 0.95, delta=0.09, msg=err_msg)

    # Check that R^2 is close to 1, meaning that data is very close to that line (of slope ~1).
    self.assertAlmostEqual(r_squared, 1, delta=0.08, msg=err_msg)

View File

@ -0,0 +1,514 @@
import datetime
import logging
import moment
import testutil
import test_engine
from table import make_sort_spec
log = logging.getLogger(__name__)
def D(year, month, day):
  """Shorthand: timestamp for the given year/month/day, for readable test data."""
  return moment.date_to_ts(datetime.date(year=year, month=month, day=day))
class TestLookupSort(test_engine.EngineTestCase):
  """Tests for sorted lookups: the order_by/sort_by arguments of lookupRecords/lookupOne."""

  def do_setup(self, order_by_arg):
    # Load a Customers/Purchases fixture. `order_by_arg` is interpolated verbatim into the
    # lookup formulas (e.g. "order_by='-Date'" or "sort_by=None"), so each test can exercise
    # a different sorting argument with the same data.
    self.load_sample(testutil.parse_test_sample({
      "SCHEMA": [
        [1, "Customers", [
          [11, "Name", "Text", False, "", "", ""],
          [12, "Lookup", "RefList:Purchases", True,
           "Purchases.lookupRecords(Customer=$id, %s)" % order_by_arg, "", ""],
          [13, "LookupAmount", "Any", True,
           "Purchases.lookupRecords(Customer=$id, %s).Amount" % order_by_arg, "", ""],
          [14, "LookupDotAmount", "Any", True, "$Lookup.Amount", "", ""],
          [15, "LookupContains", "RefList:Purchases", True,
           "Purchases.lookupRecords(Customer=$id, Tags=CONTAINS('foo'), %s)" % order_by_arg,
           "", ""],
          [16, "LookupContainsDotAmount", "Any", True, "$LookupContains.Amount", "", ""],
        ]],
        [2, "Purchases", [
          [21, "Customer", "Ref:Customers", False, "", "", ""],
          [22, "Date", "Date", False, "", "", ""],
          [23, "Tags", "ChoiceList", False, "", "", ""],
          [24, "Category", "Text", False, "", "", ""],
          [25, "Amount", "Numeric", False, "", "", ""],
        ]],
      ],
      "DATA": {
        "Customers": [
          ["id", "Name"],
          [1, "Alice"],
          [2, "Bob"],
        ],
        "Purchases": [
          [ "id", "Customer", "Date", "Tags", "Category", "Amount", ],
          # Note: the tenths digit of Amount corresponds to day, for easier ordering of expected
          # sort results.
          [1, 1, D(2023,12,1), ["foo"], "A", 10.1],
          [2, 2, D(2023,12,4), ["foo"], "A", 17.4],
          [3, 1, D(2023,12,3), ["bar"], "A", 20.3],
          [4, 1, D(2023,12,9), ["foo", "bar"], "A", 40.9],
          [5, 1, D(2023,12,2), ["foo", "bar"], "B", 80.2],
          [6, 1, D(2023,12,6), ["bar"], "B", 160.6],
          [7, 1, D(2023,12,7), ["foo"], "A", 320.7],
          [8, 1, D(2023,12,5), ["bar", "foo"], "A", 640.5],
        ],
      }
    }))
def test_make_sort_spec(self):
"""
Test interpretations of different kinds of order_by and sort_by params.
"""
# Test the default for Table.lookupRecords.
self.assertEqual(make_sort_spec(('id',), None, True), ())
self.assertEqual(make_sort_spec(('id',), None, False), ())
# Test legacy sort_by
self.assertEqual(make_sort_spec(('Doh',), 'Foo', True), ('Foo',))
self.assertEqual(make_sort_spec(None, '-Foo', False), ('-Foo',))
# Test None, string, tuple, without manualSort.
self.assertEqual(make_sort_spec(None, None, False), ())
self.assertEqual(make_sort_spec('Bar', None, False), ('Bar',))
self.assertEqual(make_sort_spec(('Foo', '-Bar'), None, False), ('Foo', '-Bar'))
# Test None, string, tuple, WITH manualSort.
self.assertEqual(make_sort_spec(None, None, True), ('manualSort',))
self.assertEqual(make_sort_spec('Bar', None, True), ('Bar', 'manualSort'))
self.assertEqual(make_sort_spec(('Foo', '-Bar'), None, True), ('Foo', '-Bar', 'manualSort'))
# If 'manualSort' is present, should not be added twice.
self.assertEqual(make_sort_spec(('Foo', 'manualSort'), None, True), ('Foo', 'manualSort'))
# If 'id' is present, fields starting with it are dropped.
self.assertEqual(make_sort_spec(('Bar', 'id'), None, True), ('Bar',))
self.assertEqual(make_sort_spec(('Foo', 'id', 'manualSort', 'X'), None, True), ('Foo',))
self.assertEqual(make_sort_spec('id', None, True), ())
  def test_lookup_sort_by_default(self):
    """
    Tests lookups with default sort (by row_id) using sort_by=None, and how it reacts to changes.
    """
    self.do_setup('sort_by=None')
    self._do_test_lookup_sort_by_default()

  def test_lookup_order_by_none(self):
    # order_by=None means default to manualSort. But this test case should not be affected.
    self.do_setup('order_by=None')
    self._do_test_lookup_sort_by_default()
  def _do_test_lookup_sort_by_default(self):
    # Shared checks for default-order lookups (row-ID order): initial state, then reactions to
    # changing a lookup key (Customer), a sort-irrelevant field (Date), and a value field (Amount).
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [1, 3, 4, 5, 6, 7, 8],
        LookupAmount = [10.1, 20.3, 40.9, 80.2, 160.6, 320.7, 640.5],
        LookupDotAmount = [10.1, 20.3, 40.9, 80.2, 160.6, 320.7, 640.5],
        LookupContains = [1, 4, 5, 7, 8],
        LookupContainsDotAmount = [10.1, 40.9, 80.2, 320.7, 640.5],
      )
    ])

    # Change Customer of Purchase #2 (Bob -> Alice) and check that all got updated.
    # (The list of purchases for Alice gets the new purchase #2.)
    out_actions = self.update_record("Purchases", 2, Customer=1)
    self.assertEqual(out_actions.calls["Customers"], {
      "Lookup": 2, "LookupAmount": 2, "LookupDotAmount": 2,
      "LookupContains": 2, "LookupContainsDotAmount": 2,
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [1, 2, 3, 4, 5, 6, 7, 8],
        LookupAmount = [10.1, 17.4, 20.3, 40.9, 80.2, 160.6, 320.7, 640.5],
        LookupDotAmount = [10.1, 17.4, 20.3, 40.9, 80.2, 160.6, 320.7, 640.5],
        LookupContains = [1, 2, 4, 5, 7, 8],
        LookupContainsDotAmount = [10.1, 17.4, 40.9, 80.2, 320.7, 640.5],
      )
    ])

    # Change Customer of Purchase #1 (Alice -> Bob) and check that all got updated.
    # (The list of purchases for Alice loses the purchase #1.)
    out_actions = self.update_record("Purchases", 1, Customer=2)
    self.assertEqual(out_actions.calls["Customers"], {
      "Lookup": 2, "LookupAmount": 2, "LookupDotAmount": 2,
      "LookupContains": 2, "LookupContainsDotAmount": 2,
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [2, 3, 4, 5, 6, 7, 8],
        LookupAmount = [17.4, 20.3, 40.9, 80.2, 160.6, 320.7, 640.5],
        LookupDotAmount = [17.4, 20.3, 40.9, 80.2, 160.6, 320.7, 640.5],
        LookupContains = [2, 4, 5, 7, 8],
        LookupContainsDotAmount = [17.4, 40.9, 80.2, 320.7, 640.5],
      )
    ])

    # Change Date of Purchase #3 to much earlier, and check that all got updated.
    out_actions = self.update_record("Purchases", 3, Date=D(2023,8,1))
    # Nothing to recompute in this case, since it doesn't depend on Date.
    self.assertEqual(out_actions.calls.get("Customers"), None)

    # Change Amount of Purchase #3 to much larger, and check that just amounts got updated.
    out_actions = self.update_record("Purchases", 3, Amount=999999)
    self.assertEqual(out_actions.calls["Customers"], {
      # Lookups that don't depend on Amount aren't recalculated
      "LookupAmount": 1, "LookupDotAmount": 1,
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [2, 3, 4, 5, 6, 7, 8],
        LookupAmount = [17.4, 999999, 40.9, 80.2, 160.6, 320.7, 640.5],
        LookupDotAmount = [17.4, 999999, 40.9, 80.2, 160.6, 320.7, 640.5],
        LookupContains = [2, 4, 5, 7, 8],
        LookupContainsDotAmount = [17.4, 40.9, 80.2, 320.7, 640.5],
      )
    ])
  def test_lookup_sort_by_date(self):
    """
    Tests lookups with sort by "-Date", and how it reacts to changes.
    """
    self.do_setup('sort_by="-Date"')
    self._do_test_lookup_sort_by_date()

  def test_lookup_order_by_date(self):
    # With order_by, we'll fall back to manualSort, but this shouldn't matter here.
    self.do_setup('order_by="-Date"')
    self._do_test_lookup_sort_by_date()
  def _do_test_lookup_sort_by_date(self):
    # Shared checks for "-Date"-ordered lookups: initial order, then reactions to changing a
    # lookup key (Customer), the sort key itself (Date), and a value field (Amount).
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 6, 8, 3, 5, 1],
        LookupAmount = [40.9, 320.7, 160.6, 640.5, 20.3, 80.2, 10.1],
        LookupDotAmount = [40.9, 320.7, 160.6, 640.5, 20.3, 80.2, 10.1],
        LookupContains = [4, 7, 8, 5, 1],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 80.2, 10.1],
      )
    ])

    # Change Customer of Purchase #2 (Bob -> Alice) and check that all got updated.
    # (The list of purchases for Alice gets the new purchase #2.)
    out_actions = self.update_record("Purchases", 2, Customer=1)
    self.assertEqual(out_actions.calls["Customers"], {
      "Lookup": 2, "LookupAmount": 2, "LookupDotAmount": 2,
      "LookupContains": 2, "LookupContainsDotAmount": 2,
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 6, 8, 2, 3, 5, 1],
        LookupAmount = [40.9, 320.7, 160.6, 640.5, 17.4, 20.3, 80.2, 10.1],
        LookupDotAmount = [40.9, 320.7, 160.6, 640.5, 17.4, 20.3, 80.2, 10.1],
        LookupContains = [4, 7, 8, 2, 5, 1],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 17.4, 80.2, 10.1],
      )
    ])

    # Change Customer of Purchase #1 (Alice -> Bob) and check that all got updated.
    # (The list of purchases for Alice loses the purchase #1.)
    out_actions = self.update_record("Purchases", 1, Customer=2)
    self.assertEqual(out_actions.calls["Customers"], {
      "Lookup": 2, "LookupAmount": 2, "LookupDotAmount": 2,
      "LookupContains": 2, "LookupContainsDotAmount": 2,
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 6, 8, 2, 3, 5],
        LookupAmount = [40.9, 320.7, 160.6, 640.5, 17.4, 20.3, 80.2],
        LookupDotAmount = [40.9, 320.7, 160.6, 640.5, 17.4, 20.3, 80.2],
        LookupContains = [4, 7, 8, 2, 5],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 17.4, 80.2],
      )
    ])

    # Change Date of Purchase #3 to much earlier, and check that all got updated.
    out_actions = self.update_record("Purchases", 3, Date=D(2023,8,1))
    self.assertEqual(out_actions.calls.get("Customers"), {
      # Only the affected lookups are affected
      "Lookup": 1, "LookupAmount": 1, "LookupDotAmount": 1
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 6, 8, 2, 5, 3],
        LookupAmount = [40.9, 320.7, 160.6, 640.5, 17.4, 80.2, 20.3],
        LookupDotAmount = [40.9, 320.7, 160.6, 640.5, 17.4, 80.2, 20.3],
        LookupContains = [4, 7, 8, 2, 5],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 17.4, 80.2],
      )
    ])

    # Change Amount of Purchase #3 to much larger, and check that just amounts got updated.
    out_actions = self.update_record("Purchases", 3, Amount=999999)
    self.assertEqual(out_actions.calls["Customers"], {
      # Lookups that don't depend on Amount aren't recalculated
      "LookupAmount": 1, "LookupDotAmount": 1,
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 6, 8, 2, 5, 3],
        LookupAmount = [40.9, 320.7, 160.6, 640.5, 17.4, 80.2, 999999],
        LookupDotAmount = [40.9, 320.7, 160.6, 640.5, 17.4, 80.2, 999999],
        LookupContains = [4, 7, 8, 2, 5],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 17.4, 80.2],
      )
    ])
  def test_lookup_order_by_tuple(self):
    """
    Tests lookups with order by ("Category", "-Date"), and how it reacts to changes.
    """
    self.do_setup('order_by=("Category", "-Date")')
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 8, 3, 1, 6, 5],
        LookupAmount = [40.9, 320.7, 640.5, 20.3, 10.1, 160.6, 80.2],
        LookupDotAmount = [40.9, 320.7, 640.5, 20.3, 10.1, 160.6, 80.2],
        LookupContains = [4, 7, 8, 1, 5],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 10.1, 80.2],
      )
    ])

    # Change Customer of Purchase #2 (Bob -> Alice) and check that all got updated.
    # (The list of purchases for Alice gets the new purchase #2.)
    out_actions = self.update_record("Purchases", 2, Customer=1)
    self.assertEqual(out_actions.calls["Customers"], {
      "Lookup": 2, "LookupAmount": 2, "LookupDotAmount": 2,
      "LookupContains": 2, "LookupContainsDotAmount": 2,
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 8, 2, 3, 1, 6, 5],
        LookupAmount = [40.9, 320.7, 640.5, 17.4, 20.3, 10.1, 160.6, 80.2],
        LookupDotAmount = [40.9, 320.7, 640.5, 17.4, 20.3, 10.1, 160.6, 80.2],
        LookupContains = [4, 7, 8, 2, 1, 5],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 17.4, 10.1, 80.2],
      )
    ])

    # Change Customer of Purchase #1 (Alice -> Bob) and check that all got updated.
    # (The list of purchases for Alice loses the purchase #1.)
    out_actions = self.update_record("Purchases", 1, Customer=2)
    self.assertEqual(out_actions.calls["Customers"], {
      "Lookup": 2, "LookupAmount": 2, "LookupDotAmount": 2,
      "LookupContains": 2, "LookupContainsDotAmount": 2,
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 8, 2, 3, 6, 5],
        LookupAmount = [40.9, 320.7, 640.5, 17.4, 20.3, 160.6, 80.2],
        LookupDotAmount = [40.9, 320.7, 640.5, 17.4, 20.3, 160.6, 80.2],
        LookupContains = [4, 7, 8, 2, 5],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 17.4, 80.2],
      )
    ])

    # Change Date of Purchase #3 to much earlier, and check that all got updated.
    out_actions = self.update_record("Purchases", 3, Date=D(2023,8,1))
    self.assertEqual(out_actions.calls.get("Customers"), {
      # Only the affected lookups are affected
      "Lookup": 1, "LookupAmount": 1, "LookupDotAmount": 1
    })
    # Actually this happens to be unchanged, because within the category, the new date is still in
    # the same position.
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 8, 2, 3, 6, 5],
        LookupAmount = [40.9, 320.7, 640.5, 17.4, 20.3, 160.6, 80.2],
        LookupDotAmount = [40.9, 320.7, 640.5, 17.4, 20.3, 160.6, 80.2],
        LookupContains = [4, 7, 8, 2, 5],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 17.4, 80.2],
      )
    ])

    # Change Category of Purchase #3 to "B", and check that it got moved.
    out_actions = self.update_record("Purchases", 3, Category="B")
    self.assertEqual(out_actions.calls.get("Customers"), {
      # Only the affected lookups are affected
      "Lookup": 1, "LookupAmount": 1, "LookupDotAmount": 1
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 8, 2, 6, 5, 3],
        LookupAmount = [40.9, 320.7, 640.5, 17.4, 160.6, 80.2, 20.3],
        LookupDotAmount = [40.9, 320.7, 640.5, 17.4, 160.6, 80.2, 20.3],
        LookupContains = [4, 7, 8, 2, 5],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 17.4, 80.2],
      )
    ])

    # Change Amount of Purchase #3 to much larger, and check that just amounts got updated.
    out_actions = self.update_record("Purchases", 3, Amount=999999)
    self.assertEqual(out_actions.calls["Customers"], {
      # Lookups that don't depend on Amount aren't recalculated
      "LookupAmount": 1, "LookupDotAmount": 1,
    })
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(
        id = 1,
        Name = "Alice",
        Lookup = [4, 7, 8, 2, 6, 5, 3],
        LookupAmount = [40.9, 320.7, 640.5, 17.4, 160.6, 80.2, 999999],
        LookupDotAmount = [40.9, 320.7, 640.5, 17.4, 160.6, 80.2, 999999],
        LookupContains = [4, 7, 8, 2, 5],
        LookupContainsDotAmount = [40.9, 320.7, 640.5, 17.4, 80.2],
      )
    ])
  def test_lookup_one(self):
    # Check that lookupOne's result respects the order_by argument: it returns the first
    # record in the requested sort order.
    self.do_setup('order_by=None')

    # Check that the first value returned by default is the one with the lowest row ID.
    self.add_column('Customers', 'One', type="Ref:Purchases",
                    formula="Purchases.lookupOne(Customer=$id)")
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(id = 1, Name = "Alice", One = 1),
      dict(id = 2, Name = "Bob", One = 2),
    ])

    # Check that the first value returned with "-Date" is the one with the highest Date.
    self.modify_column('Customers', 'One',
                       formula="Purchases.lookupOne(Customer=$id, order_by=('-Date',))")
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(id = 1, Name = "Alice", One = 4),
      dict(id = 2, Name = "Bob", One = 2),
    ])

    # Check that the first value returned with "-id" is the one with the highest row ID.
    self.modify_column('Customers', 'One',
                       formula="Purchases.lookupOne(Customer=$id, order_by='-id')")
    self.assertTableData("Customers", cols="subset", rows="subset", data=[
      dict(id = 1, Name = "Alice", One = 8),
      dict(id = 2, Name = "Bob", One = 2),
    ])
def test_renaming_order_by_str(self):
  # Given some lookups with order_by, rename a column used in order_by. Check order_by got
  # adjusted, and the results are correct. Try for order_by as string.
  self.do_setup("order_by='-Date'")
  self.apply_user_action(['RenameColumn', 'Purchases', 'Category', 'cat'])
  self.apply_user_action(['RenameColumn', 'Purchases', 'Date', 'Fecha'])

  # The order_by='-Fecha' strings below show the formulas were rewritten by the rename.
  self.assertTableData('_grist_Tables_column', cols="subset", rows="subset", data=[
    dict(id=12, colId="Lookup",
      formula="Purchases.lookupRecords(Customer=$id, order_by='-Fecha')"),
    dict(id=13, colId="LookupAmount",
      formula="Purchases.lookupRecords(Customer=$id, order_by='-Fecha').Amount"),
  ])
  self.assertTableData("Customers", cols="subset", rows="subset", data=[
    dict(
      id = 1,
      Name = "Alice",
      Lookup = [4, 7, 6, 8, 3, 5, 1],
      LookupAmount = [40.9, 320.7, 160.6, 640.5, 20.3, 80.2, 10.1],
      LookupDotAmount = [40.9, 320.7, 160.6, 640.5, 20.3, 80.2, 10.1],
      LookupContains = [4, 7, 8, 5, 1],
      LookupContainsDotAmount = [40.9, 320.7, 640.5, 80.2, 10.1],
    )
  ])

  # Change the (renamed) Date of Purchase #1 to much later, and check that all got updated.
  self.update_record("Purchases", 1, Fecha=D(2024,12,31))
  self.assertTableData("Customers", cols="subset", rows="subset", data=[
    dict(
      id = 1,
      Name = "Alice",
      Lookup = [1, 4, 7, 6, 8, 3, 5],
      LookupAmount = [10.1, 40.9, 320.7, 160.6, 640.5, 20.3, 80.2],
      LookupDotAmount = [10.1, 40.9, 320.7, 160.6, 640.5, 20.3, 80.2],
      LookupContains = [1, 4, 7, 8, 5],
      LookupContainsDotAmount = [10.1, 40.9, 320.7, 640.5, 80.2],
    )
  ])
def test_renaming_order_by_tuple(self):
  # Given some lookups with order_by, rename a column used in order_by. Check order_by got
  # adjusted, and the results are correct. Try for order_by as tuple.
  self.do_setup("order_by=('Category', '-Date')")
  out_actions = self.apply_user_action(['RenameColumn', 'Purchases', 'Category', 'cat'])

  # Check returned actions to ensure we don't produce actions for any stale lookup helper columns
  # (this is a way to check that we don't forget to clean up stale lookup helper columns).
  # pylint: disable=line-too-long
  self.assertPartialOutActions(out_actions, {
    "stored": [
      ["RenameColumn", "Purchases", "Category", "cat"],
      ["ModifyColumn", "Customers", "Lookup", {"formula": "Purchases.lookupRecords(Customer=$id, order_by=('cat', '-Date'))"}],
      ["ModifyColumn", "Customers", "LookupAmount", {"formula": "Purchases.lookupRecords(Customer=$id, order_by=('cat', '-Date')).Amount"}],
      ["ModifyColumn", "Customers", "LookupContains", {"formula": "Purchases.lookupRecords(Customer=$id, Tags=CONTAINS('foo'), order_by=('cat', '-Date'))"}],
      ["BulkUpdateRecord", "_grist_Tables_column", [24, 12, 13, 15], {"colId": ["cat", "Lookup", "LookupAmount", "LookupContains"], "formula": [
        "",
        "Purchases.lookupRecords(Customer=$id, order_by=('cat', '-Date'))",
        "Purchases.lookupRecords(Customer=$id, order_by=('cat', '-Date')).Amount",
        "Purchases.lookupRecords(Customer=$id, Tags=CONTAINS('foo'), order_by=('cat', '-Date'))",
      ]}],
    ]
  })

  # A second rename, of the other column mentioned in the order_by tuple.
  self.apply_user_action(['RenameColumn', 'Purchases', 'Date', 'Fecha'])
  self.assertTableData('_grist_Tables_column', cols="subset", rows="subset", data=[
    dict(id=12, colId="Lookup",
      formula="Purchases.lookupRecords(Customer=$id, order_by=('cat', '-Fecha'))"),
    dict(id=13, colId="LookupAmount",
      formula="Purchases.lookupRecords(Customer=$id, order_by=('cat', '-Fecha')).Amount"),
  ])
  self.assertTableData("Customers", cols="subset", rows="subset", data=[
    dict(
      id = 1,
      Name = "Alice",
      Lookup = [4, 7, 8, 3, 1, 6, 5],
      LookupAmount = [40.9, 320.7, 640.5, 20.3, 10.1, 160.6, 80.2],
      LookupDotAmount = [40.9, 320.7, 640.5, 20.3, 10.1, 160.6, 80.2],
      LookupContains = [4, 7, 8, 1, 5],
      LookupContainsDotAmount = [40.9, 320.7, 640.5, 10.1, 80.2],
    )
  ])

  # Change the (renamed) Date of Purchase #3 to much earlier, and check that all got updated.
  self.update_record("Purchases", 3, Fecha=D(2023,8,1))
  self.assertTableData("Customers", cols="subset", rows="subset", data=[
    dict(
      id = 1,
      Name = "Alice",
      Lookup = [4, 7, 8, 1, 3, 6, 5],
      LookupAmount = [40.9, 320.7, 640.5, 10.1, 20.3, 160.6, 80.2],
      LookupDotAmount = [40.9, 320.7, 640.5, 10.1, 20.3, 160.6, 80.2],
      LookupContains = [4, 7, 8, 1, 5],
      LookupContainsDotAmount = [40.9, 320.7, 640.5, 10.1, 80.2],
    )
  ])

View File

@ -773,6 +773,13 @@ return ",".join(str(r.id) for r in Students.lookupRecords(firstName=fn, lastName
[9, "lookup_max_num", [9, "lookup_max_num",
"Any", True, "Any", True,
"Table1.lookupOne(is_num=True, sort_by='-num').num", "", ""], "Table1.lookupOne(is_num=True, sort_by='-num').num", "", ""],
[10, "lookup_2a", "Any", True,
"Table1.lookupRecords(order_by=('is_num', 'num')).num", "", ""],
[10, "lookup_2b", "Any", True,
"Table1.lookupRecords(order_by=('is_num', '-num')).num", "", ""],
[10, "lookup_2c", "Any", True,
"Table1.lookupRecords(order_by=('-is_num', 'num')).num", "", ""],
]] ]]
], ],
"DATA": { "DATA": {
@ -795,13 +802,42 @@ return ",".join(str(r.id) for r in Students.lookupRecords(firstName=fn, lastName
"lookup_reverse", "lookup_reverse",
"lookup_first", "lookup_first",
"lookup_min", "lookup_min_num", "lookup_min", "lookup_min_num",
"lookup_max", "lookup_max_num"], "lookup_max", "lookup_max_num",
"lookup_2a", "lookup_2b", "lookup_2c"],
[1, [1,
[None, 0, 1, 2, 3, 'foo'], [None, 0, 1, 2, 3, 'foo'],
['foo', 3, 2, 1, 0, None], ['foo', 3, 2, 1, 0, None],
2, # lookup_first: first record (by id) 2, # lookup_first: first record (by id)
None, 0, # lookup_min[_num] None, 0, # lookup_min[_num]
'foo', 3], # lookup_max[_num] 'foo', 3, # lookup_max[_num]
[None, 'foo', 0, 1, 2, 3], # lookup_2a ('is_num', 'num')
['foo', None, 3, 2, 1, 0], # lookup_2b ('is_num', '-num')
[0, 1, 2, 3, None, 'foo'], # lookup_2c ('-is_num', 'num')
]
])
# Ensure that changes in values used for sorting result in updates,
# and produce correctly sorted updates.
self.update_record("Table1", 2, num=100)
self.assertTableData(
"Table1", cols="subset", rows="subset", data=[
["id",
"lookup",
"lookup_reverse",
"lookup_first",
"lookup_min", "lookup_min_num",
"lookup_max", "lookup_max_num",
"lookup_2a", "lookup_2b", "lookup_2c"],
[1,
[None, 0, 2, 3, 100, 'foo'],
['foo', 100, 3, 2, 0, None],
2, # lookup_first: first record (by id)
None, 0, # lookup_min[_num]
'foo', 100, # lookup_max[_num]
[None, 'foo', 0, 2, 3, 100], # lookup_2a ('is_num', 'num')
['foo', None, 100, 3, 2, 0], # lookup_2b ('is_num', '-num')
[0, 2, 3, 100, None, 'foo'], # lookup_2c ('-is_num', 'num')
]
]) ])
def test_conversion(self): def test_conversion(self):

View File

@ -0,0 +1,389 @@
import datetime
import functools
import itertools
import logging
import unittest
import six
import actions
from column import SafeSortKey
import moment
import objtypes
import testutil
import test_engine
log = logging.getLogger(__name__)
def D(year, month, day):
  """Shorthand: the timestamp corresponding to the given calendar date (via moment)."""
  the_date = datetime.date(year, month, day)
  return moment.date_to_ts(the_date)
class TestPrevNext(test_engine.EngineTestCase):
  """
  Tests for PREVIOUS/NEXT/RANK. Each test replaces the formula of the Purchases.Prev column;
  the Cumul column ($Prev.Cumul + $Amount) then exercises chained references through Prev.
  """

  def do_setup(self):
    # Load the sample tables. Purchases.Prev starts as "None" and is replaced by each test.
    self.load_sample(testutil.parse_test_sample({
      "SCHEMA": [
        [1, "Customers", [
          [11, "Name", "Text", False, "", "", ""],
        ]],
        [2, "Purchases", [
          [20, "manualSort", "PositionNumber", False, "", "", ""],
          [21, "Customer", "Ref:Customers", False, "", "", ""],
          [22, "Date", "Date", False, "", "", ""],
          [24, "Category", "Text", False, "", "", ""],
          [25, "Amount", "Numeric", False, "", "", ""],
          [26, "Prev", "Ref:Purchases", True, "None", "", ""],    # To be filled
          [27, "Cumul", "Numeric", True, "$Prev.Cumul + $Amount", "", ""],
        ]],
      ],
      "DATA": {
        "Customers": [
          ["id", "Name"],
          [1, "Alice"],
          [2, "Bob"],
        ],
        "Purchases": [
          [ "id", "manualSort", "Customer", "Date", "Category", "Amount", ],
          [1, 1.0, 1, D(2023,12,1), "A", 10],
          [2, 2.0, 2, D(2023,12,4), "A", 17],
          [3, 3.0, 1, D(2023,12,3), "A", 20],
          [4, 4.0, 1, D(2023,12,9), "A", 40],
          [5, 5.0, 1, D(2023,12,2), "B", 80],
          [6, 6.0, 1, D(2023,12,6), "B", 160],
          [7, 7.0, 1, D(2023,12,7), "A", 320],
          [8, 8.0, 1, D(2023,12,5), "A", 640],
        ],
      }
    }))

  def calc_expected(self, group_key=None, sort_key=None, sort_reverse=False):
    # Returns expected {id, Prev, Cumul} values from Purchases table calculated according to the
    # given grouping and sorting parameters.
    group_key = group_key or (lambda r: 0)
    data = list(actions.transpose_bulk_action(self.engine.fetch_table('Purchases')))
    expected = []
    # Sort by sort_key first, then (stably) by group_key, so groupby sees contiguous groups
    # while preserving the requested order within each group.
    sorted_data = sorted(data, key=sort_key, reverse=sort_reverse)
    sorted_data = sorted(sorted_data, key=group_key)
    for key, group in itertools.groupby(sorted_data, key=group_key):
      prev = 0
      cumul = 0.0
      for r in group:
        cumul = round(cumul + r.Amount, 2)
        expected.append({"id": r.id, "Prev": prev, "Cumul": cumul})
        prev = r.id
    expected.sort(key=lambda r: r["id"])
    return expected

  def do_test(self, formula, group_key=None, sort_key=None, sort_reverse=False):
    calc_expected = lambda: self.calc_expected(
        group_key=group_key, sort_key=sort_key, sort_reverse=sort_reverse)

    def assertPrevValid():
      # Check that Prev column is legitimate values, e.g. not errors.
      prev = self.engine.fetch_table('Purchases').columns["Prev"]
      self.assertTrue(is_all_ints(prev), "Prev column contains invalid values: %s" %
          [objtypes.encode_object(x) for x in prev])

    # This verification works as follows:
    # (1) Set "Prev" column to the specified formula.
    # (2) Calculate expected values for "Prev" and "Cumul" manually, and compare to reality.
    # (3) Try a few actions that affect the data, and calculate again.
    self.do_setup()
    self.modify_column('Purchases', 'Prev', formula=formula)

    # Check the initial data.
    assertPrevValid()
    self.assertTableData('Purchases', cols="subset", data=calc_expected())

    # Check the result after removing a record.
    self.remove_record('Purchases', 6)
    self.assertTableData('Purchases', cols="subset", data=calc_expected())

    # Check the result after updating a record
    self.update_record('Purchases', 5, Amount=1080)   # original value +1000
    self.assertTableData('Purchases', cols="subset", data=calc_expected())

    first_date = D(2023, 8, 1)

    # Update a few other records
    self.update_record("Purchases", 2, Customer=1)
    self.update_record("Purchases", 1, Customer=2)
    self.update_record("Purchases", 3, Date=first_date)   # becomes earliest in date order
    assertPrevValid()
    self.assertTableData('Purchases', cols="subset", data=calc_expected())

    # Check the result after re-adding a record
    # Note that Date here matches new date of record #3. This tests sort fallback to rowId.
    # Amount is the original amount +1.
    self.add_record('Purchases', 6, manualSort=6.0, Date=first_date, Amount=161)
    self.assertTableData('Purchases', cols="subset", data=calc_expected())

    # Update the manualSort value to test how it affects sort results.
    self.update_record('Purchases', 6, manualSort=0.5)
    self.assertTableData('Purchases', cols="subset", data=calc_expected())
    assertPrevValid()

  def do_test_prevnext(self, formula, group_key=None, sort_key=None, sort_reverse=False):
    # Run do_test() AND also repeat it after replacing PREVIOUS with NEXT in formula, and
    # reversing the expected results.
    # Note that this is a bit fragile: it relies on do_test() being limited to only the kinds of
    # changes that would be reset by another call to self.load_sample().
    with self.subTest(formula=formula):   # pylint: disable=no-member
      self.do_test(formula, group_key=group_key, sort_key=sort_key, sort_reverse=sort_reverse)
    nformula = formula.replace('PREVIOUS', 'NEXT')
    with self.subTest(formula=nformula):  # pylint: disable=no-member
      self.do_test(nformula, group_key=group_key, sort_key=sort_key, sort_reverse=not sort_reverse)

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_none(self):
    self.do_test_prevnext("PREVIOUS(rec, order_by=None)", group_key=None,
        sort_key=lambda r: r.manualSort)

    # Check that order_by arg is required (get TypeError without it).
    with self.assertRaisesRegex(AssertionError, r'Prev column contains invalid values:.*TypeError'):
      self.do_test("PREVIOUS(rec)", sort_key=lambda r: -r.id)

    # These assertions are just to ensure that do_test() tests do exercise the feature being
    # tested, i.e. fail when comparisons are NOT correct.
    with self.assertRaisesRegex(AssertionError, r'Observed data not as expected'):
      self.do_test("PREVIOUS(rec, order_by=None)", sort_key=lambda r: -r.id)
    with self.assertRaisesRegex(AssertionError, r'Observed data not as expected'):
      self.do_test("PREVIOUS(rec, order_by=None)", group_key=(lambda r: r.Customer),
          sort_key=(lambda r: r.id))

    # Make sure the test case above exercises the disambiguation by 'manualSort' (i.e. fails if
    # 'manualSort' isn't used to disambiguate).
    with self.assertRaisesRegex(AssertionError, r'Observed data not as expected'):
      self.do_test("PREVIOUS(rec, order_by=None)", sort_key=lambda r: r.id)

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_date(self):
    self.do_test_prevnext("PREVIOUS(rec, order_by='Date')",
        group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), r.manualSort))

    # Make sure the test case above exercises the disambiguation by 'manualSort' (i.e. fails if it
    # isn't used to disambiguate).
    with self.assertRaisesRegex(AssertionError, r'Observed data not as expected'):
      self.do_test("PREVIOUS(rec, order_by='Date')",
          group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), r.id))

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_date_manualsort(self):
    # Same as the previous test case (with just 'Date'), but specifies 'manualSort' explicitly.
    self.do_test_prevnext("PREVIOUS(rec, order_by=('Date', 'manualSort'))",
        group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), r.manualSort))

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_rdate(self):
    self.do_test_prevnext("PREVIOUS(rec, order_by='-Date')",
        group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), -r.manualSort), sort_reverse=True)

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_rdate_id(self):
    self.do_test_prevnext("PREVIOUS(rec, order_by=('-Date', 'id'))",
        group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), -r.id), sort_reverse=True)

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_customer_rdate(self):
    self.do_test_prevnext("PREVIOUS(rec, group_by=('Customer',), order_by='-Date')",
        group_key=(lambda r: r.Customer), sort_key=lambda r: (SafeSortKey(r.Date), -r.id),
        sort_reverse=True)

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_category_date(self):
    self.do_test_prevnext("PREVIOUS(rec, group_by=('Category',), order_by='Date')",
        group_key=(lambda r: r.Category), sort_key=lambda r: SafeSortKey(r.Date))

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_category_date2(self):
    # Same as above, but group_by given as a plain string rather than a tuple.
    self.do_test_prevnext("PREVIOUS(rec, group_by='Category', order_by='Date')",
        group_key=(lambda r: r.Category), sort_key=lambda r: SafeSortKey(r.Date))

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_n_cat_date(self):
    self.do_test_prevnext("PREVIOUS(rec, order_by=('Category', 'Date'))",
        sort_key=lambda r: (SafeSortKey(r.Category), SafeSortKey(r.Date)))

  @unittest.skipUnless(six.PY2, "Python 2 only")
  def test_prevnext_py2(self):
    # On Python2, we expect NEXT/PREVIOUS to raise a NotImplementedError. It's not hard to make
    # it work, but the stricter argument syntax supported by Python3 is helpful, and we'd like
    # to drop Python2 support anyway.
    self.do_setup()
    self.modify_column('Purchases', 'Prev', formula='PREVIOUS(rec, order_by=None)')
    self.add_column('Purchases', 'Next', formula="NEXT(rec, group_by='Category', order_by='Date')")
    self.add_column('Purchases', 'Rank', formula="RANK(rec, order_by='Date', order='desc')")

    # Check that all values are the expected exception.
    err = objtypes.RaisedException(NotImplementedError())
    self.assertTableData('Purchases', cols="subset", data=[
      dict(id=r, Prev=err, Next=err, Rank=err, Cumul=err) for r in range(1, 9)
    ])

  def do_test_renames(self, formula, renamed_formula, calc_expected_pre, calc_expected_post):
    # Set Prev to the given formula, rename Category/Date/Customer, check that the formula got
    # rewritten to renamed_formula and that the data still matches expectations.
    self.do_setup()
    self.modify_column('Purchases', 'Prev', formula=formula)

    # Check the initial data.
    self.assertTableData('Purchases', cols="subset", data=calc_expected_pre())

    # Do the renames
    self.apply_user_action(['RenameColumn', 'Purchases', 'Category', 'cat'])
    self.apply_user_action(['RenameColumn', 'Purchases', 'Date', 'Fecha'])
    self.apply_user_action(['RenameColumn', 'Purchases', 'Customer', 'person'])

    # Check that rename worked.
    self.assertTableData('_grist_Tables_column', cols="subset", rows="subset", data=[
      dict(id=26, colId="Prev", formula=renamed_formula)
    ])

    # Check that data is as expected, and reacts to changes.
    self.assertTableData('Purchases', cols="subset", data=calc_expected_post())
    self.update_record("Purchases", 1, cat="B")
    self.assertTableData('Purchases', cols="subset", data=calc_expected_post())
    self.update_record("Purchases", 3, Fecha=D(2023,8,1))
    self.assertTableData('Purchases', cols="subset", data=calc_expected_post())

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_renaming_prev_str(self):
    self.do_test_renaming_prevnext_str("PREVIOUS")

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_renaming_next_str(self):
    self.do_test_renaming_prevnext_str("NEXT")

  def do_test_renaming_prevnext_str(self, func):
    # Given some PREVIOUS/NEXT calls with group_by and order_by, rename columns mentioned there,
    # and check columns get adjusted and data remains correct.
    formula = "{}(rec, group_by='Category', order_by='Date')".format(func)
    renamed_formula = "{}(rec, group_by='cat', order_by='Fecha')".format(func)
    self.do_test_renames(formula, renamed_formula,
        calc_expected_pre = functools.partial(self.calc_expected,
          group_key=(lambda r: r.Category), sort_key=lambda r: SafeSortKey(r.Date),
          sort_reverse=(func == 'NEXT')
        ),
        calc_expected_post = functools.partial(self.calc_expected,
          group_key=(lambda r: r.cat), sort_key=lambda r: SafeSortKey(r.Fecha),
          sort_reverse=(func == 'NEXT')
        ),
    )

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_renaming_prev_tuple(self):
    self.do_test_renaming_prevnext_tuple('PREVIOUS')

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_renaming_next_tuple(self):
    self.do_test_renaming_prevnext_tuple('NEXT')

  def do_test_renaming_prevnext_tuple(self, func):
    # Like do_test_renaming_prevnext_str, but with group_by/order_by given as tuples, including
    # a "-"-prefixed (descending) column in order_by.
    formula = "{}(rec, group_by=('Customer',), order_by=('Category', '-Date'))".format(func)
    renamed_formula = "{}(rec, group_by=('person',), order_by=('cat', '-Fecha'))".format(func)

    # To handle "-" prefix for Date.
    class Reverse(object):
      def __init__(self, key):
        self.key = key
      def __lt__(self, other):
        # Inverted comparison, so that sorting ascending by this wrapper sorts descending by key.
        return other.key < self.key

    self.do_test_renames(formula, renamed_formula,
        calc_expected_pre = functools.partial(self.calc_expected,
          group_key=(lambda r: r.Customer),
          sort_key=lambda r: (SafeSortKey(r.Category), Reverse(SafeSortKey(r.Date))),
          sort_reverse=(func == 'NEXT')
        ),
        calc_expected_post = functools.partial(self.calc_expected,
          group_key=(lambda r: r.person),
          sort_key=lambda r: (SafeSortKey(r.cat), Reverse(SafeSortKey(r.Fecha))),
          sort_reverse=(func == 'NEXT')
        ),
    )

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_rank(self):
    # RANK with group_by ranks within each group (ascending by default); without group_by, it
    # ranks across the whole table; order='desc' reverses the ranking.
    self.do_setup()
    formula = "RANK(rec, group_by='Category', order_by='Date')"
    self.add_column('Purchases', 'Rank', formula=formula)
    self.assertTableData('Purchases', cols="subset", data=[
      [ "id", "Date", "Category", "Rank"],
      [1, D(2023,12,1), "A", 1 ],
      [2, D(2023,12,4), "A", 3 ],
      [3, D(2023,12,3), "A", 2 ],
      [4, D(2023,12,9), "A", 6 ],
      [5, D(2023,12,2), "B", 1 ],
      [6, D(2023,12,6), "B", 2 ],
      [7, D(2023,12,7), "A", 5 ],
      [8, D(2023,12,5), "A", 4 ],
    ])

    formula = "RANK(rec, order_by='Date', order='desc')"
    self.modify_column('Purchases', 'Rank', formula=formula)
    self.assertTableData('Purchases', cols="subset", data=[
      [ "id", "Date", "Category", "Rank"],
      [1, D(2023,12,1), "A", 8 ],
      [2, D(2023,12,4), "A", 5 ],
      [3, D(2023,12,3), "A", 6 ],
      [4, D(2023,12,9), "A", 1 ],
      [5, D(2023,12,2), "B", 7 ],
      [6, D(2023,12,6), "B", 3 ],
      [7, D(2023,12,7), "A", 2 ],
      [8, D(2023,12,5), "A", 4 ],
    ])

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_rank_rename(self):
    # Renaming columns mentioned in RANK's group_by/order_by should rewrite the formula
    # (preserving the original quoting style) and keep results unchanged.
    self.do_setup()
    self.add_column('Purchases', 'Rank',
        formula="RANK(rec, group_by=\"Category\", order_by='Date')")
    self.assertTableData('Purchases', cols="subset", data=[
      [ "id", "Date", "Category", "Rank"],
      [1, D(2023,12,1), "A", 1 ],
      [2, D(2023,12,4), "A", 3 ],
      [3, D(2023,12,3), "A", 2 ],
      [4, D(2023,12,9), "A", 6 ],
      [5, D(2023,12,2), "B", 1 ],
      [6, D(2023,12,6), "B", 2 ],
      [7, D(2023,12,7), "A", 5 ],
      [8, D(2023,12,5), "A", 4 ],
    ])

    self.apply_user_action(['RenameColumn', 'Purchases', 'Category', 'cat'])
    self.apply_user_action(['RenameColumn', 'Purchases', 'Date', 'when'])
    renamed_formula = "RANK(rec, group_by=\"cat\", order_by='when')"
    self.assertTableData('_grist_Tables_column', cols="subset", rows="subset", data=[
      dict(id=28, colId="Rank", formula=renamed_formula)
    ])
    self.assertTableData('Purchases', cols="subset", data=[
      [ "id", "when", "cat", "Rank"],
      [1, D(2023,12,1), "A", 1 ],
      [2, D(2023,12,4), "A", 3 ],
      [3, D(2023,12,3), "A", 2 ],
      [4, D(2023,12,9), "A", 6 ],
      [5, D(2023,12,2), "B", 1 ],
      [6, D(2023,12,6), "B", 2 ],
      [7, D(2023,12,7), "A", 5 ],
      [8, D(2023,12,5), "A", 4 ],
    ])

  @unittest.skipUnless(six.PY3, "Python 3 only")
  def test_prevnext_rename_result_attr(self):
    # Renaming a column accessed as an attribute on a PREVIOUS/NEXT result (e.g.
    # PREVIOUS(...).Amount) should rewrite the attribute in the formula too.
    self.do_setup()
    self.add_column('Purchases', 'PrevAmount', formula="PREVIOUS(rec, order_by=None).Amount")
    self.add_column('Purchases', 'NextAmount', formula="NEXT(rec, order_by=None).Amount")
    self.apply_user_action(['RenameColumn', 'Purchases', 'Amount', 'Dollars'])
    self.assertTableData('_grist_Tables_column', cols="subset", rows="subset", data=[
      dict(id=28, colId="PrevAmount", formula="PREVIOUS(rec, order_by=None).Dollars"),
      dict(id=29, colId="NextAmount", formula="NEXT(rec, order_by=None).Dollars"),
    ])
def is_all_ints(array):
  """Return True if every element of `array` is an int (note: bools count as ints)."""
  for item in array:
    if not isinstance(item, int):
      return False
  return True

View File

@ -0,0 +1,78 @@
import test_engine
import testutil
from sort_key import make_sort_key
class TestSortKey(test_engine.EngineTestCase):
  """
  Tests for make_sort_key(): keys reflect current cell values (including after updates), and
  column renames don't leave lookups using stale column references.
  """

  def test_sort_key(self):
    # Set up a table with a few rows.
    self.load_sample(testutil.parse_test_sample({
      "SCHEMA": [
        [1, "Values", [
          [1, "Date", "Numeric", False, "", "", ""],
          [2, "Type", "Text", False, "", "", ""],
        ]],
      ],
      "DATA": {
        "Values": [
          ["id", "Date", "Type"],
          [1, 5, "a"],
          [2, 4, "a"],
          [3, 5, "b"],
        ],
      }
    }))

    table = self.engine.tables["Values"]
    # "-" prefix reverses the order of that column within the key.
    sort_key1 = make_sort_key(table, ("Date", "-Type"))
    sort_key2 = make_sort_key(table, ("-Date", "Type"))
    self.assertEqual(sorted([1, 2, 3], key=sort_key1), [2, 3, 1])
    self.assertEqual(sorted([1, 2, 3], key=sort_key2), [1, 3, 2])

    # Change some values
    self.update_record("Values", 2, Date=6)
    # The same key objects now sort by the updated values.
    self.assertEqual(sorted([1, 2, 3], key=sort_key1), [3, 1, 2])
    self.assertEqual(sorted([1, 2, 3], key=sort_key2), [2, 1, 3])

  def test_column_rename(self):
    """
    Make sure that renaming a column to another name and back does not continue using stale
    references to the deleted column.
    """
    # Note that SortedLookupMapColumn does retain references to the columns it uses for sorting,
    # but lookup columns themselves get deleted and rebuilt in these cases (by mysterious voodoo).

    # Create a simple table (People) with a couple records.
    self.apply_user_action(["AddTable", "People", [
      dict(id="Name", type="Text")
    ]])
    self.add_record("People", Name="Alice")
    self.add_record("People", Name="Bob")

    # Create a separate table that does a lookup in the People table.
    self.apply_user_action(["AddTable", "Test", [
      dict(id="Lookup1", type="Any", isFormula=True,
        formula="People.lookupOne(order_by='-Name').Name"),
      dict(id="Lookup2", type="Any", isFormula=True,
        formula="People.lookupOne(order_by='Name').Name"),
      dict(id="Lookup3", type="Any", isFormula=True,
        formula="People.lookupOne(Name='Bob').Name"),
    ]])
    self.add_record("Test")

    # Test that lookups return data as expected.
    self.assertTableData('Test', cols="subset", data=[
      dict(id=1, Lookup1="Bob", Lookup2="Alice", Lookup3="Bob")
    ])

    # Rename a column used for lookups or order_by. Lookup result shouldn't change.
    self.apply_user_action(["RenameColumn", "People", "Name", "FullName"])
    self.assertTableData('Test', cols="subset", data=[
      dict(id=1, Lookup1="Bob", Lookup2="Alice", Lookup3="Bob")
    ])

    # Rename the column back. Lookup result shouldn't change.
    self.apply_user_action(["RenameColumn", "People", "FullName", "Name"])
    self.assertTableData('Test', cols="subset", data=[
      dict(id=1, Lookup1="Bob", Lookup2="Alice", Lookup3="Bob")
    ])

View File

@ -145,22 +145,22 @@ class TestSummaryChoiceList(EngineTestCase):
{ {
'#summary#Source_summary_choices1': column.ReferenceListColumn, '#summary#Source_summary_choices1': column.ReferenceListColumn,
"#lookup#_Contains(value='#summary#Source_summary_choices1', match_empty=no_match_empty)": "#lookup#_Contains(value='#summary#Source_summary_choices1', match_empty=no_match_empty)":
lookup.ContainsLookupMapColumn, lookup.LookupMapColumn,
'#summary#Source_summary_choices1_choices2': column.ReferenceListColumn, '#summary#Source_summary_choices1_choices2': column.ReferenceListColumn,
"#lookup#_Contains(value='#summary#Source_summary_choices1_choices2', " "#lookup#_Contains(value='#summary#Source_summary_choices1_choices2', "
"match_empty=no_match_empty)": "match_empty=no_match_empty)":
lookup.ContainsLookupMapColumn, lookup.LookupMapColumn,
# simple summary and lookup # simple summary and lookup
'#summary#Source_summary_other': column.ReferenceColumn, '#summary#Source_summary_other': column.ReferenceColumn,
'#lookup##summary#Source_summary_other': lookup.SimpleLookupMapColumn, '#lookup##summary#Source_summary_other': lookup.LookupMapColumn,
'#summary#Source_summary_choices1_other': column.ReferenceListColumn, '#summary#Source_summary_choices1_other': column.ReferenceListColumn,
"#lookup#_Contains(value='#summary#Source_summary_choices1_other', " "#lookup#_Contains(value='#summary#Source_summary_choices1_other', "
"match_empty=no_match_empty)": "match_empty=no_match_empty)":
lookup.ContainsLookupMapColumn, lookup.LookupMapColumn,
"#lookup#": lookup.SimpleLookupMapColumn, "#lookup#": lookup.LookupMapColumn,
} }
) )

View File

@ -86,3 +86,24 @@ class TestTempRowIds(test_engine.EngineTestCase):
"schoolCities": ["E:C", "B:New York", "E:C", "B:New York", "B:New York"]}], "schoolCities": ["E:C", "B:New York", "E:C", "B:New York", "B:New York"]}],
] ]
}) })
def test_update_remove(self):
  # Check that negative (temporary) row IDs used within one bundle of user actions get resolved
  # to the actual row IDs assigned by the earlier Add actions (-1 -> 7, -2 -> 8, -3 -> 10 here),
  # including when used in subsequent Update and Remove actions.
  self.load_sample(testsamples.sample_students)
  out_actions = self.engine.apply_user_actions([useractions.from_repr(ua) for ua in (
    ['AddRecord', 'Students', -1, {'firstName': 'A'}],
    ['UpdateRecord', 'Students', -1, {'lastName': 'A'}],
    ['BulkAddRecord', 'Students', [-2, None, -3], {'firstName': ['C', 'D', 'E']}],
    ['BulkUpdateRecord', 'Students', [-2, -3, -1], {'lastName': ['C', 'E', 'F']}],
    ['RemoveRecord', 'Students', -2],
  )])
  self.assertPartialOutActions(out_actions, {
    "stored": [
      ['AddRecord', 'Students', 7, {'firstName': 'A'}],
      ['UpdateRecord', 'Students', 7, {'lastName': 'A'}],
      ['BulkAddRecord', 'Students', [8, 9, 10], {'firstName': ['C', 'D', 'E']}],
      ['BulkUpdateRecord', 'Students', [8, 10, 7], {'lastName': ['C', 'E', 'F']}],
      ['RemoveRecord', 'Students', 8],
    ]
  })

View File

@ -19,6 +19,17 @@ def table_data_from_rows(table_id, col_names, rows):
return actions.TableData(table_id, column_values.pop('id'), column_values) return actions.TableData(table_id, column_values.pop('id'), column_values)
def table_data_from_row_dicts(table_id, row_dict_list):
  """
  Returns a TableData object built from table_id and a list of dictionaries, one per row, mapping
  column names to cell values. Columns are collected in first-seen order; missing cells are None.
  """
  # Collect the set of col_ids, starting with 'id'. A dict (rather than a set) keeps the
  # order predictable.
  col_ids = {'id': None}
  for row_values in row_dict_list:
    for col_id in row_values:
      col_ids[col_id] = None
  columns = {}
  for col_id in col_ids:
    columns[col_id] = [row.get(col_id) for row in row_dict_list]
  return actions.TableData(table_id, columns.pop('id'), columns)
def parse_testscript(script_path=None): def parse_testscript(script_path=None):
""" """

View File

@ -238,6 +238,28 @@ def _set_remove(container, value):
register_container(set, _set_make, _set_add, _set_remove) register_container(set, _set_make, _set_add, _set_remove)
# A version of `set` that maintains also sorted versions of the set. Used in lookups, to cache the
# sorted lookup results.
class LookupSet(set):
  """
  A set of row IDs that additionally caches sorted orderings of its members in the
  `sorted_versions` dict. Any code that changes membership must clear `sorted_versions`
  (the registered add/remove helpers for this container type do so).
  """
  def __init__(self, iterable=()):
    # Fix: the default used to be the mutable `[]`, a classic shared-mutable-default pitfall;
    # an immutable empty tuple is safe and behaves identically. Also pass the iterable to
    # set.__init__ directly, since it accepts any iterable -- no intermediate list copy needed.
    super(LookupSet, self).__init__(iterable)
    # Maps sort spec to a cached sorted ordering; invalidated whenever membership changes.
    self.sorted_versions = {}
def _LookupSet_make(value):
  # Build a fresh LookupSet bin holding just the given value.
  return LookupSet([value])
def _LookupSet_add(container, value):
if value not in container:
container.add(value)
container.sorted_versions.clear()
return True
return False
def _LookupSet_remove(container, value):
if value in container:
container.discard(value)
container.sorted_versions.clear()
register_container(LookupSet, _LookupSet_make, _LookupSet_add, _LookupSet_remove)
# Allow `list` to be used as a bin type. # Allow `list` to be used as a bin type.
def _list_make(value): def _list_make(value):

View File

@ -457,7 +457,8 @@ class ReferenceList(BaseColumnType):
if isinstance(value, RecordSet): if isinstance(value, RecordSet):
assert value._table.table_id == self.table_id assert value._table.table_id == self.table_id
return objtypes.RecordList(value._row_ids, group_by=value._group_by, sort_by=value._sort_by) return objtypes.RecordList(value._row_ids, group_by=value._group_by, sort_by=value._sort_by,
sort_key=value._sort_key)
elif not value: elif not value:
# Represent an empty ReferenceList as None (also its default value). Formulas will see []. # Represent an empty ReferenceList as None (also its default value). Formulas will see [].
return None return None
@ -465,8 +466,15 @@ class ReferenceList(BaseColumnType):
@classmethod @classmethod
def is_right_type(cls, value): def is_right_type(cls, value):
return value is None or (isinstance(value, list) and # TODO: whenever is_right_type isn't trivial, get_cell_value should just remember the result
all(Reference.is_right_type(val) for val in value)) # rather than recompute it on every access. Actually this applies not only to is_right_type
# but to everything get_cell_value does. It should use minimal-memory minimal-overhead
# translations of raw->rich for valid values, and use what memory it needs but still guarantee
# constant time for invalid values.
return (value is None or
(isinstance(value, objtypes.RecordList)) or
(isinstance(value, list) and all(Reference.is_right_type(val) for val in value))
)
class Attachments(ReferenceList): class Attachments(ReferenceList):