gristlabs_grist-core/sandbox/grist/test_prevnext.py

390 lines
17 KiB
Python
Raw Normal View History

(core) Implement PREVIOUS/NEXT/RANK and lookupRecords().find.* methods. Summary: - `lookupRecords()` now allows efficient search in sorted results, with the syntax `lookupRecords(..., order_by="-Date").find.le($Date)`. This will find the record with the nearest date that's <= `$Date`. - The `find.*` methods are `le`, `lt`, `ge`, `gt`, and `eq`. All have O(log N) performance. - `PREVIOUS(rec, group_by=..., order_by=...)` finds the previous record to rec, according to `group_by` / `order_by`, in amortized O(log N) time. For example, `PREVIOUS(rec, group_by="Account", order_by="Date")`. - `PREVIOUS(rec, order_by=None)` finds the previous record in the full table, sorted by the `manualSort` column, to match the order visible in the unsorted table. - `NEXT(...)` is just like `PREVIOUS(...)` but finds the next record. - `RANK(rec, group_by=..., order_by=..., order="asc")` returns the rank of the record within the group, starting with 1. Order can be `"asc"` (default) or `"desc"`. - The `order_by` argument in `lookupRecords`, and the new functions now supports tuples, as well as the "-" prefix to reverse order, e.g. `("Category", "-Date")`. - New functions are only available in Python3, for a minor reason (to support keyword-only arguments for `group_by` and `order_by`) and also as a nudge to Python2 users to update. - Includes fixes for several situations related to lookups that used to cause quadratic complexity. Test Plan: - New performance check that sorted lookups don't add quadratic complexity. - Tests added for lookup find.* methods, and for PREVIOUS/NEXT/RANK. - Tests added that renaming columns updates `order_by` and `group_by` arguments, and attributes on results (e.g. `PREVIOUS(...).ColId`) appropriately. - Python3 tests can now produce verbose output when VERBOSE=1 and -v are given. Reviewers: jarek, georgegevoian Reviewed By: jarek, georgegevoian Subscribers: paulfitz, jarek Differential Revision: https://phab.getgrist.com/D4265
2024-07-17 03:43:53 +00:00
import datetime
import functools
import itertools
import logging
import unittest
import six
import actions
from column import SafeSortKey
import moment
import objtypes
import testutil
import test_engine
log = logging.getLogger(__name__)
def D(year, month, day):
return moment.date_to_ts(datetime.date(year, month, day))
class TestPrevNext(test_engine.EngineTestCase):
def do_setup(self):
self.load_sample(testutil.parse_test_sample({
"SCHEMA": [
[1, "Customers", [
[11, "Name", "Text", False, "", "", ""],
]],
[2, "Purchases", [
[20, "manualSort", "PositionNumber", False, "", "", ""],
[21, "Customer", "Ref:Customers", False, "", "", ""],
[22, "Date", "Date", False, "", "", ""],
[24, "Category", "Text", False, "", "", ""],
[25, "Amount", "Numeric", False, "", "", ""],
[26, "Prev", "Ref:Purchases", True, "None", "", ""], # To be filled
[27, "Cumul", "Numeric", True, "$Prev.Cumul + $Amount", "", ""],
]],
],
"DATA": {
"Customers": [
["id", "Name"],
[1, "Alice"],
[2, "Bob"],
],
"Purchases": [
[ "id", "manualSort", "Customer", "Date", "Category", "Amount", ],
[1, 1.0, 1, D(2023,12,1), "A", 10],
[2, 2.0, 2, D(2023,12,4), "A", 17],
[3, 3.0, 1, D(2023,12,3), "A", 20],
[4, 4.0, 1, D(2023,12,9), "A", 40],
[5, 5.0, 1, D(2023,12,2), "B", 80],
[6, 6.0, 1, D(2023,12,6), "B", 160],
[7, 7.0, 1, D(2023,12,7), "A", 320],
[8, 8.0, 1, D(2023,12,5), "A", 640],
],
}
}))
def calc_expected(self, group_key=None, sort_key=None, sort_reverse=False):
# Returns expected {id, Prev, Cumul} values from Purchases table calculated according to the
# given grouping and sorting parameters.
group_key = group_key or (lambda r: 0)
data = list(actions.transpose_bulk_action(self.engine.fetch_table('Purchases')))
expected = []
sorted_data = sorted(data, key=sort_key, reverse=sort_reverse)
sorted_data = sorted(sorted_data, key=group_key)
for key, group in itertools.groupby(sorted_data, key=group_key):
prev = 0
cumul = 0.0
for r in group:
cumul = round(cumul + r.Amount, 2)
expected.append({"id": r.id, "Prev": prev, "Cumul": cumul})
prev = r.id
expected.sort(key=lambda r: r["id"])
return expected
def do_test(self, formula, group_key=None, sort_key=None, sort_reverse=False):
calc_expected = lambda: self.calc_expected(
group_key=group_key, sort_key=sort_key, sort_reverse=sort_reverse)
def assertPrevValid():
# Check that Prev column is legitimate values, e.g. not errors.
prev = self.engine.fetch_table('Purchases').columns["Prev"]
self.assertTrue(is_all_ints(prev), "Prev column contains invalid values: %s" %
[objtypes.encode_object(x) for x in prev])
# This verification works as follows:
# (1) Set "Prev" column to the specified formula.
# (2) Calculate expected values for "Prev" and "Cumul" manually, and compare to reality.
# (3) Try a few actions that affect the data, and calculate again.
self.do_setup()
self.modify_column('Purchases', 'Prev', formula=formula)
# Check the initial data.
assertPrevValid()
self.assertTableData('Purchases', cols="subset", data=calc_expected())
# Check the result after removing a record.
self.remove_record('Purchases', 6)
self.assertTableData('Purchases', cols="subset", data=calc_expected())
# Check the result after updating a record
self.update_record('Purchases', 5, Amount=1080) # original value +1000
self.assertTableData('Purchases', cols="subset", data=calc_expected())
first_date = D(2023, 8, 1)
# Update a few other records
self.update_record("Purchases", 2, Customer=1)
self.update_record("Purchases", 1, Customer=2)
self.update_record("Purchases", 3, Date=first_date) # becomes earliest in date order
assertPrevValid()
self.assertTableData('Purchases', cols="subset", data=calc_expected())
# Check the result after re-adding a record
# Note that Date here matches new date of record #3. This tests sort fallback to rowId.
# Amount is the original amount +1.
self.add_record('Purchases', 6, manualSort=6.0, Date=first_date, Amount=161)
self.assertTableData('Purchases', cols="subset", data=calc_expected())
# Update the manualSort value to test how it affects sort results.
self.update_record('Purchases', 6, manualSort=0.5)
self.assertTableData('Purchases', cols="subset", data=calc_expected())
assertPrevValid()
def do_test_prevnext(self, formula, group_key=None, sort_key=None, sort_reverse=False):
# Run do_test() AND also repeat it after replacing PREVIOUS with NEXT in formula, and
# reversing the expected results.
# Note that this is a bit fragile: it relies on do_test() being limited to only the kinds of
# changes that would be reset by another call to self.load_sample().
with self.subTest(formula=formula): # pylint: disable=no-member
self.do_test(formula, group_key=group_key, sort_key=sort_key, sort_reverse=sort_reverse)
nformula = formula.replace('PREVIOUS', 'NEXT')
with self.subTest(formula=nformula): # pylint: disable=no-member
self.do_test(nformula, group_key=group_key, sort_key=sort_key, sort_reverse=not sort_reverse)
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_none(self):
self.do_test_prevnext("PREVIOUS(rec, order_by=None)", group_key=None,
sort_key=lambda r: r.manualSort)
# Check that order_by arg is required (get TypeError without it).
with self.assertRaisesRegex(AssertionError, r'Prev column contains invalid values:.*TypeError'):
self.do_test("PREVIOUS(rec)", sort_key=lambda r: -r.id)
# These assertions are just to ensure that do_test() tests do exercise the feature being
# tested, i.e. fail when comparisons are NOT correct.
with self.assertRaisesRegex(AssertionError, r'Observed data not as expected'):
self.do_test("PREVIOUS(rec, order_by=None)", sort_key=lambda r: -r.id)
with self.assertRaisesRegex(AssertionError, r'Observed data not as expected'):
self.do_test("PREVIOUS(rec, order_by=None)", group_key=(lambda r: r.Customer),
sort_key=(lambda r: r.id))
# Make sure the test case above exercises the disambiguation by 'manualSort' (i.e. fails if
# 'manualSort' isn't used to disambiguate).
with self.assertRaisesRegex(AssertionError, r'Observed data not as expected'):
self.do_test("PREVIOUS(rec, order_by=None)", sort_key=lambda r: r.id)
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_date(self):
self.do_test_prevnext("PREVIOUS(rec, order_by='Date')",
group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), r.manualSort))
# Make sure the test case above exercises the disambiguation by 'manualSort' (i.e. fails if it
# isn't used to disambiguate).
with self.assertRaisesRegex(AssertionError, r'Observed data not as expected'):
self.do_test("PREVIOUS(rec, order_by='Date')",
group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), r.id))
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_date_manualsort(self):
# Same as the previous test case (with just 'Date'), but specifies 'manualSort' explicitly.
self.do_test_prevnext("PREVIOUS(rec, order_by=('Date', 'manualSort'))",
group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), r.manualSort))
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_rdate(self):
self.do_test_prevnext("PREVIOUS(rec, order_by='-Date')",
group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), -r.manualSort), sort_reverse=True)
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_rdate_id(self):
self.do_test_prevnext("PREVIOUS(rec, order_by=('-Date', 'id'))",
group_key=None, sort_key=lambda r: (SafeSortKey(r.Date), -r.id), sort_reverse=True)
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_customer_rdate(self):
self.do_test_prevnext("PREVIOUS(rec, group_by=('Customer',), order_by='-Date')",
group_key=(lambda r: r.Customer), sort_key=lambda r: (SafeSortKey(r.Date), -r.id),
sort_reverse=True)
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_category_date(self):
self.do_test_prevnext("PREVIOUS(rec, group_by=('Category',), order_by='Date')",
group_key=(lambda r: r.Category), sort_key=lambda r: SafeSortKey(r.Date))
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_category_date2(self):
self.do_test_prevnext("PREVIOUS(rec, group_by='Category', order_by='Date')",
group_key=(lambda r: r.Category), sort_key=lambda r: SafeSortKey(r.Date))
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_n_cat_date(self):
self.do_test_prevnext("PREVIOUS(rec, order_by=('Category', 'Date'))",
sort_key=lambda r: (SafeSortKey(r.Category), SafeSortKey(r.Date)))
@unittest.skipUnless(six.PY2, "Python 2 only")
def test_prevnext_py2(self):
# On Python2, we expect NEXT/PREVIOUS to raise a NotImplementedError. It's not hard to make
# it work, but the stricter argument syntax supported by Python3 is helpful, and we'd like
# to drop Python2 support anyway.
self.do_setup()
self.modify_column('Purchases', 'Prev', formula='PREVIOUS(rec, order_by=None)')
self.add_column('Purchases', 'Next', formula="NEXT(rec, group_by='Category', order_by='Date')")
self.add_column('Purchases', 'Rank', formula="RANK(rec, order_by='Date', order='desc')")
# Check that all values are the expected exception.
err = objtypes.RaisedException(NotImplementedError())
self.assertTableData('Purchases', cols="subset", data=[
dict(id=r, Prev=err, Next=err, Rank=err, Cumul=err) for r in range(1, 9)
])
def do_test_renames(self, formula, renamed_formula, calc_expected_pre, calc_expected_post):
self.do_setup()
self.modify_column('Purchases', 'Prev', formula=formula)
# Check the initial data.
self.assertTableData('Purchases', cols="subset", data=calc_expected_pre())
# Do the renames
self.apply_user_action(['RenameColumn', 'Purchases', 'Category', 'cat'])
self.apply_user_action(['RenameColumn', 'Purchases', 'Date', 'Fecha'])
self.apply_user_action(['RenameColumn', 'Purchases', 'Customer', 'person'])
# Check that rename worked.
self.assertTableData('_grist_Tables_column', cols="subset", rows="subset", data=[
dict(id=26, colId="Prev", formula=renamed_formula)
])
# Check that data is as expected, and reacts to changes.
self.assertTableData('Purchases', cols="subset", data=calc_expected_post())
self.update_record("Purchases", 1, cat="B")
self.assertTableData('Purchases', cols="subset", data=calc_expected_post())
self.update_record("Purchases", 3, Fecha=D(2023,8,1))
self.assertTableData('Purchases', cols="subset", data=calc_expected_post())
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_renaming_prev_str(self):
self.do_test_renaming_prevnext_str("PREVIOUS")
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_renaming_next_str(self):
self.do_test_renaming_prevnext_str("NEXT")
def do_test_renaming_prevnext_str(self, func):
# Given some PREVIOUS/NEXT calls with group_by and order_by, rename columns mentioned there,
# and check columns get adjusted and data remains correct.
formula = "{}(rec, group_by='Category', order_by='Date')".format(func)
renamed_formula = "{}(rec, group_by='cat', order_by='Fecha')".format(func)
self.do_test_renames(formula, renamed_formula,
calc_expected_pre = functools.partial(self.calc_expected,
group_key=(lambda r: r.Category), sort_key=lambda r: SafeSortKey(r.Date),
sort_reverse=(func == 'NEXT')
),
calc_expected_post = functools.partial(self.calc_expected,
group_key=(lambda r: r.cat), sort_key=lambda r: SafeSortKey(r.Fecha),
sort_reverse=(func == 'NEXT')
),
)
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_renaming_prev_tuple(self):
self.do_test_renaming_prevnext_tuple('PREVIOUS')
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_renaming_next_tuple(self):
self.do_test_renaming_prevnext_tuple('NEXT')
def do_test_renaming_prevnext_tuple(self, func):
formula = "{}(rec, group_by=('Customer',), order_by=('Category', '-Date'))".format(func)
renamed_formula = "{}(rec, group_by=('person',), order_by=('cat', '-Fecha'))".format(func)
# To handle "-" prefix for Date.
class Reverse(object):
def __init__(self, key):
self.key = key
def __lt__(self, other):
return other.key < self.key
self.do_test_renames(formula, renamed_formula,
calc_expected_pre = functools.partial(self.calc_expected,
group_key=(lambda r: r.Customer),
sort_key=lambda r: (SafeSortKey(r.Category), Reverse(SafeSortKey(r.Date))),
sort_reverse=(func == 'NEXT')
),
calc_expected_post = functools.partial(self.calc_expected,
group_key=(lambda r: r.person),
sort_key=lambda r: (SafeSortKey(r.cat), Reverse(SafeSortKey(r.Fecha))),
sort_reverse=(func == 'NEXT')
),
)
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_rank(self):
self.do_setup()
formula = "RANK(rec, group_by='Category', order_by='Date')"
self.add_column('Purchases', 'Rank', formula=formula)
self.assertTableData('Purchases', cols="subset", data=[
[ "id", "Date", "Category", "Rank"],
[1, D(2023,12,1), "A", 1 ],
[2, D(2023,12,4), "A", 3 ],
[3, D(2023,12,3), "A", 2 ],
[4, D(2023,12,9), "A", 6 ],
[5, D(2023,12,2), "B", 1 ],
[6, D(2023,12,6), "B", 2 ],
[7, D(2023,12,7), "A", 5 ],
[8, D(2023,12,5), "A", 4 ],
])
formula = "RANK(rec, order_by='Date', order='desc')"
self.modify_column('Purchases', 'Rank', formula=formula)
self.assertTableData('Purchases', cols="subset", data=[
[ "id", "Date", "Category", "Rank"],
[1, D(2023,12,1), "A", 8 ],
[2, D(2023,12,4), "A", 5 ],
[3, D(2023,12,3), "A", 6 ],
[4, D(2023,12,9), "A", 1 ],
[5, D(2023,12,2), "B", 7 ],
[6, D(2023,12,6), "B", 3 ],
[7, D(2023,12,7), "A", 2 ],
[8, D(2023,12,5), "A", 4 ],
])
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_rank_rename(self):
self.do_setup()
self.add_column('Purchases', 'Rank',
formula="RANK(rec, group_by=\"Category\", order_by='Date')")
self.assertTableData('Purchases', cols="subset", data=[
[ "id", "Date", "Category", "Rank"],
[1, D(2023,12,1), "A", 1 ],
[2, D(2023,12,4), "A", 3 ],
[3, D(2023,12,3), "A", 2 ],
[4, D(2023,12,9), "A", 6 ],
[5, D(2023,12,2), "B", 1 ],
[6, D(2023,12,6), "B", 2 ],
[7, D(2023,12,7), "A", 5 ],
[8, D(2023,12,5), "A", 4 ],
])
self.apply_user_action(['RenameColumn', 'Purchases', 'Category', 'cat'])
self.apply_user_action(['RenameColumn', 'Purchases', 'Date', 'when'])
renamed_formula = "RANK(rec, group_by=\"cat\", order_by='when')"
self.assertTableData('_grist_Tables_column', cols="subset", rows="subset", data=[
dict(id=28, colId="Rank", formula=renamed_formula)
])
self.assertTableData('Purchases', cols="subset", data=[
[ "id", "when", "cat", "Rank"],
[1, D(2023,12,1), "A", 1 ],
[2, D(2023,12,4), "A", 3 ],
[3, D(2023,12,3), "A", 2 ],
[4, D(2023,12,9), "A", 6 ],
[5, D(2023,12,2), "B", 1 ],
[6, D(2023,12,6), "B", 2 ],
[7, D(2023,12,7), "A", 5 ],
[8, D(2023,12,5), "A", 4 ],
])
@unittest.skipUnless(six.PY3, "Python 3 only")
def test_prevnext_rename_result_attr(self):
self.do_setup()
self.add_column('Purchases', 'PrevAmount', formula="PREVIOUS(rec, order_by=None).Amount")
self.add_column('Purchases', 'NextAmount', formula="NEXT(rec, order_by=None).Amount")
self.apply_user_action(['RenameColumn', 'Purchases', 'Amount', 'Dollars'])
self.assertTableData('_grist_Tables_column', cols="subset", rows="subset", data=[
dict(id=28, colId="PrevAmount", formula="PREVIOUS(rec, order_by=None).Dollars"),
dict(id=29, colId="NextAmount", formula="NEXT(rec, order_by=None).Dollars"),
])
def is_all_ints(array):
return all(isinstance(x, int) for x in array)