mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
f0d0a07295
Summary: - `lookupRecords()` now allows efficient search in sorted results, with the syntax `lookupRecords(..., order_by="-Date").find.le($Date)`. This will find the record with the nearest date that's <= `$Date`. - The `find.*` methods are `le`, `lt`, `ge`, `gt`, and `eq`. All have O(log N) performance. - `PREVIOUS(rec, group_by=..., order_by=...)` finds the previous record to rec, according to `group_by` / `order_by`, in amortized O(log N) time. For example, `PREVIOUS(rec, group_by="Account", order_by="Date")`. - `PREVIOUS(rec, order_by=None)` finds the previous record in the full table, sorted by the `manualSort` column, to match the order visible in the unsorted table. - `NEXT(...)` is just like `PREVIOUS(...)` but finds the next record. - `RANK(rec, group_by=..., order_by=..., order="asc")` returns the rank of the record within the group, starting with 1. Order can be `"asc"` (default) or `"desc"`. - The `order_by` argument in `lookupRecords` and in the new functions now supports tuples, as well as the "-" prefix to reverse order, e.g. `("Category", "-Date")`. - New functions are only available in Python3, for a minor reason (to support keyword-only arguments for `group_by` and `order_by`) and also as a nudge to Python2 users to update. - Includes fixes for several situations related to lookups that used to cause quadratic complexity. Test Plan: - New performance check that sorted lookups don't add quadratic complexity. - Tests added for lookup find.* methods, and for PREVIOUS/NEXT/RANK. - Tests added that renaming columns updates `order_by` and `group_by` arguments, and attributes on results (e.g. `PREVIOUS(...).ColId`) appropriately. - Python3 tests can now produce verbose output when VERBOSE=1 and -v are given. Reviewers: jarek, georgegevoian Reviewed By: jarek, georgegevoian Subscribers: paulfitz, jarek Differential Revision: https://phab.getgrist.com/D4265
368 lines
13 KiB
Python
368 lines
13 KiB
Python
"""
|
|
Implements the base classes for Record and RecordSet objects used to represent records in Grist
|
|
tables. Individual tables use derived versions of these, which add per-column properties.
|
|
"""
|
|
|
|
from bisect import bisect_left, bisect_right
|
|
import functools
|
|
import sys
|
|
|
|
import six
|
|
|
|
@functools.total_ordering
class Record(object):
  """
  Name: Record, rec

  A Record represents a record of data. It is the primary means of accessing values in formulas. A
  Record for a particular table has a property for each data and formula column in the table.

  In a formula, `$field` is translated to `rec.field`, where `rec` is the Record for which the
  formula is being evaluated.

  For example:
  ```
  def Full_Name(rec, table):
    return rec.First_Name + ' ' + rec.LastName

  def Name_Length(rec, table):
    return len(rec.Full_Name)
  ```
  """

  # Some documentation for method-like parts of Record, which aren't actually methods.
  _DOC_EXTRA = (
    """
    Name: $Field, rec.Field
    Usage: __$__*Field* or __rec__*.Field*

    Access the field named "Field" of the current record. E.g. `$First_Name` or `rec.First_Name`.
    """,
    """
    Name: $group, rec.group
    Usage: __$group__

    In a [summary table](summary-tables.md), `$group` is a special field
    containing the list of Records that are summarized by the current summary line. E.g. the
    formula `len($group)` counts the number of those records being summarized in each row.

    See [RecordSet](#recordset) for useful properties offered by the returned object.

    Examples:
    ```
    sum($group.Amount)                             # Sum of the Amount field in the matching records
    sum(r.Amount for r in $group)                  # Same as sum($group.Amount)
    sum(r.Amount for r in $group if r.Amount > 0)  # Sum of only the positive amounts
    sum(r.Shares * r.Price for r in $group)        # Sum of shares * price products
    ```
    """
  )

  # Slots are an optimization to avoid the need for a per-object __dict__.
  __slots__ = ('_row_id', '_source_relation')

  # Per-table derived classes override this and set it to the appropriate Table object.
  _table = None

  # Record is always a thin class, containing essentially a reference to a row in the table. The
  # properties to access individual fields of a row are provided in per-table derived classes.
  def __init__(self, row_id, relation=None):
    """
    Creates a Record object.
      row_id    - The ID of the record within its table.
      relation  - Relation object for how this record was obtained; used in dependency tracking.
                  When omitted (or falsy), the table's identity relation is used.

    The table is not a constructor argument: it is the `_table` class attribute, which per-table
    derived classes set. In general you shouldn't call this constructor directly, but rather:

      table.Record(row_id, relation)

    which uses the derived class bound to that table.
    """
    self._row_id = row_id
    self._source_relation = relation or self._table._identity_relation

  # Existing fields are added as @property methods in table.py. When no field is found, raise a
  # more informative AttributeError.
  def __getattr__(self, name):
    return self._table._attribute_error(name, self._source_relation)

  def __hash__(self):
    # Must agree with __eq__: two records are the same if they share a table and a row id.
    return hash((self._table, self._row_id))

  def __eq__(self, other):
    return (isinstance(other, Record) and
            (self._table, self._row_id) == (other._table, other._row_id))

  def __ne__(self, other):
    return not self.__eq__(other)

  def __lt__(self, other):
    """
    Orders records by (table_id, row_id). The remaining rich comparisons are derived from this
    method and __eq__ by functools.total_ordering.
    """
    # Defer to Python's default handling for operands that aren't Records (consistent with
    # __eq__), rather than failing with an unrelated AttributeError on `other._table`.
    if not isinstance(other, Record):
      return NotImplemented
    return (self._table.table_id, self._row_id) < (other._table.table_id, other._row_id)

  def __int__(self):
    return self._row_id

  def __nonzero__(self):
    # A record with row id 0 (the empty/sample record) is falsy.
    return bool(self._row_id)

  # Python 3 name for the truthiness hook.
  __bool__ = __nonzero__

  def __repr__(self):
    return "%s[%s]" % (self._table.table_id, self._row_id)

  def _clone_with_relation(self, src_relation):
    """
    Returns a Record for the same row whose source relation is `src_relation` composed with
    this record's own source relation.
    """
    return self._table.Record(self._row_id,
                              relation=src_relation.compose(self._source_relation))
|
|
|
|
|
|
class RecordSet(object):
|
|
"""
|
|
A RecordSet represents a collection of records, as returned by `Table.lookupRecords()` or
|
|
`$group` property in summary views.
|
|
|
|
A RecordSet allows iterating through the records:
|
|
```
|
|
sum(r.Amount for r in Students.lookupRecords(First_Name="John", Last_Name="Doe"))
|
|
min(r.DueDate for r in Tasks.lookupRecords(Owner="Bob"))
|
|
```
|
|
|
|
RecordSets also provide a convenient way to access the list of values for a particular field for
|
|
all the records, as `record_set.Field`. For example, the examples above are equivalent to:
|
|
```
|
|
sum(Students.lookupRecords(First_Name="John", Last_Name="Doe").Amount)
|
|
min(Tasks.lookupRecords(Owner="Bob").DueDate)
|
|
```
|
|
|
|
You can get the number of records in a RecordSet using `len`, e.g. `len($group)`.
|
|
"""
|
|
|
|
# Slots are an optimization to avoid the need for a per-object __dict__.
|
|
__slots__ = ('_row_ids', '_source_relation', '_group_by', '_sort_by', '_sort_key')
|
|
|
|
# Per-table derived classes override this and set it to the appropriate Table object.
|
|
_table = None
|
|
|
|
# Methods should be named with a leading underscore to avoid interfering with access to
|
|
# user-defined fields.
|
|
def __init__(self, row_ids, relation=None, group_by=None, sort_by=None, sort_key=None):
|
|
"""
|
|
group_by may be a dictionary mapping column names to values that are all the same for the given
|
|
RecordSet. sort_by may be the column name used for sorting this record set. Both are set by
|
|
lookupRecords, and used when using RecordSet to insert new records.
|
|
"""
|
|
self._row_ids = row_ids
|
|
self._source_relation = relation or self._table._identity_relation
|
|
# If row_ids is itself a RecordList, default to its _group_by, _sort_by, _sort_key properties.
|
|
self._group_by = group_by or getattr(row_ids, '_group_by', None)
|
|
self._sort_by = sort_by or getattr(row_ids, '_sort_by', None)
|
|
self._sort_key = sort_key or getattr(row_ids, '_sort_key', None)
|
|
|
|
def __len__(self):
|
|
return len(self._row_ids)
|
|
|
|
def __nonzero__(self):
|
|
return bool(self._row_ids)
|
|
|
|
__bool__ = __nonzero__
|
|
|
|
def __eq__(self, other):
|
|
return (isinstance(other, RecordSet) and
|
|
(self._table, self._row_ids) == (other._table, other._row_ids))
|
|
|
|
def __ne__(self, other):
|
|
return not self.__eq__(other)
|
|
|
|
def __iter__(self):
|
|
for row_id in self._row_ids:
|
|
yield self._table.Record(row_id, self._source_relation)
|
|
|
|
def __contains__(self, item):
|
|
"""item may be a Record or its row_id."""
|
|
if isinstance(item, int):
|
|
return item in self._row_ids
|
|
if isinstance(item, Record) and item._table == self._table:
|
|
return int(item) in self._row_ids
|
|
return False
|
|
|
|
def get_one(self):
|
|
# Pick the first record in the sorted order, or empty/sample record for empty RecordSet
|
|
row_id = self._row_ids[0] if self._row_ids else 0
|
|
return self._table.Record(row_id, self._source_relation)
|
|
|
|
def __getitem__(self, index):
|
|
# Allows subscripting a RecordSet as r[0] or r[-1].
|
|
row_id = self._row_ids[index]
|
|
return self._table.Record(row_id, self._source_relation)
|
|
|
|
def __getattr__(self, name):
|
|
return self._table._attribute_error(name, self._source_relation)
|
|
|
|
def __repr__(self):
|
|
return "%s[%s]" % (self._table.table_id, self._row_ids)
|
|
|
|
def _at(self, index):
|
|
"""
|
|
Returns element of RecordSet at the given index when the index is valid and non-negative.
|
|
Otherwise returns the empty/sample record.
|
|
"""
|
|
row_id = self._row_ids[index] if (0 <= index < len(self._row_ids)) else 0
|
|
return self._table.Record(row_id, self._source_relation)
|
|
|
|
def _clone_with_relation(self, src_relation):
|
|
return self._table.RecordSet(self._row_ids,
|
|
relation=src_relation.compose(self._source_relation),
|
|
group_by=self._group_by,
|
|
sort_by=self._sort_by,
|
|
sort_key=self._sort_key)
|
|
|
|
def _get_encodable_row_ids(self):
|
|
"""
|
|
Returns stored rowIds as a simple list or tuple type, even if actually stored as RecordList.
|
|
"""
|
|
# pylint: disable=unidiomatic-typecheck
|
|
if type(self._row_ids) in (list, tuple):
|
|
return self._row_ids
|
|
else:
|
|
return list(self._row_ids)
|
|
|
|
def _get_sort_key(self):
|
|
if not self._sort_key:
|
|
if self._sort_by:
|
|
raise ValueError("Sorted by %s but no sort_key" % (self._sort_by,))
|
|
raise ValueError("Can only use 'find' methods in a sorted reference list")
|
|
return self._sort_key
|
|
|
|
def _to_local_row_id(self, item):
|
|
if isinstance(item, int):
|
|
return item
|
|
if isinstance(item, Record) and item._table == self._table:
|
|
return int(item)
|
|
raise ValueError("unexpected search item") # Need better error
|
|
|
|
@property
|
|
def find(self):
|
|
"""
|
|
A set of methods for finding values in sorted set of records. For example:
|
|
```
|
|
Transactions.lookupRecords(..., sort_by="Date").find.lt($Date)
|
|
Table.lookupRecords(..., sort_by=("Foo", "Bar")).find.le(foo, bar)
|
|
```
|
|
|
|
If the `find` method is shadowed by a same-named user column, you may use `_find` instead.
|
|
|
|
The methods available are:
|
|
|
|
- `lt`: (less than) find nearest record with sort values < the given values
|
|
- `le`: (less than or equal to) find nearest record with sort values <= the given values
|
|
- `gt`: (greater than) find nearest record with sort values > the given values
|
|
- `ge`: (greater than or equal to) find nearest record with sort values >= the given values
|
|
- `eq`: (equal to) find nearest record with sort values == the given values
|
|
|
|
Example from https://templates.getgrist.com/5pHLanQNThxk/Payroll. Each person has a history of
|
|
pay rates, in the Rates table. To find a rate applicable on a certain date, here is how you
|
|
can do it old-style:
|
|
```
|
|
# Get all the rates for the Person and Role in this row.
|
|
rates = Rates.lookupRecords(Person=$Person, Role=$Role)
|
|
|
|
# Pick out only those rates whose Rate_Start is on or before this row's Date.
|
|
past_rates = [r for r in rates if r.Rate_Start <= $Date]
|
|
|
|
# Select the latest of past_rates, i.e. maximum by Rate_Start.
|
|
rate = max(past_rates, key=lambda r: r.Rate_Start)
|
|
|
|
# Return the Hourly_Rate from the relevant Rates record.
|
|
return rate.Hourly_Rate
|
|
```
|
|
|
|
With the new methods, it is much simpler:
|
|
```
|
|
rate = Rates.lookupRecords(Person=$Person, Role=$Role, sort_by="Rate_Start").find.le($Date)
|
|
return rate.Hourly_Rate
|
|
```
|
|
|
|
Note that this is also much faster when there are many rates for the same Person and Role.
|
|
"""
|
|
return FindOps(self)
|
|
|
|
@property
|
|
def _find(self):
|
|
return FindOps(self)
|
|
|
|
def _find_eq(self, *values):
|
|
found = self._bisect_find(bisect_left, 0, _min_row_id, values)
|
|
if found:
|
|
# 'found' means that we found a row that's greater-than-or-equal-to the values we are
|
|
# looking for. To check if the row is actually "equal", it remains to check if it is stictly
|
|
# greater than the passed-in values.
|
|
key = self._get_sort_key()
|
|
if key(found._row_id, values) < key(found._row_id):
|
|
return self._table.Record(0, self._source_relation)
|
|
return found
|
|
|
|
def _bisect_index(self, bisect_func, search_row_id, search_values=None):
|
|
key = self._get_sort_key()
|
|
# Note that 'key' argument is only available from Python 3.10.
|
|
return bisect_func(self._row_ids, key(search_row_id, search_values), key=key)
|
|
|
|
def _bisect_find(self, bisect_func, shift, search_row_id, search_values=None):
|
|
i = self._bisect_index(bisect_func, search_row_id, search_values=search_values)
|
|
return self._at(i + shift)
|
|
|
|
# Sentinel row ids passed as the search row id when searching by values: the minimum is paired
# with bisect_left and the maximum with bisect_right (see FindOps and RecordSet._find_eq).
# NOTE(review): presumably sort keys use the row id as a tie-breaker, so that these sentinels
# order before / after every real row with equal sort values -- confirm against the sort_key
# implementation.
_max_row_id = sys.float_info.max
_min_row_id = -_max_row_id
|
|
|
|
if six.PY3:
  class FindOps(object):
    """
    Search operations over a sorted RecordSet, as returned by its `find` / `_find` properties.
    Every method delegates to the RecordSet's bisect helpers, which need the set to have a
    sort key.
    """

    def __init__(self, record_set):
      self._rset = record_set

    def previous(self, row):
      """Returns the record just before `row` (a Record or row id) in the sorted order."""
      target_id = self._rset._to_local_row_id(row)
      return self._rset._bisect_find(bisect_left, -1, target_id)

    def next(self, row):
      """Returns the record just after `row` (a Record or row id) in the sorted order."""
      target_id = self._rset._to_local_row_id(row)
      return self._rset._bisect_find(bisect_right, 0, target_id)

    def rank(self, row, order="asc"):
      """
      Returns the 1-based position of `row` in the sorted order, counted from the start for
      order "asc" or from the end for order "desc". Any other order raises ValueError.
      """
      target_id = self._rset._to_local_row_id(row)
      position = self._rset._bisect_index(bisect_left, target_id)
      if order == "asc":
        return position + 1
      if order == "desc":
        return len(self._rset) - position
      raise ValueError("The 'order' parameter must be \"asc\" (default) or \"desc\"")

    # For the value-based searches below, bisect_left is paired with the minimum sentinel row
    # id and bisect_right with the maximum one; a shift of -1 selects the record before the
    # insertion point, a shift of 0 the record at it.

    def lt(self, *values):
      """Nearest record with sort values < the given values."""
      return self._rset._bisect_find(bisect_left, -1, _min_row_id, values)

    def le(self, *values):
      """Nearest record with sort values <= the given values."""
      return self._rset._bisect_find(bisect_right, -1, _max_row_id, values)

    def gt(self, *values):
      """Nearest record with sort values > the given values."""
      return self._rset._bisect_find(bisect_right, 0, _max_row_id, values)

    def ge(self, *values):
      """Nearest record with sort values >= the given values."""
      return self._rset._bisect_find(bisect_left, 0, _min_row_id, values)

    def eq(self, *values):
      """Nearest record with sort values == the given values."""
      return self._rset._find_eq(*values)
else:
  class FindOps(object):
    """Python 2 placeholder: the `find` operations are unavailable, so construction fails."""

    def __init__(self, record_set):
      raise NotImplementedError("Update engine to Python3 to use lookupRecords().find")
|
|
|
|
|
|
def adjust_record(relation, value):
  """
  Re-bases the source relation of a Record or RecordSet onto the given relation.

  This is used to wrap values like `foo.bar`: if `bar` is a Record, then its source relation
  should be the composition of the source relation of `foo` and the relation associated with
  `bar`. Values of any other type are returned unchanged.
  """
  if not isinstance(value, (Record, RecordSet)):
    return value
  return value._clone_with_relation(relation)
|