(core) Add a row to summary tables grouped by list column(s) corresponding to empty lists

Summary:
Adds some special handling to summary table and lookup logic:

- Source rows with empty choicelists/reflists now get a corresponding summary row with an empty string/reference when grouping by that column, instead of being excluded from every group
- Adds a new `QueryOperation` 'empty' in the client which is used in `LinkingState`, `QuerySet`, and `recursiveMoveToCursorPos` to match empty lists in source tables against falsy values in linked summary tables.
- Adds a new parameter `match_empty` to the Python `CONTAINS` function so that regular formulas can implement the same behaviour as summary tables. See https://grist.slack.com/archives/C0234CPPXPA/p1654030490932119
- Uses the new `match_empty` argument in the formula generated for the `group` column when detaching a summary table.

Test Plan: Updated and extended Python and nbrowser tests of summary tables grouped by choicelists to test for new behaviour with empty lists.

Reviewers: georgegevoian

Reviewed By: georgegevoian

Differential Revision: https://phab.getgrist.com/D3471
This commit is contained in:
Alex Hall
2022-06-07 16:57:29 +02:00
parent 3b30c052bc
commit 1c89d08ea3
10 changed files with 128 additions and 43 deletions

View File

@@ -9,10 +9,10 @@ import itertools
import six
import functions
import records
import usertypes
import relabeling
import lookup
import table
import moment
from schema import RecalcWhen
@@ -31,7 +31,7 @@ def _record_ref_list_set(table_id, group_by, sort_by=None):
@usertypes.formulaType(usertypes.ReferenceList(table_id))
def func(rec, table):
lookup_table = table.docmodel.get_table(table_id)
return lookup_table.lookupRecords(sort_by=sort_by, **{group_by: lookup._Contains(rec.id)})
return lookup_table.lookupRecords(sort_by=sort_by, **{group_by: functions.CONTAINS(rec.id)})
return func

View File

@@ -151,7 +151,17 @@ def VLOOKUP(table, **field_value_pairs):
"""
return table.lookupOne(**field_value_pairs)
class _Contains(namedtuple("_Contains", "value")):
class _NoMatchEmpty(object):
"""
Singleton sentinel value for CONTAINS match_empty parameter to indicate no argument was passed
and no value should match against empty lists in lookups.
"""
def __repr__(self):
return "no_match_empty"
class _Contains(namedtuple("_Contains", "value match_empty")):
"""
Use this marker with [UserTable.lookupRecords](#lookuprecords) to find records
where a field of a list type (such as `Choice List` or `Reference List`) contains the given value.
@@ -169,6 +179,16 @@ class _Contains(namedtuple("_Contains", "value")):
In particular the values mustn't be strings, e.g. `"Comedy-Drama"` won't match
even though `"Drama" in "Comedy-Drama"` is `True` in Python.
It also won't match substrings within container elements, e.g. `["Comedy-Drama"]`.
You can optionally pass a second argument `match_empty` to indicate a value that
should be matched against empty lists in the looked up column.
For example, given this formula:
MoviesTable.lookupRecords(genre=CONTAINS(g, match_empty=''))
If `g` is `''` (i.e. equal to `match_empty`) then the column `genre` in the returned records
will either be an empty list (or other container) or a list containing `g` as usual.
"""
# While users should apply this marker to values in queries, internally
# the marker is moved to the column ID so that the LookupMapColumn knows how to
@@ -177,9 +197,16 @@ class _Contains(namedtuple("_Contains", "value")):
# The CONTAINS function is for users
# Having a function as the interface makes things like docs and autocomplete
# work more consistently
pass
def CONTAINS(value):
return _Contains(value)
no_match_empty = _NoMatchEmpty()
def CONTAINS(value, match_empty=_Contains.no_match_empty):
  # Builds the `_Contains` marker for `value`. When `match_empty` is left at the
  # `no_match_empty` sentinel, no value is matched against empty lists.
  # (Comments are used instead of a docstring because this function's `__doc__`
  # is assigned from `_Contains.__doc__`.)
  try:
    # Reject unhashable values up front with a clear message.
    # NOTE(review): presumably required because the marker ends up as part of a
    # lookup key -- confirm against the lookup map code.
    hash(match_empty)
  except TypeError:
    raise TypeError("match_empty must be hashable")
  return _Contains(value, match_empty)
CONTAINS.__doc__ = _Contains.__doc__

View File

@@ -198,6 +198,8 @@ class ContainsLookupMapColumn(BaseLookupMapColumn):
# group = [] essentially means there are no new keys in this call
if isinstance(group, (six.binary_type, six.text_type)):
group = []
elif not group and col_id.match_empty != _Contains.no_match_empty:
group = [col_id.match_empty]
else:
group = [group]

View File

@@ -388,9 +388,11 @@ class SummaryActions(object):
group_args = ', '.join(
'%s=%s' % (
c.summarySourceCol.colId,
'CONTAINS($%s)' % c.colId
if c.summarySourceCol.type.startswith(('ChoiceList', 'RefList:')) else
'$%s' % c.colId,
(
  # Choose the lookup argument template from the source column's type.
  # Choice lists match empty lists against "" and reference lists against 0.
  'CONTAINS($%s, match_empty="")' if c.summarySourceCol.type == 'ChoiceList' else
  # Reference-list types are spelled 'RefList:<table>' and the prefix check is
  # case-sensitive, so 'Reflist' would never match.
  'CONTAINS($%s, match_empty=0)' if c.summarySourceCol.type.startswith('RefList:') else
  '$%s'
) % c.colId,
)
for c in field_col_recs if c.summarySourceCol
)

View File

@@ -8,6 +8,7 @@ from six.moves import xrange
import column
import depend
import docmodel
import functions
import logger
import lookup
import records
@@ -336,8 +337,8 @@ class Table(object):
lookup_values = []
for group_col in groupby_cols:
lookup_value = getattr(rec, group_col)
if isinstance(self.all_columns[group_col],
(column.ChoiceListColumn, column.ReferenceListColumn)):
group_col_obj = self.all_columns[group_col]
if isinstance(group_col_obj, (column.ChoiceListColumn, column.ReferenceListColumn)):
# Check that ChoiceList/ReferenceList cells have appropriate types.
# Don't iterate over characters of a string.
if isinstance(lookup_value, (six.binary_type, six.text_type)):
@@ -347,6 +348,13 @@ class Table(object):
lookup_value = set(lookup_value)
except TypeError:
return []
if not lookup_value:
if isinstance(group_col_obj, column.ChoiceListColumn):
lookup_value = {""}
else:
lookup_value = {0}
else:
lookup_value = [lookup_value]
lookup_values.append(lookup_value)
@@ -459,11 +467,11 @@ class Table(object):
for col_id in sorted(kwargs):
value = kwargs[col_id]
if isinstance(value, lookup._Contains):
value = value.value
# While users should use CONTAINS on lookup values,
# the marker is moved to col_id so that the LookupMapColumn knows how to
# update its index correctly for that column.
col_id = lookup._Contains(col_id)
col_id = value._replace(value=col_id)
value = value.value
else:
col = self.get_column(col_id)
# Convert `value` to the correct type of rich value for that column
@@ -527,7 +535,7 @@ class Table(object):
# _summary_source_table._summary_simple determines whether
# the column named self._summary_helper_col_id is a single reference
# or a reference list.
lookup_value = rec if self._summary_simple else lookup._Contains(rec)
lookup_value = rec if self._summary_simple else functions.CONTAINS(rec)
return self._summary_source_table.lookup_records(**{
self._summary_helper_col_id: lookup_value
})

View File

@@ -142,10 +142,10 @@ class TestSummaryChoiceList(EngineTestCase):
{k: type(v) for k, v in self.engine.tables["Source"]._special_cols.items()},
{
'#summary#GristSummary_6_Source': column.ReferenceListColumn,
"#lookup#_Contains(value='#summary#GristSummary_6_Source')":
"#lookup#_Contains(value='#summary#GristSummary_6_Source', match_empty=no_match_empty)":
lookup.ContainsLookupMapColumn,
'#summary#GristSummary_6_Source2': column.ReferenceListColumn,
"#lookup#_Contains(value='#summary#GristSummary_6_Source2')":
"#lookup#_Contains(value='#summary#GristSummary_6_Source2', match_empty=no_match_empty)":
lookup.ContainsLookupMapColumn,
# simple summary and lookup
@@ -153,7 +153,7 @@ class TestSummaryChoiceList(EngineTestCase):
'#lookup##summary#GristSummary_6_Source3': lookup.SimpleLookupMapColumn,
'#summary#GristSummary_6_Source4': column.ReferenceListColumn,
"#lookup#_Contains(value='#summary#GristSummary_6_Source4')":
"#lookup#_Contains(value='#summary#GristSummary_6_Source4', match_empty=no_match_empty)":
lookup.ContainsLookupMapColumn,
"#lookup#": lookup.SimpleLookupMapColumn,
@@ -203,14 +203,19 @@ class TestSummaryChoiceList(EngineTestCase):
[5, "a", "e", [21], 1],
])
# Remove record from source
self.remove_record("Source", 21)
# Empty choices1
self.update_record("Source", 21, choices1=None)
self.assertTableData('Source', data=[
["id", "choices1", "choices2", "other"],
[21, None, ["c", "d", "e"], "foo"],
])
# All summary rows are now empty
self.assertTableData('GristSummary_6_Source', data=[
["id", "choices1", "group", "count"],
[1, "a", [], 0],
[2, "b", [], 0],
[3, "", [21], 1],
])
self.assertTableData('GristSummary_6_Source2', data=[
@@ -220,6 +225,32 @@ class TestSummaryChoiceList(EngineTestCase):
[3, "b", "c", [], 0],
[4, "b", "d", [], 0],
[5, "a", "e", [], 0],
[6, "", "c", [21], 1],
[7, "", "d", [21], 1],
[8, "", "e", [21], 1],
])
# Remove record from source
self.remove_record("Source", 21)
# All summary rows are now empty
self.assertTableData('GristSummary_6_Source', data=[
["id", "choices1", "group", "count"],
[1, "a", [], 0],
[2, "b", [], 0],
[3, "", [], 0],
])
self.assertTableData('GristSummary_6_Source2', data=[
["id", "choices1", "choices2", "group", "count"],
[1, "a", "c", [], 0],
[2, "a", "d", [], 0],
[3, "b", "c", [], 0],
[4, "b", "d", [], 0],
[5, "a", "e", [], 0],
[6, "", "c", [], 0],
[7, "", "d", [], 0],
[8, "", "e", [], 0],
])
# Make rows with every combination of {a,b,ab} and {c,d,cd}
@@ -236,6 +267,8 @@ class TestSummaryChoiceList(EngineTestCase):
[107, ["L", "a"], ["L", "c", "d"]],
[108, ["L", "b"], ["L", "c", "d"]],
[109, ["L", "a", "b"], ["L", "c", "d"]],
# and one row with empty lists
[110, ["L"], ["L"]],
]
)
@@ -250,6 +283,7 @@ class TestSummaryChoiceList(EngineTestCase):
[107, ["a"], ["c", "d"]],
[108, ["b"], ["c", "d"]],
[109, ["a", "b"], ["c", "d"]],
[110, None, None],
])
# Summary tables now have an even distribution of combinations
@@ -257,6 +291,7 @@ class TestSummaryChoiceList(EngineTestCase):
["id", "choices1", "group", "count"],
[1, "a", [101, 103, 104, 106, 107, 109], 6],
[2, "b", [102, 103, 105, 106, 108, 109], 6],
[3, "", [110], 1],
])
summary_data = [
@@ -266,6 +301,10 @@ class TestSummaryChoiceList(EngineTestCase):
[3, "b", "c", [102, 103, 108, 109], 4],
[4, "b", "d", [105, 106, 108, 109], 4],
[5, "a", "e", [], 0],
[6, "", "c", [], 0],
[7, "", "d", [], 0],
[8, "", "e", [], 0],
[9, "", "", [110], 1],
]
self.assertTableData('GristSummary_6_Source2', data=summary_data)
@@ -284,8 +323,9 @@ class TestSummaryChoiceList(EngineTestCase):
Column(30, "count", "Int", isFormula=True, summarySourceCol=0,
formula="len($group)"),
Column(31, "group", "RefList:Source", isFormula=True, summarySourceCol=0,
formula="Source.lookupRecords(choices1=CONTAINS($choices1), choices2=CONTAINS($choices2))"),
formula='Source.lookupRecords('
'choices1=CONTAINS($choices1, match_empty=""), '
'choices2=CONTAINS($choices2, match_empty=""))'),
],
)
])