(core) Add a row to summary tables grouped by list column(s) corresponding to empty lists

Summary: Adds some special handling to summary table and lookup logic: - Source rows with empty choicelists/reflists get a corresponding summary row with an empty string/reference when grouping by that column, instead of excluding them from any group - Adds a new `QueryOperation` 'empty' in the client which is used in `LinkingState`, `QuerySet`, and `recursiveMoveToCursorPos` to match empty lists in source tables against falsy values in linked summary tables. - Adds a new parameter `match_empty` to the Python `CONTAINS` function so that regular formulas can implement the same behaviour as summary tables. See https://grist.slack.com/archives/C0234CPPXPA/p1654030490932119 - Uses the new `match_empty` argument in the formula generated for the `group` column when detaching a summary table. Test Plan: Updated and extended Python and nbrowser tests of summary tables grouped by choicelists to test for new behaviour with empty lists. Reviewers: georgegevoian Reviewed By: georgegevoian Differential Revision: https://phab.getgrist.com/D3471
2026-03-02 04:09:24 +00:00 · 2022-06-07 16:57:29 +02:00
parent 3b30c052bc
commit 1c89d08ea3
10 changed files with 128 additions and 43 deletions
--- a/sandbox/grist/docmodel.py
+++ b/sandbox/grist/docmodel.py
@@ -9,10 +9,10 @@ import itertools

 import six

+import functions
 import records
 import usertypes
 import relabeling
-import lookup
 import table
 import moment
 from schema import RecalcWhen
@@ -31,7 +31,7 @@ def _record_ref_list_set(table_id, group_by, sort_by=None):
  @usertypes.formulaType(usertypes.ReferenceList(table_id))
  def func(rec, table):
    lookup_table = table.docmodel.get_table(table_id)
-    return lookup_table.lookupRecords(sort_by=sort_by, **{group_by: lookup._Contains(rec.id)})
+    return lookup_table.lookupRecords(sort_by=sort_by, **{group_by: functions.CONTAINS(rec.id)})
  return func


--- a/sandbox/grist/functions/lookup.py
+++ b/sandbox/grist/functions/lookup.py
@@ -151,7 +151,17 @@ def VLOOKUP(table, **field_value_pairs):
  """
  return table.lookupOne(**field_value_pairs)

-class _Contains(namedtuple("_Contains", "value")):
+
+class _NoMatchEmpty(object):
+  """
+  Singleton sentinel value for CONTAINS match_empty parameter to indicate no argument was passed
+  and no value should match against empty lists in lookups.
+  """
+  def __repr__(self):
+    return "no_match_empty"
+
+
+class _Contains(namedtuple("_Contains", "value match_empty")):
  """
  Use this marker with [UserTable.lookupRecords](#lookuprecords) to find records
  where a field of a list type (such as `Choice List` or `Reference List`) contains the given value.
@@ -169,6 +179,16 @@ class _Contains(namedtuple("_Contains", "value")):
  In particular the values mustn't be strings, e.g. `"Comedy-Drama"` won't match
  even though `"Drama" in "Comedy-Drama"` is `True` in Python.
  It also won't match substrings within container elements, e.g. `["Comedy-Drama"]`.
+
+  You can optionally pass a second argument `match_empty` to indicate a value that
+  should be matched against empty lists in the looked up column.
+
+  For example, given this formula:
+
+      MoviesTable.lookupRecords(genre=CONTAINS(g, match_empty=''))
+
+  If `g` is `''` (i.e. equal to `match_empty`) then the column `genre` in the returned records
+  will either be an empty list (or other container) or a list containing `g` as usual.
  """
  # While users should apply this marker to values in queries, internally
  # the marker is moved to the column ID so that the LookupMapColumn knows how to
@@ -177,9 +197,16 @@ class _Contains(namedtuple("_Contains", "value")):
  # The CONTAINS function is for users
  # Having a function as the interface makes things like docs and autocomplete
  # work more consistently
-  pass

-def CONTAINS(value):
-  return _Contains(value)
+  no_match_empty = _NoMatchEmpty()
+
+
+def CONTAINS(value, match_empty=_Contains.no_match_empty):
+  try:
+    hash(match_empty)
+  except TypeError:
+    raise TypeError("match_empty must be hashable")
+
+  return _Contains(value, match_empty)

 CONTAINS.__doc__ = _Contains.__doc__
--- a/sandbox/grist/lookup.py
+++ b/sandbox/grist/lookup.py
@@ -198,6 +198,8 @@ class ContainsLookupMapColumn(BaseLookupMapColumn):
        # group = [] essentially means there are no new keys in this call
        if isinstance(group, (six.binary_type, six.text_type)):
          group = []
+        elif not group and col_id.match_empty != _Contains.no_match_empty:
+          group = [col_id.match_empty]
      else:
        group = [group]

--- a/sandbox/grist/summary.py
+++ b/sandbox/grist/summary.py
@@ -388,9 +388,11 @@ class SummaryActions(object):
    group_args = ', '.join(
      '%s=%s' % (
        c.summarySourceCol.colId,
-        'CONTAINS($%s)' % c.colId
-        if c.summarySourceCol.type.startswith(('ChoiceList', 'RefList:')) else
-        '$%s' % c.colId,
+        (
+          'CONTAINS($%s, match_empty="")' if c.summarySourceCol.type == 'ChoiceList' else
+          'CONTAINS($%s, match_empty=0)' if c.summarySourceCol.type.startswith('Reflist') else
+          '$%s'
+        ) % c.colId,
      )
      for c in field_col_recs if c.summarySourceCol
    )
--- a/sandbox/grist/table.py
+++ b/sandbox/grist/table.py
@@ -8,6 +8,7 @@ from six.moves import xrange
 import column
 import depend
 import docmodel
+import functions
 import logger
 import lookup
 import records
@@ -336,8 +337,8 @@ class Table(object):
        lookup_values = []
        for group_col in groupby_cols:
          lookup_value = getattr(rec, group_col)
-          if isinstance(self.all_columns[group_col],
-                        (column.ChoiceListColumn, column.ReferenceListColumn)):
+          group_col_obj = self.all_columns[group_col]
+          if isinstance(group_col_obj, (column.ChoiceListColumn, column.ReferenceListColumn)):
            # Check that ChoiceList/ReferenceList cells have appropriate types.
            # Don't iterate over characters of a string.
            if isinstance(lookup_value, (six.binary_type, six.text_type)):
@@ -347,6 +348,13 @@ class Table(object):
              lookup_value = set(lookup_value)
            except TypeError:
              return []
+
+            if not lookup_value:
+              if isinstance(group_col_obj, column.ChoiceListColumn):
+                lookup_value = {""}
+              else:
+                lookup_value = {0}
+
          else:
            lookup_value = [lookup_value]
          lookup_values.append(lookup_value)
@@ -459,11 +467,11 @@ class Table(object):
    for col_id in sorted(kwargs):
      value = kwargs[col_id]
      if isinstance(value, lookup._Contains):
-        value = value.value
        # While users should use CONTAINS on lookup values,
        # the marker is moved to col_id so that the LookupMapColumn knows how to
        # update its index correctly for that column.
-        col_id = lookup._Contains(col_id)
+        col_id = value._replace(value=col_id)
+        value = value.value
      else:
        col = self.get_column(col_id)
        # Convert `value` to the correct type of rich value for that column
@@ -527,7 +535,7 @@ class Table(object):
      # _summary_source_table._summary_simple determines whether
      # the column named self._summary_helper_col_id is a single reference
      # or a reference list.
-      lookup_value = rec if self._summary_simple else lookup._Contains(rec)
+      lookup_value = rec if self._summary_simple else functions.CONTAINS(rec)
      return self._summary_source_table.lookup_records(**{
        self._summary_helper_col_id: lookup_value
      })
--- a/sandbox/grist/test_summary_choicelist.py
+++ b/sandbox/grist/test_summary_choicelist.py
@@ -142,10 +142,10 @@ class TestSummaryChoiceList(EngineTestCase):
      {k: type(v) for k, v in self.engine.tables["Source"]._special_cols.items()},
      {
        '#summary#GristSummary_6_Source': column.ReferenceListColumn,
-        "#lookup#_Contains(value='#summary#GristSummary_6_Source')":
+        "#lookup#_Contains(value='#summary#GristSummary_6_Source', match_empty=no_match_empty)":
          lookup.ContainsLookupMapColumn,
        '#summary#GristSummary_6_Source2': column.ReferenceListColumn,
-        "#lookup#_Contains(value='#summary#GristSummary_6_Source2')":
+        "#lookup#_Contains(value='#summary#GristSummary_6_Source2', match_empty=no_match_empty)":
          lookup.ContainsLookupMapColumn,

        # simple summary and lookup
@@ -153,7 +153,7 @@ class TestSummaryChoiceList(EngineTestCase):
        '#lookup##summary#GristSummary_6_Source3': lookup.SimpleLookupMapColumn,

        '#summary#GristSummary_6_Source4': column.ReferenceListColumn,
-        "#lookup#_Contains(value='#summary#GristSummary_6_Source4')":
+        "#lookup#_Contains(value='#summary#GristSummary_6_Source4', match_empty=no_match_empty)":
          lookup.ContainsLookupMapColumn,

        "#lookup#": lookup.SimpleLookupMapColumn,
@@ -203,14 +203,19 @@ class TestSummaryChoiceList(EngineTestCase):
      [5, "a", "e", [21], 1],
    ])

-    # Remove record from source
-    self.remove_record("Source", 21)
+    # Empty choices1
+    self.update_record("Source", 21, choices1=None)
+
+    self.assertTableData('Source', data=[
+      ["id", "choices1", "choices2", "other"],
+      [21, None, ["c", "d", "e"], "foo"],
+    ])

-    # All summary rows are now empty
    self.assertTableData('GristSummary_6_Source', data=[
      ["id", "choices1", "group", "count"],
      [1, "a", [], 0],
      [2, "b", [], 0],
+      [3, "", [21], 1],
    ])

    self.assertTableData('GristSummary_6_Source2', data=[
@@ -220,6 +225,32 @@ class TestSummaryChoiceList(EngineTestCase):
      [3, "b", "c", [], 0],
      [4, "b", "d", [], 0],
      [5, "a", "e", [], 0],
+      [6, "", "c", [21], 1],
+      [7, "", "d", [21], 1],
+      [8, "", "e", [21], 1],
+    ])
+
+    # Remove record from source
+    self.remove_record("Source", 21)
+
+    # All summary rows are now empty
+    self.assertTableData('GristSummary_6_Source', data=[
+      ["id", "choices1", "group", "count"],
+      [1, "a", [], 0],
+      [2, "b", [], 0],
+      [3, "", [], 0],
+    ])
+
+    self.assertTableData('GristSummary_6_Source2', data=[
+      ["id", "choices1", "choices2", "group", "count"],
+      [1, "a", "c", [], 0],
+      [2, "a", "d", [], 0],
+      [3, "b", "c", [], 0],
+      [4, "b", "d", [], 0],
+      [5, "a", "e", [], 0],
+      [6, "",  "c", [], 0],
+      [7, "",  "d", [], 0],
+      [8, "",  "e", [], 0],
    ])

    # Make rows with every combination of {a,b,ab} and {c,d,cd}
@@ -236,6 +267,8 @@ class TestSummaryChoiceList(EngineTestCase):
        [107, ["L", "a"],      ["L", "c", "d"]],
        [108, ["L", "b"],      ["L", "c", "d"]],
        [109, ["L", "a", "b"], ["L", "c", "d"]],
+        # and one row with empty lists
+        [110, ["L"],           ["L"]],
      ]
    )

@@ -250,6 +283,7 @@ class TestSummaryChoiceList(EngineTestCase):
      [107, ["a"],      ["c", "d"]],
      [108, ["b"],      ["c", "d"]],
      [109, ["a", "b"], ["c", "d"]],
+      [110, None,       None],
    ])

    # Summary tables now have an even distribution of combinations
@@ -257,6 +291,7 @@ class TestSummaryChoiceList(EngineTestCase):
      ["id", "choices1", "group", "count"],
      [1, "a", [101, 103, 104, 106, 107, 109], 6],
      [2, "b", [102, 103, 105, 106, 108, 109], 6],
+      [3, "",  [110], 1],
    ])

    summary_data = [
@@ -266,6 +301,10 @@ class TestSummaryChoiceList(EngineTestCase):
      [3, "b", "c", [102, 103, 108, 109], 4],
      [4, "b", "d", [105, 106, 108, 109], 4],
      [5, "a", "e", [], 0],
+      [6, "", "c", [], 0],
+      [7, "", "d", [], 0],
+      [8, "", "e", [], 0],
+      [9, "", "", [110], 1],
    ]

    self.assertTableData('GristSummary_6_Source2', data=summary_data)
@@ -284,8 +323,9 @@ class TestSummaryChoiceList(EngineTestCase):
          Column(30, "count", "Int", isFormula=True, summarySourceCol=0,
                 formula="len($group)"),
          Column(31, "group", "RefList:Source", isFormula=True, summarySourceCol=0,
-                 formula="Source.lookupRecords(choices1=CONTAINS($choices1), choices2=CONTAINS($choices2))"),
-
+                 formula='Source.lookupRecords('
+                         'choices1=CONTAINS($choices1, match_empty=""), '
+                         'choices2=CONTAINS($choices2, match_empty=""))'),
        ],
      )
    ])