(core) Add a row to summary tables grouped by list column(s) corresponding to empty lists

Summary:
Adds some special handling to summary table and lookup logic:

- Source rows with empty choicelists/reflists get a corresponding summary row with an empty string/reference when grouping by that column, instead of excluding them from any group
- Adds a new `QueryOperation` 'empty' in the client which is used in `LinkingState`, `QuerySet`, and `recursiveMoveToCursorPos` to match empty lists in source tables against falsy values in linked summary tables.
- Adds a new parameter `match_empty` to the Python `CONTAINS` function so that regular formulas can implement the same behaviour as summary tables. See https://grist.slack.com/archives/C0234CPPXPA/p1654030490932119
- Uses the new `match_empty` argument in the formula generated for the `group` column when detaching a summary table.

Test Plan: Updated and extended Python and nbrowser tests of summary tables grouped by choicelists to test for new behaviour with empty lists.

Reviewers: georgegevoian

Reviewed By: georgegevoian

Differential Revision: https://phab.getgrist.com/D3471
This commit is contained in:
Alex Hall 2022-06-07 16:57:29 +02:00
parent 3b30c052bc
commit 1c89d08ea3
10 changed files with 128 additions and 43 deletions

View File

@ -55,7 +55,7 @@ import {CommDocUsage, CommDocUserAction} from 'app/common/CommTypes';
import {DisposableWithEvents} from 'app/common/DisposableWithEvents';
import {isSchemaAction, UserAction} from 'app/common/DocActions';
import {OpenLocalDocResult} from 'app/common/DocListAPI';
import {isList, isRefListType, RecalcWhen} from 'app/common/gristTypes';
import {isList, isListType, isRefListType, RecalcWhen} from 'app/common/gristTypes';
import {HashLink, IDocPage, isViewDocPage, SpecialDocPage, ViewDocPage} from 'app/common/gristUrls';
import {undef, waitObs} from 'app/common/gutil';
import {LocalPlugin} from "app/common/plugin";
@ -856,9 +856,12 @@ export class GristDoc extends DisposableWithEvents {
// must be a summary -- otherwise dealt with earlier.
const destTable = await this._getTableData(section);
for (const srcCol of srcSection.table.peek().groupByColumns.peek()) {
const filterColId = srcCol.summarySource.peek().colId.peek();
const filterCol = srcCol.summarySource.peek();
const filterColId = filterCol.colId.peek();
controller = destTable.getValue(cursorPos.rowId, filterColId);
query.operations[filterColId] = 'in';
// If the source groupby column is a ChoiceList or RefList, then null or '' in the summary table
// should match against an empty list in the source table.
query.operations[filterColId] = isListType(filterCol.type.peek()) && !controller ? 'empty' : 'in';
query.filters[filterColId] = isList(controller) ? controller.slice(1) : [controller];
}
}

View File

@ -7,14 +7,14 @@ import {ViewSectionRec} from "app/client/models/entities/ViewSectionRec";
import {RowId} from "app/client/models/rowset";
import {LinkConfig} from "app/client/ui/selectBy";
import {ClientQuery, QueryOperation} from "app/common/ActiveDocAPI";
import {isList, isRefListType} from "app/common/gristTypes";
import {isList, isListType, isRefListType} from "app/common/gristTypes";
import * as gutil from "app/common/gutil";
import {encodeObject} from 'app/plugin/objtypes';
import {Disposable, toKo} from "grainjs";
import * as ko from "knockout";
import identity = require('lodash/identity');
import mapValues = require('lodash/mapValues');
import pickBy = require('lodash/pickBy');
import identity = require('lodash/identity');
/**
@ -124,18 +124,14 @@ export class LinkingState extends Disposable {
const srcValue = srcTableData.getValue(srcRowId as number, colId);
result.filters[colId] = [srcValue];
result.operations[colId] = 'in';
if (isDirectSummary) {
const tgtColType = col.type();
if (tgtColType === 'ChoiceList' || tgtColType.startsWith('RefList:')) {
result.operations[colId] = 'intersects';
}
if (isDirectSummary && isListType(col.type())) {
// If the source groupby column is a ChoiceList or RefList, then null or '' in the summary table
// should match against an empty list in the source table.
result.operations[colId] = srcValue ? 'intersects' : 'empty';
}
}
_filterColValues(result);
}
} else if (isSummaryOf(tgtSection.table(), srcSection.table())) {
// TODO: We should move the cursor, but don't currently it for summaries. For that, we need a
// column or map representing the inverse of summary table's "group" column.
} else if (srcSection.parentKey() === 'custom') {
this.filterColValues = this._srcCustomFilter('id', 'in');
} else {

View File

@ -33,16 +33,16 @@ import {TableData} from 'app/client/models/TableData';
import {ActiveDocAPI, ClientQuery, QueryOperation} from 'app/common/ActiveDocAPI';
import {CellValue, TableDataAction} from 'app/common/DocActions';
import {DocData} from 'app/common/DocData';
import {isList} from "app/common/gristTypes";
import {nativeCompare} from 'app/common/gutil';
import {IRefCountSub, RefCountMap} from 'app/common/RefCountMap';
import {TableData as BaseTableData} from 'app/common/TableData';
import {RowFilterFunc} from 'app/common/RowFilterFunc';
import {TableData as BaseTableData} from 'app/common/TableData';
import {tbind} from 'app/common/tbind';
import {decodeObject} from "app/plugin/objtypes";
import {Disposable, Holder, IDisposableOwnerT} from 'grainjs';
import * as ko from 'knockout';
import debounce = require('lodash/debounce');
import {isList} from "app/common/gristTypes";
import {decodeObject} from "app/plugin/objtypes";
// Limit on the how many rows to request for OnDemand tables.
const ON_DEMAND_ROW_LIMIT = 10000;
@ -310,17 +310,21 @@ export function getFilterFunc(docData: DocData, query: ClientQuery): RowFilterFu
(colId) => {
const getter = tableData.getRowPropFunc(colId)!;
const values = new Set(query.filters[colId]);
switch (query.operations![colId]) {
switch (query.operations[colId]) {
case "intersects":
return (rowId: RowId) => {
const value = getter(rowId) as CellValue;
return isList(value) &&
(decodeObject(value) as unknown[]).some(v => values.has(v));
};
case "empty":
return (rowId: RowId) => {
const value = getter(rowId);
// `isList(value) && value.length === 1` means `value == ['L']` i.e. an empty list
return !value || isList(value) && value.length === 1;
};
case "in":
return (rowId: RowId) => values.has(getter(rowId));
default:
throw new Error("Unknown operation");
}
});
return (rowId: RowId) => colFuncs.every(f => f(rowId));
@ -342,7 +346,7 @@ function convertQueryToRefs(docModel: DocModel, query: ClientQuery): QueryRefs {
const values = query.filters[colId];
// Keep filter values sorted by value, for consistency.
values.sort(nativeCompare);
return [colRefsByColId[colId], query.operations![colId], values] as FilterTuple;
return [colRefsByColId[colId], query.operations[colId], values] as FilterTuple;
});
// Keep filters sorted by colRef, for consistency.
filterTuples.sort((a, b) =>

View File

@ -121,7 +121,10 @@ export interface QueryFilters {
[colId: string]: any[];
}
export type QueryOperation = "in" | "intersects";
// - in: value should be contained in filters array
// - intersects: value should be a list with some overlap with filters array
// - empty: value should be falsy (e.g. null) or an empty list, filters is ignored
export type QueryOperation = "in" | "intersects" | "empty";
/**
* Response from useQuerySet(). A query returns data AND creates a subscription to receive

View File

@ -9,10 +9,10 @@ import itertools
import six
import functions
import records
import usertypes
import relabeling
import lookup
import table
import moment
from schema import RecalcWhen
@ -31,7 +31,7 @@ def _record_ref_list_set(table_id, group_by, sort_by=None):
@usertypes.formulaType(usertypes.ReferenceList(table_id))
def func(rec, table):
lookup_table = table.docmodel.get_table(table_id)
return lookup_table.lookupRecords(sort_by=sort_by, **{group_by: lookup._Contains(rec.id)})
return lookup_table.lookupRecords(sort_by=sort_by, **{group_by: functions.CONTAINS(rec.id)})
return func

View File

@ -151,7 +151,17 @@ def VLOOKUP(table, **field_value_pairs):
"""
return table.lookupOne(**field_value_pairs)
class _Contains(namedtuple("_Contains", "value")):
class _NoMatchEmpty(object):
"""
Singleton sentinel value for CONTAINS match_empty parameter to indicate no argument was passed
and no value should match against empty lists in lookups.
"""
def __repr__(self):
return "no_match_empty"
class _Contains(namedtuple("_Contains", "value match_empty")):
"""
Use this marker with [UserTable.lookupRecords](#lookuprecords) to find records
where a field of a list type (such as `Choice List` or `Reference List`) contains the given value.
@ -169,6 +179,16 @@ class _Contains(namedtuple("_Contains", "value")):
In particular the values mustn't be strings, e.g. `"Comedy-Drama"` won't match
even though `"Drama" in "Comedy-Drama"` is `True` in Python.
It also won't match substrings within container elements, e.g. `["Comedy-Drama"]`.
You can optionally pass a second argument `match_empty` to indicate a value that
should be matched against empty lists in the looked up column.
For example, given this formula:
MoviesTable.lookupRecords(genre=CONTAINS(g, match_empty=''))
If `g` is `''` (i.e. equal to `match_empty`) then the column `genre` in the returned records
will either be an empty list (or other container) or a list containing `g` as usual.
"""
# While users should apply this marker to values in queries, internally
# the marker is moved to the column ID so that the LookupMapColumn knows how to
@ -177,9 +197,16 @@ class _Contains(namedtuple("_Contains", "value")):
# The CONTAINS function is for users
# Having a function as the interface makes things like docs and autocomplete
# work more consistently
pass
def CONTAINS(value):
return _Contains(value)
no_match_empty = _NoMatchEmpty()
def CONTAINS(value, match_empty=_Contains.no_match_empty):
try:
hash(match_empty)
except TypeError:
raise TypeError("match_empty must be hashable")
return _Contains(value, match_empty)
CONTAINS.__doc__ = _Contains.__doc__

View File

@ -198,6 +198,8 @@ class ContainsLookupMapColumn(BaseLookupMapColumn):
# group = [] essentially means there are no new keys in this call
if isinstance(group, (six.binary_type, six.text_type)):
group = []
elif not group and col_id.match_empty != _Contains.no_match_empty:
group = [col_id.match_empty]
else:
group = [group]

View File

@ -388,9 +388,11 @@ class SummaryActions(object):
group_args = ', '.join(
'%s=%s' % (
c.summarySourceCol.colId,
'CONTAINS($%s)' % c.colId
if c.summarySourceCol.type.startswith(('ChoiceList', 'RefList:')) else
'$%s' % c.colId,
(
'CONTAINS($%s, match_empty="")' if c.summarySourceCol.type == 'ChoiceList' else
'CONTAINS($%s, match_empty=0)' if c.summarySourceCol.type.startswith('Reflist') else
'$%s'
) % c.colId,
)
for c in field_col_recs if c.summarySourceCol
)

View File

@ -8,6 +8,7 @@ from six.moves import xrange
import column
import depend
import docmodel
import functions
import logger
import lookup
import records
@ -336,8 +337,8 @@ class Table(object):
lookup_values = []
for group_col in groupby_cols:
lookup_value = getattr(rec, group_col)
if isinstance(self.all_columns[group_col],
(column.ChoiceListColumn, column.ReferenceListColumn)):
group_col_obj = self.all_columns[group_col]
if isinstance(group_col_obj, (column.ChoiceListColumn, column.ReferenceListColumn)):
# Check that ChoiceList/ReferenceList cells have appropriate types.
# Don't iterate over characters of a string.
if isinstance(lookup_value, (six.binary_type, six.text_type)):
@ -347,6 +348,13 @@ class Table(object):
lookup_value = set(lookup_value)
except TypeError:
return []
if not lookup_value:
if isinstance(group_col_obj, column.ChoiceListColumn):
lookup_value = {""}
else:
lookup_value = {0}
else:
lookup_value = [lookup_value]
lookup_values.append(lookup_value)
@ -459,11 +467,11 @@ class Table(object):
for col_id in sorted(kwargs):
value = kwargs[col_id]
if isinstance(value, lookup._Contains):
value = value.value
# While users should use CONTAINS on lookup values,
# the marker is moved to col_id so that the LookupMapColumn knows how to
# update its index correctly for that column.
col_id = lookup._Contains(col_id)
col_id = value._replace(value=col_id)
value = value.value
else:
col = self.get_column(col_id)
# Convert `value` to the correct type of rich value for that column
@ -527,7 +535,7 @@ class Table(object):
# _summary_source_table._summary_simple determines whether
# the column named self._summary_helper_col_id is a single reference
# or a reference list.
lookup_value = rec if self._summary_simple else lookup._Contains(rec)
lookup_value = rec if self._summary_simple else functions.CONTAINS(rec)
return self._summary_source_table.lookup_records(**{
self._summary_helper_col_id: lookup_value
})

View File

@ -142,10 +142,10 @@ class TestSummaryChoiceList(EngineTestCase):
{k: type(v) for k, v in self.engine.tables["Source"]._special_cols.items()},
{
'#summary#GristSummary_6_Source': column.ReferenceListColumn,
"#lookup#_Contains(value='#summary#GristSummary_6_Source')":
"#lookup#_Contains(value='#summary#GristSummary_6_Source', match_empty=no_match_empty)":
lookup.ContainsLookupMapColumn,
'#summary#GristSummary_6_Source2': column.ReferenceListColumn,
"#lookup#_Contains(value='#summary#GristSummary_6_Source2')":
"#lookup#_Contains(value='#summary#GristSummary_6_Source2', match_empty=no_match_empty)":
lookup.ContainsLookupMapColumn,
# simple summary and lookup
@ -153,7 +153,7 @@ class TestSummaryChoiceList(EngineTestCase):
'#lookup##summary#GristSummary_6_Source3': lookup.SimpleLookupMapColumn,
'#summary#GristSummary_6_Source4': column.ReferenceListColumn,
"#lookup#_Contains(value='#summary#GristSummary_6_Source4')":
"#lookup#_Contains(value='#summary#GristSummary_6_Source4', match_empty=no_match_empty)":
lookup.ContainsLookupMapColumn,
"#lookup#": lookup.SimpleLookupMapColumn,
@ -203,14 +203,19 @@ class TestSummaryChoiceList(EngineTestCase):
[5, "a", "e", [21], 1],
])
# Remove record from source
self.remove_record("Source", 21)
# Empty choices1
self.update_record("Source", 21, choices1=None)
self.assertTableData('Source', data=[
["id", "choices1", "choices2", "other"],
[21, None, ["c", "d", "e"], "foo"],
])
# All summary rows are now empty
self.assertTableData('GristSummary_6_Source', data=[
["id", "choices1", "group", "count"],
[1, "a", [], 0],
[2, "b", [], 0],
[3, "", [21], 1],
])
self.assertTableData('GristSummary_6_Source2', data=[
@ -220,6 +225,32 @@ class TestSummaryChoiceList(EngineTestCase):
[3, "b", "c", [], 0],
[4, "b", "d", [], 0],
[5, "a", "e", [], 0],
[6, "", "c", [21], 1],
[7, "", "d", [21], 1],
[8, "", "e", [21], 1],
])
# Remove record from source
self.remove_record("Source", 21)
# All summary rows are now empty
self.assertTableData('GristSummary_6_Source', data=[
["id", "choices1", "group", "count"],
[1, "a", [], 0],
[2, "b", [], 0],
[3, "", [], 0],
])
self.assertTableData('GristSummary_6_Source2', data=[
["id", "choices1", "choices2", "group", "count"],
[1, "a", "c", [], 0],
[2, "a", "d", [], 0],
[3, "b", "c", [], 0],
[4, "b", "d", [], 0],
[5, "a", "e", [], 0],
[6, "", "c", [], 0],
[7, "", "d", [], 0],
[8, "", "e", [], 0],
])
# Make rows with every combination of {a,b,ab} and {c,d,cd}
@ -236,6 +267,8 @@ class TestSummaryChoiceList(EngineTestCase):
[107, ["L", "a"], ["L", "c", "d"]],
[108, ["L", "b"], ["L", "c", "d"]],
[109, ["L", "a", "b"], ["L", "c", "d"]],
# and one row with empty lists
[110, ["L"], ["L"]],
]
)
@ -250,6 +283,7 @@ class TestSummaryChoiceList(EngineTestCase):
[107, ["a"], ["c", "d"]],
[108, ["b"], ["c", "d"]],
[109, ["a", "b"], ["c", "d"]],
[110, None, None],
])
# Summary tables now have an even distribution of combinations
@ -257,6 +291,7 @@ class TestSummaryChoiceList(EngineTestCase):
["id", "choices1", "group", "count"],
[1, "a", [101, 103, 104, 106, 107, 109], 6],
[2, "b", [102, 103, 105, 106, 108, 109], 6],
[3, "", [110], 1],
])
summary_data = [
@ -266,6 +301,10 @@ class TestSummaryChoiceList(EngineTestCase):
[3, "b", "c", [102, 103, 108, 109], 4],
[4, "b", "d", [105, 106, 108, 109], 4],
[5, "a", "e", [], 0],
[6, "", "c", [], 0],
[7, "", "d", [], 0],
[8, "", "e", [], 0],
[9, "", "", [110], 1],
]
self.assertTableData('GristSummary_6_Source2', data=summary_data)
@ -284,8 +323,9 @@ class TestSummaryChoiceList(EngineTestCase):
Column(30, "count", "Int", isFormula=True, summarySourceCol=0,
formula="len($group)"),
Column(31, "group", "RefList:Source", isFormula=True, summarySourceCol=0,
formula="Source.lookupRecords(choices1=CONTAINS($choices1), choices2=CONTAINS($choices2))"),
formula='Source.lookupRecords('
'choices1=CONTAINS($choices1, match_empty=""), '
'choices2=CONTAINS($choices2, match_empty=""))'),
],
)
])