mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
(core) Avoid quadratic time complexity in fetch_table with query
Summary: While looking at webhooks code, I noticed that it calls `ActiveDoc.fetchQuery` (which I think typically leads to the sandbox method `fetch_table`) with a query on the `id` column, where the values could potentially be all row IDs in the table (e.g. if a new column was added, as in a recent discussion about webhooks gone wrong). This suggests that `fetch_table` should try to avoid using a list for values when possible. In practice this only starts to become an issue at about 10k rows, so I don't know if this has caused any real problems, but it seemed like something worth fixing. Test Plan: Extended unit tests for correctness. Tested performance manually. Made a doc with this formula: ``` import time row_ids = list(Table2.table.row_ids) query = {"id": row_ids} start = time.time() result = table.table._engine.fetch_table('Table2', query=query) end = time.time() assert result[1] == row_ids end - start, len(row_ids) ``` Then put a bunch of rows in `Table2`. This diff made the returned elapsed time much less. Reviewers: georgegevoian Reviewed By: georgegevoian Subscribers: jarek Differential Revision: https://phab.getgrist.com/D4092
This commit is contained in:
parent
7a85aaa7a1
commit
fb09b7afa0
@ -388,9 +388,27 @@ class Engine(object):
|
|||||||
|
|
||||||
query_cols = []
|
query_cols = []
|
||||||
if query:
|
if query:
|
||||||
query_cols = [(table.get_column(col_id), values) for (col_id, values) in six.iteritems(query)]
|
for col_id, values in six.iteritems(query):
|
||||||
row_ids = [r for r in table.row_ids
|
col = table.get_column(col_id)
|
||||||
if all((c.raw_get(r) in values) for (c, values) in query_cols)]
|
try:
|
||||||
|
# Try to use a set for speed.
|
||||||
|
values = set(values)
|
||||||
|
except TypeError:
|
||||||
|
# Values contains an unhashable value, leave it as a list.
|
||||||
|
pass
|
||||||
|
query_cols.append((col, values))
|
||||||
|
row_ids = []
|
||||||
|
for r in table.row_ids:
|
||||||
|
for (c, values) in query_cols:
|
||||||
|
try:
|
||||||
|
if c.raw_get(r) not in values:
|
||||||
|
break
|
||||||
|
except TypeError:
|
||||||
|
# values is a set but c.raw_get(r) is unhashable, so it's definitely not in values
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# No break, i.e. all columns matched
|
||||||
|
row_ids.append(r)
|
||||||
|
|
||||||
for c in six.itervalues(table.all_columns):
|
for c in six.itervalues(table.all_columns):
|
||||||
# pylint: disable=too-many-boolean-expressions
|
# pylint: disable=too-many-boolean-expressions
|
||||||
|
@ -557,6 +557,30 @@ class TestEngine(EngineTestCase):
|
|||||||
self.assertEqualDocData({'Address': data},
|
self.assertEqualDocData({'Address': data},
|
||||||
{'Address': testutil.table_data_from_rows('Address', col_names, [])})
|
{'Address': testutil.table_data_from_rows('Address', col_names, [])})
|
||||||
|
|
||||||
|
# Test unhashable values in the column and in the query
|
||||||
|
self.add_column('Address', 'list', type='Any', isFormula=True,
|
||||||
|
formula='[1] if $id == 21 else 2')
|
||||||
|
col_names.append('list')
|
||||||
|
|
||||||
|
data = self.engine.fetch_table('Address', query={'list': [[1]]})
|
||||||
|
self.assertEqualDocData({'Address': data},
|
||||||
|
{'Address': testutil.table_data_from_rows('Address', col_names, [
|
||||||
|
[ 21, "New York", "NY" , 1, [1]],
|
||||||
|
])})
|
||||||
|
|
||||||
|
data = self.engine.fetch_table('Address', query={'list': [2]})
|
||||||
|
self.assertEqualDocData({'Address': data},
|
||||||
|
{'Address': testutil.table_data_from_rows('Address', col_names, [
|
||||||
|
[ 22, "Albany", "NY" , 2, 2],
|
||||||
|
])})
|
||||||
|
|
||||||
|
data = self.engine.fetch_table('Address', query={'list': [[1], 2]})
|
||||||
|
self.assertEqualDocData({'Address': data},
|
||||||
|
{'Address': testutil.table_data_from_rows('Address', col_names, [
|
||||||
|
[ 21, "New York", "NY" , 1, [1]],
|
||||||
|
[ 22, "Albany", "NY" , 2, 2],
|
||||||
|
])})
|
||||||
|
|
||||||
def test_schema_restore_on_error(self):
|
def test_schema_restore_on_error(self):
|
||||||
# Simulate an error inside a DocAction, and make sure we restore the schema (don't leave it in
|
# Simulate an error inside a DocAction, and make sure we restore the schema (don't leave it in
|
||||||
# inconsistent with metadata).
|
# inconsistent with metadata).
|
||||||
|
Loading…
Reference in New Issue
Block a user