From 6db92328839e1f597efad6208ea1e67ada166121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaros=C5=82aw=20Sadzi=C5=84ski?= Date: Mon, 24 Apr 2023 12:05:34 +0200 Subject: [PATCH] Extracting column data and preventing multiple columns --- .gitignore | 2 +- sandbox/grist/column.py | 62 ++++++++++++++++++---------- sandbox/grist/data.py | 48 +++++++++++++++++++++ sandbox/grist/docactions.py | 3 +- sandbox/grist/engine.py | 62 +++++++++++++++++++++++++++- sandbox/grist/poc.py | 49 ++++++++++++++++++++++ sandbox/grist/table.py | 6 +++ sandbox/grist/test_import_actions.py | 5 +++ sandbox/grist/test_recordlist.py | 5 +++ sandbox/grist/test_table_actions.py | 5 +++ 10 files changed, 222 insertions(+), 25 deletions(-) create mode 100644 sandbox/grist/data.py create mode 100644 sandbox/grist/poc.py diff --git a/.gitignore b/.gitignore index cc60d2eb..5b2e1ff1 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,7 @@ # Build helper files. /.build* - +*.grist *.swp *.pyc *.bak diff --git a/sandbox/grist/column.py b/sandbox/grist/column.py index c0ba8906..6aefa45f 100644 --- a/sandbox/grist/column.py +++ b/sandbox/grist/column.py @@ -61,16 +61,23 @@ class BaseColumn(object): def __init__(self, table, col_id, col_info): self.type_obj = col_info.type_obj self._is_right_type = self.type_obj.is_right_type - self._data = [] self.col_id = col_id self.table_id = table.table_id + self.engine = table._engine self.node = depend.Node(self.table_id, col_id) self._is_formula = col_info.is_formula self._is_private = bool(col_info.method) and getattr(col_info.method, 'is_private', False) self.update_method(col_info.method) + self.detached = False - # Always initialize to include the special empty record at index 0. - self.growto(1) + self._data = None + self._table = table + self.engine.data.create_column(self) + + + def iterate(self): + for row_id, value in self._data.iterate(): + yield row_id, value def update_method(self, method): """ @@ -101,38 +108,46 @@ class BaseColumn(object): return self.method is not None def clear(self): - self._data = [] - self.growto(1) # Always include the special empty record at index 0. + if self.detached: + raise Exception('Column already detached: ', self.table_id, self.col_id) + self._data.clear() def destroy(self): """ Called when the column is deleted. """ - del self._data[:] + if self.detached: + print('Warning - destroying already detached column: ', self.table_id, self.col_id) + return + + self.engine.data.drop_column(self) def growto(self, size): - if len(self._data) < size: - self._data.extend([self.getdefault()] * (size - len(self._data))) + if self.detached: + raise Exception('Column already detached: ', self.table_id, self.col_id) + + self._data.growto(size) def size(self): - return len(self._data) + return self._data.size() def set(self, row_id, value): """ Sets the value of this column for the given row_id. Value should be as returned by convert(), i.e. of the right type, or alttext, or error (but should NOT be random wrong types). """ - try: - self._data[row_id] = value - except IndexError: - self.growto(row_id + 1) - self._data[row_id] = value + if self.detached: + raise Exception('Column already detached: ', self.table_id, self.col_id) + self._data.set(row_id, value) def unset(self, row_id): """ Sets the value for the given row_id to the default value. """ + if self.detached: + raise Exception('Column already detached: ', self.table_id, self.col_id) self.set(row_id, self.getdefault()) + self._data.unset(row_id) def get_cell_value(self, row_id, restore=False): """ @@ -182,10 +197,7 @@ class BaseColumn(object): Returns the value stored for the given row_id. This may be an error or alttext, and it does not convert to a richer object. """ - try: - return self._data[row_id] - except IndexError: - return self.getdefault() + return self._data.raw_get(row_id) def safe_get(self, row_id): """ @@ -212,7 +224,15 @@ class BaseColumn(object): """ Replace this column's data entirely with data from another column of the same exact type. """ - self._data[:] = other_column._data + if self.detached: + raise Exception('Column already detached: ', self.table_id, self.col_id) + if other_column.detached: + print('Warning: copying from detached column: ', other_column.table_id, other_column.col_id) + return + + print('Column {}.{} is copying from {}.{}'.format(self.table_id, self.col_id, other_column.table_id, other_column.col_id)) + + self._data.copy_from(other_column._data) def convert(self, value_to_convert): """ @@ -244,7 +264,7 @@ class ChoiceColumn(DataColumn): def rename_choices(self, renames): row_ids = [] values = [] - for row_id, value in enumerate(self._data): + for row_id, value in self.iterate(): if value is not None and self.type_obj.is_right_type(value): value = self._rename_cell_choice(renames, value) if value is not None: @@ -443,7 +463,7 @@ class BaseReferenceColumn(BaseColumn): # This is hacky: we should have an interface to iterate through values of a column. (As it is, # self._data may include values for non-existent rows; it works here because those values are # falsy, which makes them ignored by self._update_references). - for row_id, value in enumerate(self._data): + for row_id, value in self.iterate(): if self.type_obj.is_right_type(value): self._update_references(row_id, None, value) diff --git a/sandbox/grist/data.py b/sandbox/grist/data.py new file mode 100644 index 00000000..5973b249 --- /dev/null +++ b/sandbox/grist/data.py @@ -0,0 +1,48 @@ +class ColumnData(object): + def __init__(self, col): + self.col = col + self.data = [] + # Always initialize to include the special empty record at index 0. + self.growto(1) + + def drop(self): + del self.data[:] + + def growto(self, size): + if len(self.data) < size: + self.data.extend([self.getdefault()] * (size - len(self.data))) + + def getdefault(self): + return self.col.type_obj.default + + def size(self): + return len(self.data) + + def clear(self): + if self.size() == 1: + return + raise NotImplementedError("clear() not implemented for this column type") + + + def raw_get(self, row_id): + try: + return self.data[row_id] + except IndexError: + return self.getdefault() + + def set(self, row_id, value): + try: + self.data[row_id] = value + except IndexError: + self.growto(row_id + 1) + self.data[row_id] = value + + def iterate(self): + for i in range(1, len(self.data)): + yield i, self.raw_get(i) + + def copy_from(self, other_column): + self.data[:] = other_column.data + + def unset(self, row_id): + pass \ No newline at end of file diff --git a/sandbox/grist/docactions.py b/sandbox/grist/docactions.py index 592cb6c7..f1579726 100644 --- a/sandbox/grist/docactions.py +++ b/sandbox/grist/docactions.py @@ -209,8 +209,7 @@ class DocActions(object): # Fill in the new column with the values from the old column. new_column = table.get_column(col_id) - for row_id in table.row_ids: - new_column.set(row_id, old_column.raw_get(row_id)) + new_column.copy_from_column(old_column) # Generate the undo action. self._engine.out_actions.undo.append(actions.ModifyColumn(table_id, col_id, undo_col_info)) diff --git a/sandbox/grist/engine.py b/sandbox/grist/engine.py index 7f4926bc..6464e808 100644 --- a/sandbox/grist/engine.py +++ b/sandbox/grist/engine.py @@ -15,7 +15,7 @@ import six from six.moves import zip from six.moves.collections_abc import Hashable # pylint:disable-all from sortedcontainers import SortedSet - +from data import ColumnData import acl import actions import action_obj @@ -105,6 +105,57 @@ skipped_completions = re.compile(r'\.(_|lookupOrAddDerived|getSummarySourceGroup # column may refer to derived tables or independent tables. Derived tables would have an extra # property, marking them as derived, which would affect certain UI decisions. +class Database(object): + __slots__ = ('engine', 'tables') + + def __init__(self, engine): + self.engine = engine + self.tables = {} + + + def create_table(self, table): + if table.table_id in self.tables: + raise ValueError("Table %s already exists" % table.table_id) + print("Creating table %s" % table.table_id) + self.tables[table.table_id] = dict() + + + def drop_table(self, table): + if table.table_id not in self.tables: + raise ValueError("Table %s already exists" % table.table_id) + print("Deleting table %s" % table.table_id) + del self.tables[table.table_id] + + + def create_column(self, col): + if col.table_id not in self.tables: + self.tables[col.table_id] = dict() + + if col.col_id in self.tables[col.table_id]: + old_one = self.tables[col.table_id][col.col_id] + col._data = old_one._data + col._data.col = col + old_one.detached = True + old_one._data = None + else: + col._data = ColumnData(col) + # print('Column {}.{} is detaching column {}.{}'.format(self.table_id, self.col_id, old_one.table_id, old_one.col_id)) + # print('Creating column: ', self.table_id, self.col_id) + self.tables[col.table_id][col.col_id] = col + col.detached = False + + def drop_column(self, col): + tables = self.tables + + if col.table_id not in tables: + raise Exception('Table not found for column: ', col.table_id, col.col_id) + + if col.col_id not in tables[col.table_id]: + raise Exception('Column not found: ', col.table_id, col.col_id) + + print('Destroying column: ', col.table_id, col.col_id) + col._data.drop() + del tables[col.table_id][col.col_id] class Engine(object): """ @@ -140,6 +191,8 @@ class Engine(object): """ def __init__(self): + self.data = Database(self) # The document data, including logic (formulas), and metadata. + # The document data, including logic (formulas), and metadata (tables prefixed with "_grist_"). self.tables = {} # Maps table IDs (or names) to Table objects. @@ -204,6 +257,7 @@ class Engine(object): # The list of columns that got deleted while applying an action. self._gone_columns = [] + self._gone_tables = [] # The set of potentially unused LookupMapColumns. self._unused_lookups = set() @@ -1177,6 +1231,7 @@ class Engine(object): if user_table is None: for c in table.get_helper_columns(): self.delete_column(c) + self._gone_tables.append(table) def _maybe_update_trigger_dependencies(self): if not self._have_trigger_columns_changed: @@ -1302,6 +1357,7 @@ class Engine(object): # consistent internally as well as with the clients and database outside of the sandbox # (which won't see any changes in case of an error). log.info("Failed to apply useractions; reverting: %r" % (e,)) + print("Failed to apply useractions; reverting: %r" % (e,)) self._undo_to_checkpoint(checkpoint) # Check schema consistency again. If this fails, something is really wrong (we tried to go @@ -1361,6 +1417,7 @@ class Engine(object): """ #log.warn("Engine.apply_doc_action %s" % (doc_action,)) self._gone_columns = [] + self._gone_tables = [] action_name = doc_action.__class__.__name__ saved_schema = None @@ -1400,6 +1457,9 @@ class Engine(object): actions.prune_actions(self.out_actions.calc, col.table_id, col.col_id) col.destroy() + for table in self._gone_tables: + table.destroy() + # We normally recompute formulas before returning to the user; but some formulas are also used # internally in-between applying doc actions. We have this workaround to ensure that those are # up-to-date after each doc action. See more in comments for _bring_mlookups_up_to_date. diff --git a/sandbox/grist/poc.py b/sandbox/grist/poc.py new file mode 100644 index 00000000..48bf2f46 --- /dev/null +++ b/sandbox/grist/poc.py @@ -0,0 +1,49 @@ +import difflib +import functools +import json +import unittest +from collections import namedtuple +from pprint import pprint + +import six + +import actions +import column +import engine +import logger +import useractions +import testutil +import objtypes + + +eng = engine.Engine() +eng.load_empty() + + +def apply(actions): + if not actions: + return [] + if not isinstance(actions[0], list): + actions = [actions] + return eng.apply_user_actions([useractions.from_repr(a) for a in actions]) + + +try: + apply(['AddRawTable', 'Table1']) + apply(['AddRecord', 'Table1', None, {'A': 1, 'B': 2, 'C': 3}]) + # apply(['RenameColumn', 'Table1', 'A', 'NewA']) + apply(['RenameTable', 'Table1', 'Dwa']) + + # ['RemoveColumn', "Table1", 'A'], + # ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}], + # ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}], + # ['ModifyColumn', 'Table1', 'B', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 1'}], + #]) + + # ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}], + # ['ModifyColumn', 'Table1', 'B', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 1'}], +finally: + # Test if method close is in engine + if hasattr(eng, 'close'): + eng.close() + \ No newline at end of file diff --git a/sandbox/grist/table.py b/sandbox/grist/table.py index 060625ac..144b1791 100644 --- a/sandbox/grist/table.py +++ b/sandbox/grist/table.py @@ -184,6 +184,8 @@ class Table(object): # Each table maintains a reference to the engine that owns it. self._engine = engine + engine.data.create_table(self) + # The UserTable object for this table, set in _rebuild_model self.user_table = None @@ -242,6 +244,10 @@ class Table(object): # is called seems to be too late, at least for unit tests. self._empty_lookup_column = self._get_lookup_map(()) + + def destroy(self): + self._engine.data.drop_table(self) + def _num_rows(self): """ Similar to `len(self.lookup_records())` but faster and doesn't create dependencies. diff --git a/sandbox/grist/test_import_actions.py b/sandbox/grist/test_import_actions.py index ca39086c..d5e24914 100644 --- a/sandbox/grist/test_import_actions.py +++ b/sandbox/grist/test_import_actions.py @@ -1,4 +1,5 @@ # pylint: disable=line-too-long +import unittest import logger import test_engine @@ -189,3 +190,7 @@ class TestImportActions(test_engine.EngineTestCase): [6, 3, [12]], [7, 1, [13, 14, 15]], # new section for transform preview ]) + + +if __name__ == "__main__": + unittest.main() diff --git a/sandbox/grist/test_recordlist.py b/sandbox/grist/test_recordlist.py index 3dd11e64..dd8d45e7 100644 --- a/sandbox/grist/test_recordlist.py +++ b/sandbox/grist/test_recordlist.py @@ -1,3 +1,4 @@ +import unittest import testutil import test_engine from objtypes import RecordSetStub @@ -100,3 +101,7 @@ class TestRecordList(test_engine.EngineTestCase): [1, "Mammals", [1, 3], ["Mammals", "Mammals"], [mammals, mammals]], [2, "Reptilia", [2, 4], ["Reptilia", "Reptilia"], [reptiles, reptiles]], ]) + + +if __name__ == "__main__": + unittest.main() diff --git a/sandbox/grist/test_table_actions.py b/sandbox/grist/test_table_actions.py index f6576cf0..bde08b38 100644 --- a/sandbox/grist/test_table_actions.py +++ b/sandbox/grist/test_table_actions.py @@ -1,3 +1,4 @@ +import unittest import logger import testutil @@ -306,3 +307,7 @@ class TestTableActions(test_engine.EngineTestCase): ]), ]), ]) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file