Extracting column data and preventing multiple columns

This commit is contained in:
Jarosław Sadziński 2023-04-24 12:05:34 +02:00
parent a3889a3b13
commit 6db9232883
10 changed files with 222 additions and 25 deletions

2
.gitignore vendored
View File

@ -14,7 +14,7 @@
# Build helper files.
/.build*
*.grist
*.swp
*.pyc
*.bak

View File

@ -61,16 +61,23 @@ class BaseColumn(object):
def __init__(self, table, col_id, col_info):
self.type_obj = col_info.type_obj
self._is_right_type = self.type_obj.is_right_type
self._data = []
self.col_id = col_id
self.table_id = table.table_id
self.engine = table._engine
self.node = depend.Node(self.table_id, col_id)
self._is_formula = col_info.is_formula
self._is_private = bool(col_info.method) and getattr(col_info.method, 'is_private', False)
self.update_method(col_info.method)
self.detached = False
# Always initialize to include the special empty record at index 0.
self.growto(1)
self._data = None
self._table = table
self.engine.data.create_column(self)
def iterate(self):
  """
  Yields (row_id, value) pairs for this column, in whatever order the backing
  storage object (self._data) produces them.
  """
  for entry in self._data.iterate():
    row_id, value = entry
    yield row_id, value
def update_method(self, method):
"""
@ -101,38 +108,46 @@ class BaseColumn(object):
return self.method is not None
def clear(self):
self._data = []
self.growto(1) # Always include the special empty record at index 0.
if self.detached:
raise Exception('Column already detached: ', self.table_id, self.col_id)
self._data.clear()
def destroy(self):
"""
Called when the column is deleted.
"""
del self._data[:]
if self.detached:
print('Warning - destroying already detached column: ', self.table_id, self.col_id)
return
self.engine.data.drop_column(self)
def growto(self, size):
if len(self._data) < size:
self._data.extend([self.getdefault()] * (size - len(self._data)))
if self.detached:
raise Exception('Column already detached: ', self.table_id, self.col_id)
self._data.growto(size)
def size(self):
return len(self._data)
return self._data.size()
def set(self, row_id, value):
"""
Sets the value of this column for the given row_id. Value should be as returned by convert(),
i.e. of the right type, or alttext, or error (but should NOT be random wrong types).
"""
try:
self._data[row_id] = value
except IndexError:
self.growto(row_id + 1)
self._data[row_id] = value
if self.detached:
raise Exception('Column already detached: ', self.table_id, self.col_id)
self._data.set(row_id, value)
def unset(self, row_id):
"""
Sets the value for the given row_id to the default value.
"""
if self.detached:
raise Exception('Column already detached: ', self.table_id, self.col_id)
self.set(row_id, self.getdefault())
self._data.unset(row_id)
def get_cell_value(self, row_id, restore=False):
"""
@ -182,10 +197,7 @@ class BaseColumn(object):
Returns the value stored for the given row_id. This may be an error or alttext, and it does
not convert to a richer object.
"""
try:
return self._data[row_id]
except IndexError:
return self.getdefault()
return self._data.raw_get(row_id)
def safe_get(self, row_id):
"""
@ -212,7 +224,15 @@ class BaseColumn(object):
"""
Replace this column's data entirely with data from another column of the same exact type.
"""
self._data[:] = other_column._data
if self.detached:
raise Exception('Column already detached: ', self.table_id, self.col_id)
if other_column.detached:
print('Warning: copying from detached column: ', other_column.table_id, other_column.col_id)
return
print('Column {}.{} is copying from {}.{}'.format(self.table_id, self.col_id, other_column.table_id, other_column.col_id))
self._data.copy_from(other_column._data)
def convert(self, value_to_convert):
"""
@ -244,7 +264,7 @@ class ChoiceColumn(DataColumn):
def rename_choices(self, renames):
row_ids = []
values = []
for row_id, value in enumerate(self._data):
for row_id, value in self.iterate():
if value is not None and self.type_obj.is_right_type(value):
value = self._rename_cell_choice(renames, value)
if value is not None:
@ -443,7 +463,7 @@ class BaseReferenceColumn(BaseColumn):
# This is hacky: we should have an interface to iterate through values of a column. (As it is,
# self._data may include values for non-existent rows; it works here because those values are
# falsy, which makes them ignored by self._update_references).
for row_id, value in enumerate(self._data):
for row_id, value in self.iterate():
if self.type_obj.is_right_type(value):
self._update_references(row_id, None, value)

48
sandbox/grist/data.py Normal file
View File

@ -0,0 +1,48 @@
class ColumnData(object):
  """
  List-backed storage for the raw values of a single column.

  Values are kept in `self.data`, indexed by row_id. Index 0 always holds the
  special empty record, so a freshly created instance has size 1. The owning
  column is kept in `self.col` and is consulted only for its default value
  (`col.type_obj.default`).
  """

  def __init__(self, col):
    self.col = col
    self.data = []
    # Always initialize to include the special empty record at index 0.
    self.growto(1)

  def drop(self):
    """Releases all stored values, in place (including the record at index 0)."""
    del self.data[:]

  def growto(self, size):
    """Pads the storage with default values until it holds at least `size` entries."""
    missing = size - len(self.data)
    if missing > 0:
      self.data.extend([self.getdefault()] * missing)

  def getdefault(self):
    """Returns the default value of the owning column's type."""
    return self.col.type_obj.default

  def size(self):
    """Returns the number of stored entries, counting the empty record at index 0."""
    return len(self.data)

  def clear(self):
    """
    Resets the storage to contain only the special empty record at index 0.

    The list is emptied in place, so any other holder of `self.data` sees the change.
    """
    del self.data[:]
    self.growto(1)

  def raw_get(self, row_id):
    """Returns the stored value for row_id, or the default if row_id is past the end."""
    try:
      return self.data[row_id]
    except IndexError:
      return self.getdefault()

  def set(self, row_id, value):
    """Stores value at row_id, growing the storage with defaults when needed."""
    try:
      self.data[row_id] = value
    except IndexError:
      self.growto(row_id + 1)
      self.data[row_id] = value

  def iterate(self):
    """Yields (row_id, value) for every stored row, skipping the empty record at index 0."""
    for row_id in range(1, len(self.data)):
      yield row_id, self.raw_get(row_id)

  def copy_from(self, other_column):
    """Replaces all values, in place, with a copy of another ColumnData's values."""
    self.data[:] = other_column.data

  def unset(self, row_id):
    """
    Resets the value for row_id to the default value.

    Rows past the end of the storage already read as the default (see raw_get),
    so they are left alone rather than growing the list.
    """
    if 0 <= row_id < len(self.data):
      self.data[row_id] = self.getdefault()

View File

@ -209,8 +209,7 @@ class DocActions(object):
# Fill in the new column with the values from the old column.
new_column = table.get_column(col_id)
for row_id in table.row_ids:
new_column.set(row_id, old_column.raw_get(row_id))
new_column.copy_from_column(old_column)
# Generate the undo action.
self._engine.out_actions.undo.append(actions.ModifyColumn(table_id, col_id, undo_col_info))

View File

@ -15,7 +15,7 @@ import six
from six.moves import zip
from six.moves.collections_abc import Hashable # pylint:disable-all
from sortedcontainers import SortedSet
from data import ColumnData
import acl
import actions
import action_obj
@ -105,6 +105,57 @@ skipped_completions = re.compile(r'\.(_|lookupOrAddDerived|getSummarySourceGroup
# column may refer to derived tables or independent tables. Derived tables would have an extra
# property, marking them as derived, which would affect certain UI decisions.
class Database(object):
  """
  Registry of the live column objects of a document, keyed by table id and
  then by column id (`self.tables[table_id][col_id] -> column`).

  It ensures a single storage object per (table_id, col_id): when a column is
  created under an id that is already registered, the new column takes over
  the existing storage and the previous column object is marked as detached.
  """
  __slots__ = ('engine', 'tables')

  def __init__(self, engine):
    self.engine = engine
    self.tables = {}

  def create_table(self, table):
    """
    Registers an empty column map for `table`.

    Raises ValueError if a table with the same table_id is already registered.
    """
    if table.table_id in self.tables:
      raise ValueError("Table %s already exists" % table.table_id)
    print("Creating table %s" % table.table_id)
    self.tables[table.table_id] = dict()

  def drop_table(self, table):
    """
    Removes `table` and its column map from the registry.

    Raises ValueError if no table with that table_id is registered.
    """
    if table.table_id not in self.tables:
      raise ValueError("Table %s does not exist" % table.table_id)
    print("Deleting table %s" % table.table_id)
    del self.tables[table.table_id]

  def create_column(self, col):
    """
    Registers `col` and gives it storage in `col._data`.

    If a column with the same table_id and col_id is already registered, its
    storage is handed over to `col` and the old column is left detached with
    `_data` set to None. Otherwise a fresh ColumnData is created. The table's
    column map is created on demand if the table is not registered yet.
    """
    columns = self.tables.setdefault(col.table_id, dict())
    previous = columns.get(col.col_id)
    if col.col_id in columns:
      # Hand the existing storage over to the new column object.
      col._data = previous._data
      col._data.col = col
      previous.detached = True
      previous._data = None
    else:
      col._data = ColumnData(col)
    columns[col.col_id] = col
    col.detached = False

  def drop_column(self, col):
    """
    Drops the storage of `col` and removes it from the registry.

    Raises Exception if the column's table, or the column itself, is not registered.
    """
    tables = self.tables
    if col.table_id not in tables:
      raise Exception('Table not found for column: ', col.table_id, col.col_id)
    if col.col_id not in tables[col.table_id]:
      raise Exception('Column not found: ', col.table_id, col.col_id)
    print('Destroying column: ', col.table_id, col.col_id)
    col._data.drop()
    del tables[col.table_id][col.col_id]
class Engine(object):
"""
@ -140,6 +191,8 @@ class Engine(object):
"""
def __init__(self):
self.data = Database(self) # The document data, including logic (formulas), and metadata.
# The document data, including logic (formulas), and metadata (tables prefixed with "_grist_").
self.tables = {} # Maps table IDs (or names) to Table objects.
@ -204,6 +257,7 @@ class Engine(object):
# The list of columns that got deleted while applying an action.
self._gone_columns = []
self._gone_tables = []
# The set of potentially unused LookupMapColumns.
self._unused_lookups = set()
@ -1177,6 +1231,7 @@ class Engine(object):
if user_table is None:
for c in table.get_helper_columns():
self.delete_column(c)
self._gone_tables.append(table)
def _maybe_update_trigger_dependencies(self):
if not self._have_trigger_columns_changed:
@ -1302,6 +1357,7 @@ class Engine(object):
# consistent internally as well as with the clients and database outside of the sandbox
# (which won't see any changes in case of an error).
log.info("Failed to apply useractions; reverting: %r" % (e,))
print("Failed to apply useractions; reverting: %r" % (e,))
self._undo_to_checkpoint(checkpoint)
# Check schema consistency again. If this fails, something is really wrong (we tried to go
@ -1361,6 +1417,7 @@ class Engine(object):
"""
#log.warn("Engine.apply_doc_action %s" % (doc_action,))
self._gone_columns = []
self._gone_tables = []
action_name = doc_action.__class__.__name__
saved_schema = None
@ -1400,6 +1457,9 @@ class Engine(object):
actions.prune_actions(self.out_actions.calc, col.table_id, col.col_id)
col.destroy()
for table in self._gone_tables:
table.destroy()
# We normally recompute formulas before returning to the user; but some formulas are also used
# internally in-between applying doc actions. We have this workaround to ensure that those are
# up-to-date after each doc action. See more in comments for _bring_mlookups_up_to_date.

49
sandbox/grist/poc.py Normal file
View File

@ -0,0 +1,49 @@
import difflib
import functools
import json
import unittest
from collections import namedtuple
from pprint import pprint
import six
import actions
import column
import engine
import logger
import useractions
import testutil
import objtypes
eng = engine.Engine()
eng.load_empty()


def apply(actions):
  """
  Applies user actions, given in their list representation, to the module-level engine.

  Accepts either a single action (e.g. ['AddRawTable', 'Table1']) or a list of
  such actions. Returns [] for empty input, otherwise whatever
  eng.apply_user_actions() returns.
  """
  if not actions:
    return []
  # A single action is a flat list whose first element is not itself a list.
  batch = actions if isinstance(actions[0], list) else [actions]
  parsed = [useractions.from_repr(item) for item in batch]
  return eng.apply_user_actions(parsed)


try:
  for step in (
    ['AddRawTable', 'Table1'],
    ['AddRecord', 'Table1', None, {'A': 1, 'B': 2, 'C': 3}],
    ['RenameTable', 'Table1', 'Dwa'],
  ):
    apply(step)
  # Further actions kept around for manual experiments:
  #   ['RenameColumn', 'Table1', 'A', 'NewA']
  #   ['RemoveColumn', "Table1", 'A']
  #   ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}]
  #   ['ModifyColumn', 'Table1', 'B', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 1'}]
finally:
  # Close the engine only if it offers a close() method.
  if hasattr(eng, 'close'):
    eng.close()

View File

@ -184,6 +184,8 @@ class Table(object):
# Each table maintains a reference to the engine that owns it.
self._engine = engine
engine.data.create_table(self)
# The UserTable object for this table, set in _rebuild_model
self.user_table = None
@ -242,6 +244,10 @@ class Table(object):
# is called seems to be too late, at least for unit tests.
self._empty_lookup_column = self._get_lookup_map(())
def destroy(self):
  """
  Unregisters this table from the engine's data registry (engine.data).
  """
  registry = self._engine.data
  registry.drop_table(self)
def _num_rows(self):
"""
Similar to `len(self.lookup_records())` but faster and doesn't create dependencies.

View File

@ -1,4 +1,5 @@
# pylint: disable=line-too-long
import unittest
import logger
import test_engine
@ -189,3 +190,7 @@ class TestImportActions(test_engine.EngineTestCase):
[6, 3, [12]],
[7, 1, [13, 14, 15]], # new section for transform preview
])
if __name__ == "__main__":
unittest.main()

View File

@ -1,3 +1,4 @@
import unittest
import testutil
import test_engine
from objtypes import RecordSetStub
@ -100,3 +101,7 @@ class TestRecordList(test_engine.EngineTestCase):
[1, "Mammals", [1, 3], ["Mammals", "Mammals"], [mammals, mammals]],
[2, "Reptilia", [2, 4], ["Reptilia", "Reptilia"], [reptiles, reptiles]],
])
if __name__ == "__main__":
unittest.main()

View File

@ -1,3 +1,4 @@
import unittest
import logger
import testutil
@ -306,3 +307,7 @@ class TestTableActions(test_engine.EngineTestCase):
]),
]),
])
if __name__ == "__main__":
unittest.main()