Extracting column data and preventing multiple columns

2025-06-13 20:53:59 +00:00 · 2023-04-24 12:05:34 +02:00 · 2023-04-24 12:05:34 +02:00 · 6db9232883
commit 6db9232883
parent a3889a3b13
10 changed files with 222 additions and 25 deletions
--- a/.gitignore
+++ b/.gitignore
@ -14,7 +14,7 @@

 # Build helper files.
 /.build*
-
+*.grist
 *.swp
 *.pyc
 *.bak
--- a/sandbox/grist/column.py
+++ b/sandbox/grist/column.py
@ -61,16 +61,23 @@ class BaseColumn(object):
  def __init__(self, table, col_id, col_info):
    self.type_obj = col_info.type_obj
    self._is_right_type = self.type_obj.is_right_type
-    self._data = []
    self.col_id = col_id
    self.table_id = table.table_id
+    self.engine = table._engine
    self.node = depend.Node(self.table_id, col_id)
    self._is_formula = col_info.is_formula
    self._is_private = bool(col_info.method) and getattr(col_info.method, 'is_private', False)
    self.update_method(col_info.method)
+    self.detached = False

-    # Always initialize to include the special empty record at index 0.
-    self.growto(1)
+    self._data = None
+    self._table = table
+    self.engine.data.create_column(self)
+
+
+  def iterate(self):
+    """
+    Yields (row_id, value) pairs as produced by the iterate() method of this column's storage.
+    """
+    for row_id, value in self._data.iterate():
+      yield row_id, value

  def update_method(self, method):
    """
@ -101,38 +108,46 @@ class BaseColumn(object):
    return self.method is not None

  def clear(self):
-    self._data = []
-    self.growto(1)    # Always include the special empty record at index 0.
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
+    self._data.clear()

  def destroy(self):
    """
    Called when the column is deleted.
    """
-    del self._data[:]
+    if self.detached:
+      print('Warning - destroying already detached column: ', self.table_id, self.col_id)
+      return
+
+    self.engine.data.drop_column(self)

  def growto(self, size):
-    if len(self._data) < size:
-      self._data.extend([self.getdefault()] * (size - len(self._data)))
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
+
+    self._data.growto(size)

  def size(self):
-    return len(self._data)
+    return self._data.size()

  def set(self, row_id, value):
    """
    Sets the value of this column for the given row_id. Value should be as returned by convert(),
    i.e. of the right type, or alttext, or error (but should NOT be random wrong types).
    """
-    try:
-      self._data[row_id] = value
-    except IndexError:
-      self.growto(row_id + 1)
-      self._data[row_id] = value
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
+    self._data.set(row_id, value)

  def unset(self, row_id):
    """
    Sets the value for the given row_id to the default value.
    """
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
    self.set(row_id, self.getdefault())
+    self._data.unset(row_id)

  def get_cell_value(self, row_id, restore=False):
    """
@ -182,10 +197,7 @@ class BaseColumn(object):
    Returns the value stored for the given row_id. This may be an error or alttext, and it does
    not convert to a richer object.
    """
-    try:
-      return self._data[row_id]
-    except IndexError:
-      return self.getdefault()
+    return self._data.raw_get(row_id)

  def safe_get(self, row_id):
    """
@ -212,7 +224,15 @@ class BaseColumn(object):
    """
    Replace this column's data entirely with data from another column of the same exact type.
    """
-    self._data[:] = other_column._data
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
+    if other_column.detached:
+      print('Warning: copying from detached column: ', other_column.table_id, other_column.col_id)
+      return
+    
+    print('Column {}.{} is copying from {}.{}'.format(self.table_id, self.col_id, other_column.table_id, other_column.col_id))
+
+    self._data.copy_from(other_column._data)

  def convert(self, value_to_convert):
    """
@ -244,7 +264,7 @@ class ChoiceColumn(DataColumn):
  def rename_choices(self, renames):
    row_ids = []
    values = []
-    for row_id, value in enumerate(self._data):
+    for row_id, value in self.iterate():
      if value is not None and self.type_obj.is_right_type(value):
        value = self._rename_cell_choice(renames, value)
        if value is not None:
@ -443,7 +463,7 @@ class BaseReferenceColumn(BaseColumn):
    # This is hacky: we should have an interface to iterate through values of a column. (As it is,
    # self._data may include values for non-existent rows; it works here because those values are
    # falsy, which makes them ignored by self._update_references).
-    for row_id, value in enumerate(self._data):
+    for row_id, value in self.iterate():
      if self.type_obj.is_right_type(value):
        self._update_references(row_id, None, value)

--- a/sandbox/grist/data.py
+++ b/sandbox/grist/data.py
@ -0,0 +1,48 @@
+class ColumnData(object):
+  """List-backed storage for the values of one column, indexed by row id."""
+
+  def __init__(self, col):
+    self.col = col
+    self.data = []
+    # Always initialize to include the special empty record at index 0.
+    self.growto(1)
+
+  def drop(self):
+    """Discard every stored value, including the empty record at index 0."""
+    del self.data[:]
+
+  def growto(self, size):
+    """Pad with the column's default value until at least `size` entries are stored."""
+    if len(self.data) < size:
+      self.data.extend([self.getdefault()] * (size - len(self.data)))
+
+  def getdefault(self):
+    """Return the default value of the owning column's type."""
+    return self.col.type_obj.default
+
+  def size(self):
+    """Return the number of stored entries, counting the empty record at index 0."""
+    return len(self.data)
+
+  def clear(self):
+    """Reset the storage to hold only the special empty record at index 0."""
+    # Emptied in place (rather than rebinding self.data) so the same list object is kept.
+    del self.data[:]
+    self.growto(1)
+
+  def raw_get(self, row_id):
+    """Return the value stored at `row_id`, or the default if `row_id` is past the end."""
+    try:
+      return self.data[row_id]
+    except IndexError:
+      return self.getdefault()
+
+  def set(self, row_id, value):
+    """Store `value` at `row_id`, growing the storage first if `row_id` is past the end."""
+    try:
+      self.data[row_id] = value
+    except IndexError:
+      self.growto(row_id + 1)
+      self.data[row_id] = value
+
+  def iterate(self):
+    """Yield (row_id, value) for each stored row, skipping the empty record at index 0."""
+    for i in range(1, len(self.data)):
+      yield i, self.raw_get(i)
+
+  def copy_from(self, other_column):
+    """Replace all values with a copy of those held by `other_column` (another ColumnData)."""
+    self.data[:] = other_column.data
+
+  def unset(self, row_id):
+    """Storage-level hook for unsetting a row; currently a no-op for the list backend."""
+    pass
--- a/sandbox/grist/docactions.py
+++ b/sandbox/grist/docactions.py
@ -209,8 +209,7 @@ class DocActions(object):

    # Fill in the new column with the values from the old column.
    new_column = table.get_column(col_id)
-    for row_id in table.row_ids:
-      new_column.set(row_id, old_column.raw_get(row_id))
+    new_column.copy_from_column(old_column)

    # Generate the undo action.
    self._engine.out_actions.undo.append(actions.ModifyColumn(table_id, col_id, undo_col_info))
--- a/sandbox/grist/engine.py
+++ b/sandbox/grist/engine.py
@ -15,7 +15,7 @@ import six
 from six.moves import zip
 from six.moves.collections_abc import Hashable  # pylint:disable-all
 from sortedcontainers import SortedSet
-
+from data import ColumnData
 import acl
 import actions
 import action_obj
@ -105,6 +105,57 @@ skipped_completions = re.compile(r'\.(_|lookupOrAddDerived|getSummarySourceGroup
 # column may refer to derived tables or independent tables. Derived tables would have an extra
 # property, marking them as derived, which would affect certain UI decisions.

+class Database(object):
+  """Registry of columns and their storage, keyed by table id and then column id."""
+  __slots__ = ('engine', 'tables')
+
+  def __init__(self, engine):
+    self.engine = engine
+    self.tables = {}    # Maps table_id to a dict of col_id -> column object.
+
+  def create_table(self, table):
+    """Register an empty column map for `table`; raise ValueError if already registered."""
+    if table.table_id in self.tables:
+      raise ValueError("Table %s already exists" % table.table_id)
+    print("Creating table %s" % table.table_id)
+    self.tables[table.table_id] = dict()
+
+  def drop_table(self, table):
+    """Forget `table` and its column map; raise ValueError if it is not registered."""
+    if table.table_id not in self.tables:
+      raise ValueError("Table %s does not exist" % table.table_id)
+    print("Deleting table %s" % table.table_id)
+    del self.tables[table.table_id]
+
+  def create_column(self, col):
+    """
+    Attach storage to `col`. A column already registered under the same table and column id
+    hands its ColumnData over to `col` and is marked detached.
+    """
+    if col.table_id not in self.tables:
+      self.tables[col.table_id] = dict()
+    if col.col_id in self.tables[col.table_id]:
+      old_one = self.tables[col.table_id][col.col_id]
+      col._data = old_one._data
+      col._data.col = col
+      old_one.detached = True
+      old_one._data = None
+    else:
+      col._data = ColumnData(col)
+    self.tables[col.table_id][col.col_id] = col
+    col.detached = False
+
+  def drop_column(self, col):
+    """Release the storage of `col` and unregister it; raise Exception if it is not registered."""
+    tables = self.tables
+    if col.table_id not in tables:
+      raise Exception('Table not found for column: ', col.table_id, col.col_id)
+    if col.col_id not in tables[col.table_id]:
+      raise Exception('Column not found: ', col.table_id, col.col_id)
+
+    print('Destroying column: ', col.table_id, col.col_id)
+    col._data.drop()
+    del tables[col.table_id][col.col_id]

 class Engine(object):
  """
@ -140,6 +191,8 @@ class Engine(object):
  """

  def __init__(self):
+    self.data = Database(self)      # Column data storage, keyed by table and column id.
+
    # The document data, including logic (formulas), and metadata (tables prefixed with "_grist_").
    self.tables = {}                # Maps table IDs (or names) to Table objects.

@ -204,6 +257,7 @@ class Engine(object):

    # The list of columns that got deleted while applying an action.
    self._gone_columns = []
+    self._gone_tables = []

    # The set of potentially unused LookupMapColumns.
    self._unused_lookups = set()
@ -1177,6 +1231,7 @@ class Engine(object):
    if user_table is None:
      for c in table.get_helper_columns():
        self.delete_column(c)
+      self._gone_tables.append(table)

  def _maybe_update_trigger_dependencies(self):
    if not self._have_trigger_columns_changed:
@ -1302,6 +1357,7 @@ class Engine(object):
      # consistent internally as well as with the clients and database outside of the sandbox
      # (which won't see any changes in case of an error).
      log.info("Failed to apply useractions; reverting: %r" % (e,))
+      print("Failed to apply useractions; reverting: %r" % (e,))
      self._undo_to_checkpoint(checkpoint)

      # Check schema consistency again. If this fails, something is really wrong (we tried to go
@ -1361,6 +1417,7 @@ class Engine(object):
    """
    #log.warn("Engine.apply_doc_action %s" % (doc_action,))
    self._gone_columns = []
+    self._gone_tables = []

    action_name = doc_action.__class__.__name__
    saved_schema = None
@ -1400,6 +1457,9 @@ class Engine(object):
      actions.prune_actions(self.out_actions.calc, col.table_id, col.col_id)
      col.destroy()

+    for table in self._gone_tables:
+      table.destroy()
+
    # We normally recompute formulas before returning to the user; but some formulas are also used
    # internally in-between applying doc actions. We have this workaround to ensure that those are
    # up-to-date after each doc action. See more in comments for _bring_mlookups_up_to_date.
--- a/sandbox/grist/poc.py
+++ b/sandbox/grist/poc.py
@ -0,0 +1,49 @@
+import difflib
+import functools
+import json
+import unittest
+from collections import namedtuple
+from pprint import pprint
+
+import six
+
+import actions
+import column
+import engine
+import logger
+import useractions
+import testutil
+import objtypes
+
+
+eng = engine.Engine()
+eng.load_empty()
+
+
+def apply(actions):
+  """
+  Applies one user action, or a list of them, to the module-level engine `eng`.
+
+  `actions` is either a single action in list form (e.g. ['AddRawTable', 'Table1']) or a list
+  of such lists. Returns [] for empty input, otherwise the result of eng.apply_user_actions().
+  """
+  # NOTE: the parameter shadows the imported `actions` module inside this function.
+  if not actions:
+    return []
+  # A single action is recognised by its first element not being a list; wrap it in a list.
+  if not isinstance(actions[0], list):
+    actions = [actions]
+  return eng.apply_user_actions([useractions.from_repr(a) for a in actions])
+  
+
+# Scenario: create a table, add one record to it, then rename the table.
+try:
+  apply(['AddRawTable', 'Table1'])
+  apply(['AddRecord', 'Table1', None, {'A': 1, 'B': 2, 'C': 3}])
+  # apply(['RenameColumn', 'Table1', 'A', 'NewA'])
+  apply(['RenameTable', 'Table1', 'Dwa'])
+
+  #  ['RemoveColumn', "Table1", 'A'],
+    # ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}],
+    # ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}],
+    # ['ModifyColumn', 'Table1', 'B', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 1'}],
+  #])
+
+    # ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}],
+    # ['ModifyColumn', 'Table1', 'B', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 1'}],
+finally:
+  # Close the engine only if it provides a close() method.
+  if hasattr(eng, 'close'):
+    eng.close()
+ 
--- a/sandbox/grist/table.py
+++ b/sandbox/grist/table.py
@ -184,6 +184,8 @@ class Table(object):
    # Each table maintains a reference to the engine that owns it.
    self._engine = engine

+    engine.data.create_table(self)
+
    # The UserTable object for this table, set in _rebuild_model
    self.user_table = None

@ -242,6 +244,10 @@ class Table(object):
    # is called seems to be too late, at least for unit tests.
    self._empty_lookup_column = self._get_lookup_map(())

+
+  def destroy(self):
+    """
+    Removes this table's entry from the engine's data registry, via engine.data.drop_table().
+    """
+    self._engine.data.drop_table(self)
+
  def _num_rows(self):
    """
    Similar to `len(self.lookup_records())` but faster and doesn't create dependencies.
--- a/sandbox/grist/test_import_actions.py
+++ b/sandbox/grist/test_import_actions.py
@ -1,4 +1,5 @@
 # pylint: disable=line-too-long
+import unittest
 import logger
 import test_engine

@ -189,3 +190,7 @@ class TestImportActions(test_engine.EngineTestCase):
      [6, 3, [12]],
      [7, 1, [13, 14, 15]],  # new section for transform preview
    ])
+
+
+if __name__ == "__main__":
+  unittest.main()
--- a/sandbox/grist/test_recordlist.py
+++ b/sandbox/grist/test_recordlist.py
@ -1,3 +1,4 @@
+import unittest
 import testutil
 import test_engine
 from objtypes import RecordSetStub
@ -100,3 +101,7 @@ class TestRecordList(test_engine.EngineTestCase):
      [1,    "Mammals",  [1, 3],      ["Mammals", "Mammals"],   [mammals, mammals]],
      [2,    "Reptilia", [2, 4],      ["Reptilia", "Reptilia"], [reptiles, reptiles]],
    ])
+
+
+if __name__ == "__main__":
+  unittest.main()
--- a/sandbox/grist/test_table_actions.py
+++ b/sandbox/grist/test_table_actions.py
@ -1,3 +1,4 @@
+import unittest
 import logger

 import testutil
@ -306,3 +307,7 @@ class TestTableActions(test_engine.EngineTestCase):
        ]),
      ]),
    ])
+
+
+if __name__ == "__main__":
+  unittest.main()