From 6db92328839e1f597efad6208ea1e67ada166121 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaros=C5=82aw=20Sadzi=C5=84ski?=
 <jaroslaw.sadzinski@gmail.com>
Date: Mon, 24 Apr 2023 12:05:34 +0200
Subject: [PATCH] Extracting column data and preventing multiple columns

---
 .gitignore                           |  2 +-
 sandbox/grist/column.py              | 62 ++++++++++++++++++----------
 sandbox/grist/data.py                | 48 +++++++++++++++++++++
 sandbox/grist/docactions.py          |  3 +-
 sandbox/grist/engine.py              | 62 +++++++++++++++++++++++++++-
 sandbox/grist/poc.py                 | 49 ++++++++++++++++++++++
 sandbox/grist/table.py               |  6 +++
 sandbox/grist/test_import_actions.py |  5 +++
 sandbox/grist/test_recordlist.py     |  5 +++
 sandbox/grist/test_table_actions.py  |  5 +++
 10 files changed, 222 insertions(+), 25 deletions(-)
 create mode 100644 sandbox/grist/data.py
 create mode 100644 sandbox/grist/poc.py

diff --git a/.gitignore b/.gitignore
index cc60d2eb..5b2e1ff1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,7 @@
 
 # Build helper files.
 /.build*
-
+*.grist
 *.swp
 *.pyc
 *.bak
diff --git a/sandbox/grist/column.py b/sandbox/grist/column.py
index c0ba8906..6aefa45f 100644
--- a/sandbox/grist/column.py
+++ b/sandbox/grist/column.py
@@ -61,16 +61,23 @@ class BaseColumn(object):
   def __init__(self, table, col_id, col_info):
     self.type_obj = col_info.type_obj
     self._is_right_type = self.type_obj.is_right_type
-    self._data = []
     self.col_id = col_id
     self.table_id = table.table_id
+    self.engine = table._engine
     self.node = depend.Node(self.table_id, col_id)
     self._is_formula = col_info.is_formula
     self._is_private = bool(col_info.method) and getattr(col_info.method, 'is_private', False)
     self.update_method(col_info.method)
+    self.detached = False
 
-    # Always initialize to include the special empty record at index 0.
-    self.growto(1)
+    self._data = None
+    self._table = table
+    self.engine.data.create_column(self)
+
+
+  def iterate(self):
+    for row_id, value in self._data.iterate():
+      yield row_id, value
 
   def update_method(self, method):
     """
@@ -101,38 +108,46 @@ class BaseColumn(object):
     return self.method is not None
 
   def clear(self):
-    self._data = []
-    self.growto(1)    # Always include the special empty record at index 0.
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
+    self._data.clear()
 
   def destroy(self):
     """
     Called when the column is deleted.
     """
-    del self._data[:]
+    if self.detached:
+      print('Warning - destroying already detached column: ', self.table_id, self.col_id)
+      return
+
+    self.engine.data.drop_column(self)
 
   def growto(self, size):
-    if len(self._data) < size:
-      self._data.extend([self.getdefault()] * (size - len(self._data)))
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
+
+    self._data.growto(size)
 
   def size(self):
-    return len(self._data)
+    return self._data.size()
 
   def set(self, row_id, value):
     """
     Sets the value of this column for the given row_id. Value should be as returned by convert(),
     i.e. of the right type, or alttext, or error (but should NOT be random wrong types).
     """
-    try:
-      self._data[row_id] = value
-    except IndexError:
-      self.growto(row_id + 1)
-      self._data[row_id] = value
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
+    self._data.set(row_id, value)
 
   def unset(self, row_id):
     """
     Sets the value for the given row_id to the default value.
     """
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
     self.set(row_id, self.getdefault())
+    self._data.unset(row_id)
 
   def get_cell_value(self, row_id, restore=False):
     """
@@ -182,10 +197,7 @@ class BaseColumn(object):
     Returns the value stored for the given row_id. This may be an error or alttext, and it does
     not convert to a richer object.
     """
-    try:
-      return self._data[row_id]
-    except IndexError:
-      return self.getdefault()
+    return self._data.raw_get(row_id)
 
   def safe_get(self, row_id):
     """
@@ -212,7 +224,15 @@ class BaseColumn(object):
     """
     Replace this column's data entirely with data from another column of the same exact type.
     """
-    self._data[:] = other_column._data
+    if self.detached:
+      raise Exception('Column already detached: ', self.table_id, self.col_id)
+    if other_column.detached:
+      print('Warning: copying from detached column: ', other_column.table_id, other_column.col_id)
+      return
+    
+    print('Column {}.{} is copying from {}.{}'.format(self.table_id, self.col_id, other_column.table_id, other_column.col_id))
+
+    self._data.copy_from(other_column._data)
 
   def convert(self, value_to_convert):
     """
@@ -244,7 +264,7 @@ class ChoiceColumn(DataColumn):
   def rename_choices(self, renames):
     row_ids = []
     values = []
-    for row_id, value in enumerate(self._data):
+    for row_id, value in self.iterate():
       if value is not None and self.type_obj.is_right_type(value):
         value = self._rename_cell_choice(renames, value)
         if value is not None:
@@ -443,7 +463,7 @@ class BaseReferenceColumn(BaseColumn):
     # This is hacky: we should have an interface to iterate through values of a column. (As it is,
     # self._data may include values for non-existent rows; it works here because those values are
     # falsy, which makes them ignored by self._update_references).
-    for row_id, value in enumerate(self._data):
+    for row_id, value in self.iterate():
       if self.type_obj.is_right_type(value):
         self._update_references(row_id, None, value)
 
diff --git a/sandbox/grist/data.py b/sandbox/grist/data.py
new file mode 100644
index 00000000..5973b249
--- /dev/null
+++ b/sandbox/grist/data.py
@@ -0,0 +1,48 @@
+class ColumnData(object):
+  def __init__(self, col):
+    self.col = col
+    self.data = []
+    # Always initialize to include the special empty record at index 0.
+    self.growto(1)
+
+  def drop(self):
+    del self.data[:]
+
+  def growto(self, size):
+    if len(self.data) < size:
+      self.data.extend([self.getdefault()] * (size - len(self.data)))
+
+  def getdefault(self):
+    return self.col.type_obj.default
+  
+  def size(self):
+    return len(self.data)
+  
+  def clear(self):
+    if self.size() == 1:
+      return
+    raise NotImplementedError("clear() not implemented for this column type")
+
+
+  def raw_get(self, row_id):
+    try:
+      return self.data[row_id]
+    except IndexError:
+      return self.getdefault()
+    
+  def set(self, row_id, value):
+    try:
+      self.data[row_id] = value
+    except IndexError:
+      self.growto(row_id + 1)
+      self.data[row_id] = value
+
+  def iterate(self):
+    for i in range(1, len(self.data)):
+      yield i, self.raw_get(i)
+
+  def copy_from(self, other_column):
+    self.data[:] = other_column.data
+
+  def unset(self, row_id):
+    pass
\ No newline at end of file
diff --git a/sandbox/grist/docactions.py b/sandbox/grist/docactions.py
index 592cb6c7..f1579726 100644
--- a/sandbox/grist/docactions.py
+++ b/sandbox/grist/docactions.py
@@ -209,8 +209,7 @@ class DocActions(object):
 
     # Fill in the new column with the values from the old column.
     new_column = table.get_column(col_id)
-    for row_id in table.row_ids:
-      new_column.set(row_id, old_column.raw_get(row_id))
+    new_column.copy_from_column(old_column)
 
     # Generate the undo action.
     self._engine.out_actions.undo.append(actions.ModifyColumn(table_id, col_id, undo_col_info))
diff --git a/sandbox/grist/engine.py b/sandbox/grist/engine.py
index 7f4926bc..6464e808 100644
--- a/sandbox/grist/engine.py
+++ b/sandbox/grist/engine.py
@@ -15,7 +15,7 @@ import six
 from six.moves import zip
 from six.moves.collections_abc import Hashable  # pylint:disable-all
 from sortedcontainers import SortedSet
-
+from data import ColumnData
 import acl
 import actions
 import action_obj
@@ -105,6 +105,57 @@ skipped_completions = re.compile(r'\.(_|lookupOrAddDerived|getSummarySourceGroup
 # column may refer to derived tables or independent tables. Derived tables would have an extra
 # property, marking them as derived, which would affect certain UI decisions.
 
+class Database(object):
+  __slots__ = ('engine', 'tables')
+
+  def __init__(self, engine):
+    self.engine = engine
+    self.tables = {}
+
+
+  def create_table(self, table):
+    if table.table_id in self.tables:
+      raise ValueError("Table %s already exists" % table.table_id)
+    print("Creating table %s" % table.table_id)
+    self.tables[table.table_id] = dict()
+
+
+  def drop_table(self, table):
+    if table.table_id not in self.tables:
+      raise ValueError("Table %s already exists" % table.table_id)
+    print("Deleting table %s" % table.table_id)
+    del self.tables[table.table_id]
+
+
+  def create_column(self, col):
+    if col.table_id not in self.tables:
+      self.tables[col.table_id] = dict()
+
+    if col.col_id in self.tables[col.table_id]:
+      old_one = self.tables[col.table_id][col.col_id]
+      col._data = old_one._data
+      col._data.col = col
+      old_one.detached = True
+      old_one._data = None
+    else:
+      col._data = ColumnData(col)
+      # print('Column {}.{} is detaching column {}.{}'.format(self.table_id, self.col_id, old_one.table_id, old_one.col_id))
+    # print('Creating column: ', self.table_id, self.col_id)
+    self.tables[col.table_id][col.col_id] = col
+    col.detached = False
+
+  def drop_column(self, col):
+    tables = self.tables
+
+    if col.table_id not in tables:
+      raise Exception('Table not found for column: ', col.table_id, col.col_id)
+    
+    if col.col_id not in tables[col.table_id]:
+      raise Exception('Column not found: ', col.table_id, col.col_id)
+
+    print('Destroying column: ', col.table_id, col.col_id)
+    col._data.drop()
+    del tables[col.table_id][col.col_id]
 
 class Engine(object):
   """
@@ -140,6 +191,8 @@ class Engine(object):
   """
 
   def __init__(self):
+    self.data = Database(self)      # The document data, including logic (formulas), and metadata.
+
     # The document data, including logic (formulas), and metadata (tables prefixed with "_grist_").
     self.tables = {}                # Maps table IDs (or names) to Table objects.
 
@@ -204,6 +257,7 @@ class Engine(object):
 
     # The list of columns that got deleted while applying an action.
     self._gone_columns = []
+    self._gone_tables = []
 
     # The set of potentially unused LookupMapColumns.
     self._unused_lookups = set()
@@ -1177,6 +1231,7 @@ class Engine(object):
     if user_table is None:
       for c in table.get_helper_columns():
         self.delete_column(c)
+      self._gone_tables.append(table)
 
   def _maybe_update_trigger_dependencies(self):
     if not self._have_trigger_columns_changed:
@@ -1302,6 +1357,7 @@ class Engine(object):
       # consistent internally as well as with the clients and database outside of the sandbox
       # (which won't see any changes in case of an error).
       log.info("Failed to apply useractions; reverting: %r" % (e,))
+      print("Failed to apply useractions; reverting: %r" % (e,))
       self._undo_to_checkpoint(checkpoint)
 
       # Check schema consistency again. If this fails, something is really wrong (we tried to go
@@ -1361,6 +1417,7 @@ class Engine(object):
     """
     #log.warn("Engine.apply_doc_action %s" % (doc_action,))
     self._gone_columns = []
+    self._gone_tables = []
 
     action_name = doc_action.__class__.__name__
     saved_schema = None
@@ -1400,6 +1457,9 @@ class Engine(object):
       actions.prune_actions(self.out_actions.calc, col.table_id, col.col_id)
       col.destroy()
 
+    for table in self._gone_tables:
+      table.destroy()
+
     # We normally recompute formulas before returning to the user; but some formulas are also used
     # internally in-between applying doc actions. We have this workaround to ensure that those are
     # up-to-date after each doc action. See more in comments for _bring_mlookups_up_to_date.
diff --git a/sandbox/grist/poc.py b/sandbox/grist/poc.py
new file mode 100644
index 00000000..48bf2f46
--- /dev/null
+++ b/sandbox/grist/poc.py
@@ -0,0 +1,49 @@
+import difflib
+import functools
+import json
+import unittest
+from collections import namedtuple
+from pprint import pprint
+
+import six
+
+import actions
+import column
+import engine
+import logger
+import useractions
+import testutil
+import objtypes
+
+
+eng = engine.Engine()
+eng.load_empty()
+
+
+def apply(actions):
+  if not actions:
+    return []
+  if not isinstance(actions[0], list):
+    actions = [actions]
+  return eng.apply_user_actions([useractions.from_repr(a) for a in actions])
+  
+
+try:
+  apply(['AddRawTable', 'Table1'])
+  apply(['AddRecord', 'Table1', None, {'A': 1, 'B': 2, 'C': 3}])
+  # apply(['RenameColumn', 'Table1', 'A', 'NewA'])
+  apply(['RenameTable', 'Table1', 'Dwa'])
+
+  #  ['RemoveColumn', "Table1", 'A'],
+    # ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}],
+    # ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}],
+    # ['ModifyColumn', 'Table1', 'B', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 1'}],
+  #])
+
+    # ['AddColumn', 'Table1', 'D', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 3'}],
+    # ['ModifyColumn', 'Table1', 'B', {'type': 'Numeric', 'isFormula': True, 'formula': '$A + 1'}],
+finally:
+  # Test if method close is in engine
+  if hasattr(eng, 'close'):
+    eng.close()
+ 
\ No newline at end of file
diff --git a/sandbox/grist/table.py b/sandbox/grist/table.py
index 060625ac..144b1791 100644
--- a/sandbox/grist/table.py
+++ b/sandbox/grist/table.py
@@ -184,6 +184,8 @@ class Table(object):
     # Each table maintains a reference to the engine that owns it.
     self._engine = engine
 
+    engine.data.create_table(self)
+
     # The UserTable object for this table, set in _rebuild_model
     self.user_table = None
 
@@ -242,6 +244,10 @@ class Table(object):
     # is called seems to be too late, at least for unit tests.
     self._empty_lookup_column = self._get_lookup_map(())
 
+
+  def destroy(self):
+    self._engine.data.drop_table(self)
+
   def _num_rows(self):
     """
     Similar to `len(self.lookup_records())` but faster and doesn't create dependencies.
diff --git a/sandbox/grist/test_import_actions.py b/sandbox/grist/test_import_actions.py
index ca39086c..d5e24914 100644
--- a/sandbox/grist/test_import_actions.py
+++ b/sandbox/grist/test_import_actions.py
@@ -1,4 +1,5 @@
 # pylint: disable=line-too-long
+import unittest
 import logger
 import test_engine
 
@@ -189,3 +190,7 @@ class TestImportActions(test_engine.EngineTestCase):
       [6, 3, [12]],
       [7, 1, [13, 14, 15]],  # new section for transform preview
     ])
+
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/sandbox/grist/test_recordlist.py b/sandbox/grist/test_recordlist.py
index 3dd11e64..dd8d45e7 100644
--- a/sandbox/grist/test_recordlist.py
+++ b/sandbox/grist/test_recordlist.py
@@ -1,3 +1,4 @@
+import unittest
 import testutil
 import test_engine
 from objtypes import RecordSetStub
@@ -100,3 +101,7 @@ class TestRecordList(test_engine.EngineTestCase):
       [1,    "Mammals",  [1, 3],      ["Mammals", "Mammals"],   [mammals, mammals]],
       [2,    "Reptilia", [2, 4],      ["Reptilia", "Reptilia"], [reptiles, reptiles]],
     ])
+
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/sandbox/grist/test_table_actions.py b/sandbox/grist/test_table_actions.py
index f6576cf0..bde08b38 100644
--- a/sandbox/grist/test_table_actions.py
+++ b/sandbox/grist/test_table_actions.py
@@ -1,3 +1,4 @@
+import unittest
 import logger
 
 import testutil
@@ -306,3 +307,7 @@ class TestTableActions(test_engine.EngineTestCase):
         ]),
       ]),
     ])
+
+
+if __name__ == "__main__":
+  unittest.main()
\ No newline at end of file