gristlabs_grist-core/sandbox/grist/engine.py

1385 lines
61 KiB
Python
Raw Normal View History

# pylint:disable=too-many-lines
"""
The data engine ties the code generated from the schema with the document data, and with
dependency tracking.
"""
import itertools
import re
import rlcompleter
import sys
import time
import traceback
from collections import namedtuple, OrderedDict, defaultdict
import six
from six.moves import zip
from six.moves.collections_abc import Hashable # pylint:disable-all
from sortedcontainers import SortedSet
import acl
import actions
import action_obj
from autocomplete_context import AutocompleteContext
from codebuilder import DOLLAR_REGEX
import depend
import docactions
import docmodel
import gencode
import logger
import match_counter
import objtypes
from objtypes import strict_equal
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
from relation import SingleRowsIdentityRelation
import schema
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
from schema import RecalcWhen
import table as table_module
from user import User # pylint:disable=wrong-import-order
import useractions
import column
import urllib_patch # noqa imported for side effect # pylint:disable=unused-import
log = logger.Logger(__name__, logger.INFO)
if six.PY2:
reload(sys)
sys.setdefaultencoding('utf8') # noqa # pylint:disable=no-member
class OrderError(Exception):
"""
An exception thrown and handled internally, representing when
evaluating a formula for a cell requires a value from another cell
(or lookup) that has not yet itself been evaluated. Formulas used
to be evaluated recursively, on the program stack, but now ordering
is organized explicitly by watching for this exception and adapting
evaluation order appropriately.
"""
def __init__(self, message, node, row_id):
super(OrderError, self).__init__(message)
self.node = node # The column of the cell evaluated out of order.
self.row_id = row_id # The row_id of the cell evaluated out of order.
self.requiring_node = None # The column of the original cell being evaluated.
# Added later since not known at point of exception.
self.requiring_row_id = None # the row_id of the original cell being evaluated
def set_requirer(self, node, row_id):
self.requiring_node = node
self.requiring_row_id = row_id
# An item of work to be done by Engine._update
WorkItem = namedtuple('WorkItem', ('node', 'row_ids', 'locks'))
# skip private members, and methods we don't want to expose to users.
skipped_completions = re.compile(r'\.(_|lookupOrAddDerived|getSummarySourceGroup)')
# The schema for the data is documented in gencode.py.
# There is a general process by which values get recomputed. There are two stages:
# (1) when raw data is loaded or changed by an action, it marks things as "dirty".
# This is done using engine.recompute_map, which maps Nodes to sets of dirty rows.
# (2) when up-to-date data is needed, _recompute is called, and updates the dirty rows.
# Up-to-date data is needed when it's required externally (e.g. to send to client), and
# may be needed recursively when other data is being recomputed.
# In this implementation, rows are identified by a row_id, which functions like an index, so that
# data may be stored in lists and typed arrays. This is very memory-efficient when row_ids are
# dense, but bad when they get too sparse. TODO The proposed solution is to have a condense
# operation which renumbers row_ids when they get too sparse.
# TODO:
# We should support types SubRecord, SubRecordList, and SubRecordMap. Original thought was to
# represent them as derived tables with special names, such as "Foo.field". This breaks several
# assumptions about how to organize generated code. Instead, we can use derived tables with valid
# names (such as "Foo_field"), and add an actual column "field" with an appropriate type. This
# column may refer to derived tables or independent tables. Derived tables would have an extra
# property, marking them as derived, which would affect certain UI decisions.
class Engine(object):
"""
The Engine is the core of the grist per-document logic. Some of its methods form the API exposed
to the Node controller. These are:
Initialization:
load_empty()
Initializes an empty document; useful for newly-created documents.
load_meta_tables(meta_tables, meta_columns)
load_table(table_data)
load_done()
These three must be called in-order to initialize a non-empty document.
- First, load_meta_tables() must be called with data for the two special metadata tables
containing the schema. It returns the list of other table names the data engine expects.
- Then load_table() must be called once for each of the other tables (both special tables,
and user tables), with that table's data (no need to call it for empty tables).
- Finally, load_done() must be called once to finish initialization.
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
NOTE: instead of load_done(), Grist now applies the no-op 'Calculate' user action.
Other methods:
fetch_table(table_id, formulas)
Returns a TableData object containing the full data for the table. Formula columns
are included only if formulas is True.
apply_user_actions(user_actions, user)
Applies a list of UserActions, which are tuples consisting of the name of the action
method (as defind in useractions.py) and the arguments to it. Returns ActionGroup tuple,
containing several categories of DocActions, including the results of computations.
"""
def __init__(self):
2022-02-19 09:46:49 +00:00
# The document data, including logic (formulas), and metadata (tables prefixed with "_grist_").
self.tables = {} # Maps table IDs (or names) to Table objects.
# Schema contains information about tables and columns, needed in particular to generate the
# code, from which in turn we create all the Table and Column objects. Schema is an
# OrderedDict of tableIds to schema.SchemaTable objects. Each of those contains a .columns
# OrderedDict of colId to schema.SchemaColumns objects. Order is used when generating code.
self.schema = OrderedDict()
# A more convenient interface to the document metadata.
self.docmodel = docmodel.DocModel(self)
# The module containing the compiled user code generated from the schema.
self.gencode = gencode.GenCode()
# Maintain the dependency graph of what Nodes (columns) depend on what other Nodes.
self.dep_graph = depend.Graph()
# Maps Nodes to sets of dirty rows (that need to be recomputed).
self.recompute_map = {}
# Maps Nodes to sets of done rows (to avoid recomputing in an infinite loop).
self._recompute_done_map = {}
# Contains Nodes once an exception value has been seen for them.
self._is_node_exception_reported = set()
# Contains Edges (node1, node2, relation) already seen during formula accesses.
self._recompute_edge_set = set()
# Sanity-check counter to check if we are making progress.
self._recompute_done_counter = 0
# Maps Nodes to a list of [rowId, value] pairs for cells that have been changed.
# Ordered to preserve the order in which first change was made to a column.
# This allows actions to be emitted in a legacy order that a lot of tests depend
# on. Not necessary to functioning, just a convenience.
self._changes_map = OrderedDict()
# This is set when we are running engine._update_loop, which has the ability to
# evaluate dependencies. We check this flag in engine._recompute_in_order, which will
# start an update loop if called without one already in place.
self._in_update_loop = False
# A set of (node, row_id) cell references. When evaluating a formula, a dependency
# on any of these cells implies a circular dependency.
self._locked_cells = set()
# The lists of actions of different kinds, built up while applying an action.
self.out_actions = action_obj.ActionGroup()
# What's currently being computed
self._current_node = None
self._current_row_id = None
# Certain recomputations are triggered by a particular doc action. This keep track of it.
self._triggering_doc_action = None
# The list of columns that got deleted while applying an action.
self._gone_columns = []
# The set of potentially unused LookupMapColumns.
self._unused_lookups = set()
# Create the formula tracer that can be overridden to trace formula evaluations. It is called
# with the Column and Record object for the formula about to be evaluated. It's used in tests.
self.formula_tracer = lambda col, record: None
# Create the object that knows how to interpret UserActions.
self.doc_actions = docactions.DocActions(self)
# Create the object that knows how to interpret UserActions.
self.user_actions = useractions.UserActions(self)
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
# Map from node to set of row_ids, for cells that should not be recalculated because they are
# data columns manually changed in this UserAction.
self._prevent_recompute_map = {}
# Whether any trigger columns may need to have their dependencies rebuilt.
self._have_trigger_columns_changed = True
# A flag for when a useraction causes a schema change, to verify consistency afterwards.
self._schema_updated = False
# Stores an exception representing the first unevaluated cell met while recomputing the
# current cell.
self._cell_required_error = None
# User that is currently applying user actions.
self._user = None
# In general you should access the property autocomplete_context instead,
# which initialises this attribute lazily when it's needed by autocomplete.
# When the schema changes and usercode is regenerated, this needs to be updated,
# but creating a new AutocompleteContext is quite expensive, so we instead
# clear this cached context on schema changes and let the property recreate it as needed.
self._autocomplete_context = None
self._table_stats = {"meta": [], "user": []}
@property
def autocomplete_context(self):
# See the comment on _autocomplete_context in __init__ above.
if self._autocomplete_context is None:
self._autocomplete_context = AutocompleteContext(self.gencode.usercode.__dict__)
return self._autocomplete_context
def record_table_stats(self, table_data, table_data_repr):
table_id = table_data.table_id
category = "meta" if table_id.startswith("_grist") else "user"
result = dict(
rows=len(table_data.row_ids),
columns=len(table_data.columns),
bytes=len(table_data_repr or ""),
table_id=table_id,
)
result["cells"] = result["rows"] * result["columns"]
self._table_stats[category].append(result)
def get_table_stats(self):
result = defaultdict(int, num_user_tables=len(self._table_stats["user"]))
for table in self._table_stats["meta"]:
for field in ["rows", "bytes"]:
key = "%s_%s" % (table["table_id"], field)
result[key] = table[field]
for table in self._table_stats["user"]:
for field in table:
if field == "table_id":
continue
key = "user_%s" % field
result[key] += table[field]
return dict(result)
def load_empty(self):
"""
Initialize an empty document, e.g. a newly-created one.
"""
self.load_meta_tables(actions.TableData('_grist_Tables', [], {}),
actions.TableData('_grist_Tables_column', [], {}))
self.load_done()
def load_meta_tables(self, meta_tables, meta_columns):
"""
Must be the first method to call for this Engine. The arguments must contain the data for the
_grist_Tables and _grist_Tables_column tables, in the form of actions.TableData.
Returns the list of all the other table names that data engine expects to be loaded.
"""
self.schema = schema.build_schema(meta_tables, meta_columns)
# Compile the user-defined module code (containing all formulas in particular).
self.rebuild_usercode()
# Load the data into the now-existing metadata tables. This isn't used directly, it's just a
# mirror of the schema for storage and for looking at.
self.load_table(meta_tables)
self.load_table(meta_columns)
return sorted(table_id for table_id in self.tables
if table_id not in (meta_tables.table_id, meta_columns.table_id))
def load_table(self, data):
"""
Must be called for each of the metadata tables (except the ones given to load_meta), and for
each user-defined table. The argument is an actions.TableData object.
"""
table = self.tables[data.table_id]
# Clear all columns, whether or not they are present in the data.
for column in six.itervalues(table.all_columns):
column.clear()
# Only load columns that aren't stored.
columns = {col_id: data for (col_id, data) in six.iteritems(data.columns)
if table.has_column(col_id)}
# Add the records.
self.add_records(data.table_id, data.row_ids, columns)
def load_done(self):
"""
Finalizes the loading of data into this Engine.
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
NOTE: instead of load_done(), Grist now applies the no-op 'Calculate' user action.
"""
self._bring_all_up_to_date()
def add_records(self, table_id, row_ids, column_values):
"""
Helper to add records to the given table, with row_ids and column_values having the same
interpretation as in TableData or BulkAddRecords. It's used both for the initial loading of
data, and for BulkAddRecords itself.
"""
table = self.tables[table_id]
growto_size = (max(row_ids) + 1) if row_ids else 1
# Create the new records.
id_column = table.get_column('id')
id_column.growto(growto_size)
for row_id in row_ids:
id_column.set(row_id, row_id)
# Resize all columns to the full table size.
table.grow_to_max()
# Load the new values.
for col_id, values in six.iteritems(column_values):
column = table.get_column(col_id)
column.growto(growto_size)
for row_id, value in zip(row_ids, values):
column.set(row_id, value)
# Invalidate new records to cause the formula columns to get recomputed.
self.invalidate_records(table_id, row_ids)
def fetch_table(self, table_id, formulas=True, private=False, query=None):
"""
Returns TableData object representing all data in this table.
"""
table = self.tables[table_id]
column_values = {}
query_cols = []
if query:
query_cols = [(table.get_column(col_id), values) for (col_id, values) in six.iteritems(query)]
row_ids = [r for r in table.row_ids
if all((c.raw_get(r) in values) for (c, values) in query_cols)]
for c in six.itervalues(table.all_columns):
# pylint: disable=too-many-boolean-expressions
if ((formulas or not c.is_formula())
and (private or not c.is_private())
and c.col_id != "id" and not column.is_virtual_column(c.col_id)):
column_values[c.col_id] = [c.raw_get(r) for r in row_ids]
return actions.TableData(table_id, row_ids, column_values)
def fetch_table_schema(self):
return self.gencode.get_user_text()
def fetch_meta_tables(self, formulas=True):
"""
Returns {table_id: TableData} mapping for all metadata tables (those starting with '_grist_').
Note the slight naming difference with load_meta_tables: that one expects just two
extra-special tables, whereas fetch_meta_tables returns all special tables.
"""
return {table_id: self.fetch_table(table_id, formulas=formulas)
for table_id in self.tables if table_id.startswith('_grist_')}
def find_col_from_values(self, values, n, opt_table_id=None):
"""
Returns a list of colRefs for columns whose values match a given list. The results are ordered
from best to worst according to the number of matches of distinct values.
If n is non-zero, limits the results to that number. If opt_table_id is given, search only
that table for matching columns.
"""
start_time = time.time()
# Exclude default values, since these will often result in matching new/incomplete columns.
# If a value is unhashable, set() will fail, so we check for that.
sample = set(v for v in values if isinstance(v, Hashable))
matched_cols = []
# If the column has no values, return
if not sample:
return []
search_cols = (self.docmodel.get_table_rec(opt_table_id).columns
if opt_table_id in self.tables else self.docmodel.columns.all)
m = match_counter.MatchCounter(sample)
# Iterates through each valid column in the document, counting matches.
for c in search_cols:
if (not gencode._is_special_table(c.tableId) and
column.is_visible_column(c.colId) and
not c.type.startswith('Ref')):
table = self.tables[c.tableId]
col = table.get_column(c.colId)
matches = m.count_unique(col.raw_get(r) for r in itertools.islice(table.row_ids, 1000))
if matches > 0:
matched_cols.append((matches, c.id))
# Sorts the matched columns by the matches, then select the best-matching columns
matched_cols.sort(reverse=True)
if n:
matched_cols = matched_cols[:n]
log.info('Found column from values in %.3fs' % (time.time() - start_time))
return [c[1] for c in matched_cols]
def assert_schema_consistent(self):
"""
Asserts that the internally-stored schema is equivalent to the schema as represented by the
special tables of metadata.
"""
meta_tables = self.fetch_table('_grist_Tables')
meta_columns = self.fetch_table('_grist_Tables_column')
gen_schema = schema.build_schema(meta_tables, meta_columns)
gen_schema_dicts = {k: (t.tableId, dict(t.columns))
for k, t in six.iteritems(gen_schema)}
cur_schema_dicts = {k: (t.tableId, dict(t.columns))
for k, t in six.iteritems(self.schema)}
if cur_schema_dicts != gen_schema_dicts:
import pprint
import difflib
a = (pprint.pformat(cur_schema_dicts) + "\n").splitlines(True)
b = (pprint.pformat(gen_schema_dicts) + "\n").splitlines(True)
raise AssertionError("Internal schema different from that in metadata:\n" +
"".join(difflib.unified_diff(a, b, fromfile="internal", tofile="metadata")))
# Check there are no stray column records (they aren't picked up by schema diffs, but will
# cause inconsistencies with future tables).
# TODO: This inconsistency can be triggered by undo of an AddTable action if the table
# acquired more columns in subsequent actions. We may want to check for similar situations
# with other metadata, e.g. ViewSection fields, where they'd cause different symptoms.
# (Or better ensure consistency by design by applying undo correctly, probably via rebase).
valid_table_refs = set(meta_tables.row_ids)
col_parent_ids = set(meta_columns.columns['parentId'])
if col_parent_ids > valid_table_refs:
collist = sorted(actions.transpose_bulk_action(meta_columns),
key=lambda c: (c.parentId, c.parentPos))
raise AssertionError("Internal schema inconsistent; extra columns in metadata:\n"
+ "\n".join(' #%s %s' %
(c.id, schema.SchemaColumn(c.colId, c.type, bool(c.isFormula), c.formula))
for c in collist if c.parentId not in valid_table_refs))
def dump_state(self):
self.dep_graph.dump_graph()
self.dump_recompute_map()
def dump_recompute_map(self):
log.debug("Recompute map (%d nodes):" % len(self.recompute_map))
for node, dirty_rows in six.iteritems(self.recompute_map):
log.debug(" Node %s: %s" % (node, dirty_rows))
def _use_node(self, node, relation, row_ids=[]):
# This is used whenever a formula accesses any part of any record. It's hot code, and
# it's worth optimizing.
if self._current_node:
# Add an edge to indicate that the node being computed depends on the node passed in.
# Note that during evaluation, we only *add* dependencies. We *remove* them by clearing them
# whenever ALL rows for a node are invalidated (on schema changes and reloads).
edge = (self._current_node, node, relation)
if edge not in self._recompute_edge_set:
self.dep_graph.add_edge(*edge)
self._recompute_edge_set.add(edge)
# This check is not essential here, but is an optimization that saves cycles.
if self.recompute_map.get(node) is None:
return
self._recompute(node, row_ids)
def _pre_update(self):
"""
Called at beginning of _bring_all_up_to_date or _bring_mlookups_up_to_date.
Makes sure cell change accumulation is reset.
"""
self._changes_map = OrderedDict()
self._recompute_done_map = {}
self._locked_cells = set()
self._is_node_exception_reported = set()
self._recompute_edge_set = set()
self._cell_required_error = None
def _post_update(self):
"""
Called at end of _bring_all_up_to_date or _bring_mlookups_up_to_date.
Issues actions for any accumulated cell changes.
"""
for node, changes in six.iteritems(self._changes_map):
table = self.tables[node.table_id]
col = table.get_column(node.col_id)
# If there are changes, save them in out_actions.
if changes and not col.is_private():
self.out_actions.summary.add_changes(node.table_id, node.col_id, changes)
self._pre_update() # empty lists/sets/maps
def _update_loop(self, work_items, ignore_other_changes=False):
"""
Called to compute the specified cells, including any nested dependencies.
Consumes OrderError exceptions, and reacts to them with a strategy for
reordering cell evaluation. That strategy is currently simple:
* Maintain a stack of work item triplets. Each work item has:
- A node (table/column pair).
- A list of row_ids to compute (this can be None, meaning "all").
- A list of row_ids to "unlock" once finished.
* Until stack is empty, take a work item off the stack and attempt to
_recompute the specified rows of the specified node.
- If an OrderError is received, first check it is for a cell we
requested (_recompute will opportunistically try to compute
other cells we haven't asked for, and it is important for the
purposes of cycle detection to discount that).
- If so, "lock" that cell, push the current work item back on the
stack (remembering which cell to unlock later), and add a new
work item for the cell that threw the OrderError.
+ The "lock" serves only for cycle detection.
+ The order of stack placement means that the cell that threw
the OrderError will now be evaluated before the cell that
depends on it.
- If not, ignore the OrderError. If we actually need that cell,
We'll get back to it later as we work up the work_items stack.
* The _recompute method, as mentioned, will attempt to compute not
just the requested rows of a particular column, but any other dirty
cells in that column. This is an important optimization for the
common case of columns with non-self-referring dependencies.
"""
self._in_update_loop = True
while self.recompute_map:
self._recompute_done_counter = 0
self._expected_done_counter = 0
while work_items:
node, row_ids, locks = work_items.pop()
try:
self._recompute_step(node, require_rows=row_ids)
except OrderError as e:
# Need to schedule re-ordered evaluation
assert node == e.requiring_node
assert (not row_ids) or (e.requiring_row_id in row_ids)
# Put current work item back on stack, and don't dispose its locks
work_items.append(WorkItem(node, row_ids, locks))
locks = []
# Add a new work item for the cell we are following up, and lock
# it to forbid circular dependencies
lock = (node, e.requiring_row_id)
work_items.append(WorkItem(e.node, [e.row_id], [lock]))
self._locked_cells.add(lock)
# Discard any locks once work item is complete
for lock in locks:
if lock not in self._locked_cells:
# If cell is already unlocked, don't double-count it.
continue
self._locked_cells.discard(lock)
# Sanity check: make sure we've computed at least one more cell
self._expected_done_counter += 1
if self._recompute_done_counter < self._expected_done_counter:
raise Exception('data engine not making progress updating dependencies')
if ignore_other_changes:
# For _bring_mlookups_up_to_date, we should only wait for the work items
# explicitly requested.
break
# Sanity check that we computed at least one cell.
if self.recompute_map and self._recompute_done_counter == 0:
raise Exception('data engine not making progress updating formulas')
# Figure out remaining work to do, maintaining classic Grist ordering.
work_items = self._make_sorted_work_items(self.recompute_map.keys())
self._in_update_loop = False
def _make_sorted_work_items(self, nodes): # pylint:disable=no-self-use
# Build WorkItems from a list of nodes, maintaining classic Grist ordering (in order by name).
# WorkItems are processed from the end (hence reverse=True). Additionally, we sort all
# #lookups to be processed first. See note in _bring_mlookups_up_to_date why this is important.
nodes = sorted(nodes, reverse=True, key=lambda n: (not n.col_id.startswith('#lookup'), n))
return [WorkItem(node, None, []) for node in nodes]
def _bring_all_up_to_date(self):
# Bring all nodes up to date. We iterate in sorted order of the keys so that the order is
# deterministic (which is helpful for tests in particular).
self._pre_update()
try:
# Figure out remaining work to do, maintaining classic Grist ordering.
work_items = self._make_sorted_work_items(self.recompute_map.keys())
self._update_loop(work_items)
# Check if any potentially unused LookupMaps are still unused, and if so, delete them.
for lookup_map in self._unused_lookups:
if self.dep_graph.remove_node_if_unused(lookup_map.node):
self.delete_column(lookup_map)
finally:
self._unused_lookups.clear()
self._post_update()
def _bring_mlookups_up_to_date(self, triggering_doc_action):
# Just bring the *metadata* lookup nodes up to date.
#
# In general, lookup nodes don't know exactly what depends on them until they are
# recomputed. So invalidating lookup nodes doesn't complete all invalidation; further
# invalidations may be generated in the course of recomputing the lookup nodes.
#
# We use some private formulas on metadata tables internally (e.g. for a list columns of a
# table). This method is part of a somewhat hacky solution in apply_doc_action: to force
# recomputation of lookup nodes to ensure that we see up-to-date results between applying doc
# actions.
#
# For regular data, correct values aren't needed until we recompute formulas. So we process
# lookups before other formulas, but do not need to update lookups after each doc_action.
#
# In addition, we expose the triggering doc_action so that lookupOrAddDerived can avoid adding
# a record to a derived table when the trigger itself is a change to the derived table. This
# currently only happens on undo, and is admittedly an ugly workaround.
self._pre_update()
try:
self._triggering_doc_action = triggering_doc_action
nodes = [node for node in self.recompute_map
if node.col_id.startswith('#lookup') and node.table_id.startswith('_grist_')]
work_items = self._make_sorted_work_items(nodes)
self._update_loop(work_items, ignore_other_changes=True)
finally:
self._triggering_doc_action = None
self._post_update()
def is_triggered_by_table_action(self, table_id):
# Workaround for lookupOrAddDerived that prevents AddRecord from being created when the
# trigger is itself an action for the same table. See comments for _bring_mlookups_up_to_date.
a = self._triggering_doc_action
return a and getattr(a, 'table_id', None) == table_id
def bring_col_up_to_date(self, col_obj):
"""
Public interface to recompute a column if it is dirty. It also generates a calc or stored
action and adds it into self.out_actions object.
"""
self._pre_update()
try:
self._recompute_done_map.pop(col_obj.node, None)
self._recompute(col_obj.node)
finally:
self._post_update()
def get_formula_error(self, table_id, col_id, row_id):
"""
Returns an error message (traceback) for one concrete cell which user clicked.
It is sufficient in case when we want to get traceback for only one formula cell with error,
not recomputing the whole column and dependent columns as well. So it recomputes the formula
for this cell and returns error message with details.
"""
table = self.tables[table_id]
col = table.get_column(col_id)
checkpoint = self._get_undo_checkpoint()
try:
result = self._recompute_one_cell(table, col, row_id)
# If the error is gone for a trigger formula
if col.has_formula() and not col.is_formula():
if not isinstance(result, objtypes.RaisedException):
# Get the error stored in the cell
# and change it to show to the user that no traceback is available
error_in_cell = objtypes.decode_object(col.raw_get(row_id))
assert isinstance(error_in_cell, objtypes.RaisedException)
return error_in_cell.no_traceback()
return result
finally:
# It is possible for formula evaluation to have side-effects that produce DocActions (e.g.
# lookupOrAddDerived() creates those). In case of get_formula_error(), these aren't fully
# processed (e.g. don't get applied to DocStorage), so it's important to reverse them.
self._undo_to_checkpoint(checkpoint)
def _recompute(self, node, row_ids=None):
"""
Make sure cells of a node are up to date, recomputing as necessary. Can optionally
be limited to a list of rows that are of interest.
"""
if self._in_update_loop:
# This is a nested evaluation. If there are in fact any cells to evaluate,
# this must result in an OrderError. We let engine._recompute_step
# take care of figuring this out.
self._recompute_step(node, allow_evaluation=False, require_rows=row_ids)
else:
# Sometimes _use_node is called from outside _update_loop. In this case,
# we start an _update_loop to compute whatever is required. Otherwise
# nested dependencies would not get computed.
self._update_loop([WorkItem(node, row_ids, [])], ignore_other_changes=True)
def _recompute_step(self, node, allow_evaluation=True, require_rows=None): # pylint: disable=too-many-statements
"""
Recomputes a node (i.e. column), evaluating the appropriate formula for the given rows
to get new values. Only columns whose .has_formula() is true should ever have invalidated rows
in recompute_map (this includes data columns with a default formula, for newly-added records).
If `allow_evaluation` is false, any time we would recompute a node, we instead throw
an OrderError exception. This is used to "flatten" computation - instead of evaluating
nested dependencies on the program stack, an external loop will evaluate them in an
unnested order. Remember that formulas may access other columns, and column access calls
engine._use_node, which calls _recompute to bring those nodes up to date.
Recompute records changes in _changes_map, which is used later to generate appropriate
BulkUpdateRecord actions, either calc (for formulas) or stored (for non-formula columns).
"""
dirty_rows = self.recompute_map.get(node, None)
if dirty_rows is None:
return
table = self.tables[node.table_id]
col = table.get_column(node.col_id)
assert col.has_formula(), "Engine._recompute: called on no-formula node %s" % (node,)
# Get a sorted list of row IDs, excluding deleted rows (they will sometimes end up in
# recompute_map) and rows already done (since _recompute_done_map got cleared).
if node not in self._recompute_done_map:
# Before starting to evaluate a formula, call reset_rows()
# on all relations with nodes we depend on. E.g. this is
# used for lookups, so that we can reset stored lookup
# information for rows that are about to get reevaluated.
self.dep_graph.reset_dependencies(node, dirty_rows)
self._recompute_done_map[node] = set()
exclude = self._recompute_done_map[node]
if dirty_rows == depend.ALL_ROWS:
dirty_rows = SortedSet(r for r in table.row_ids if r not in exclude)
self.recompute_map[node] = dirty_rows
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
exempt = self._prevent_recompute_map.get(node, None)
if exempt:
# If allow_evaluation=False we're not supposed to actually compute dirty_rows.
# But we may need to compute them later,
# so ensure self.recompute_map[node] isn't mutated by separating it from dirty_rows.
# Therefore dirty_rows is assigned a new value. Note that -= would be a mutation.
dirty_rows = dirty_rows - exempt
if allow_evaluation:
self.recompute_map[node] = dirty_rows
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
require_rows = sorted(require_rows or [])
previous_current_node = self._current_node
# Prevents dependency creation for non-formula nodes. A non-formula column may include a
# formula to eval for a newly-added record. Those shouldn't create dependencies.
self._current_node = node if col.is_formula() else None
changes = None
cleaned = [] # this lists row_ids that can be removed from dirty_rows once we are no
# longer iterating on it.
try:
require_count = len(require_rows)
for i, row_id in enumerate(itertools.chain(require_rows, dirty_rows)):
required = i < require_count or require_count == 0
if require_count and row_id not in dirty_rows:
# Nothing need be done for required rows that are already up to date.
continue
if row_id not in table.row_ids or row_id in exclude:
# We can declare victory for absent or excluded rows.
cleaned.append(row_id)
continue
if not allow_evaluation:
# We're not actually in a position to evaluate this cell, we need to just
# report that we needed an _update_loop will arrange for us to be called
# again in a better order.
if required:
msg = 'Cell value not available yet'
err = OrderError(msg, node, row_id)
if not self._cell_required_error:
# Cache the exception in case user consumes it or modifies it in their formula.
self._cell_required_error = OrderError(msg, node, row_id)
raise err
# For common-case formulas, all cells in a column are likely to fail in the same way,
# so don't bother trying more from this column until we've reordered.
return
try:
# We figure out if we've hit a cycle here. If so, we just let _recompute_on_cell
# know, so it can set the cell value appropriately and do some other bookkeeping.
cycle = required and (node, row_id) in self._locked_cells
value = self._recompute_one_cell(table, col, row_id, cycle=cycle, node=node)
except OrderError as e:
if not required:
# We're out of order, but for a cell we were evaluating opportunistically.
# Don't throw an exception, since it could lead us off on a wild goose
# chase - let _update_loop focus on one path at a time.
return
# Keep track of why this cell was needed.
e.requiring_node = node
e.requiring_row_id = row_id
raise e
# Successfully evaluated a cell! Unlock it if it was locked, so other cells can
# use it without triggering a cyclic dependency error.
self._locked_cells.discard((node, row_id))
if isinstance(value, objtypes.RaisedException):
is_first = node not in self._is_node_exception_reported
if is_first:
self._is_node_exception_reported.add(node)
log.info(value.details)
# strip out details after logging
value = objtypes.RaisedException(value.error, user_input=value.user_input)
# TODO: validation columns should be wrapped to always return True/False (catching
# exceptions), so that we don't need special handling here.
if column.is_validation_column_name(col.col_id):
value = (value in (True, None))
# Convert the value, and if needed, set, and include into the returned action.
value = col.convert(value)
previous = col.raw_get(row_id)
if not strict_equal(value, previous):
if not changes:
changes = self._changes_map.setdefault(node, [])
changes.append((row_id, previous, value))
col.set(row_id, value)
exclude.add(row_id)
cleaned.append(row_id)
self._recompute_done_counter += 1
finally:
self._current_node = previous_current_node
# Usually dirty_rows refers to self.recompute_map[node], so this modifies both
dirty_rows -= cleaned
# However it's possible for them to be different
# (see above where `exempt` is nonempty and allow_evaluation=True)
# so here we check self.recompute_map[node] directly
if not self.recompute_map[node]:
self.recompute_map.pop(node)
def _recompute_one_cell(self, table, col, row_id, cycle=False, node=None):
"""
Recomputes an one formula cell and returns a value.
The value can be:
- the recomputed value in case there are no errors
- exception
- exception with details if flag include_details is set
"""
self._current_row_id = row_id
# Baffling, but keeping a reference to current generated "usercode" module protects against a
# seeming garbage-collection bug: if during formula evaluation the module gets regenerated
# (e.g. a side-effect causes a formula column to change to non-formula), the stale-module
# formula code that's still running will see None values in the usermodule's module-dictionary;
# just keeping this extra reference allows stale formulas to see valid values.
usercode_reference = self.gencode.usercode
checkpoint = self._get_undo_checkpoint()
record = table.Record(row_id, table._identity_relation)
value = None
try:
if cycle:
raise depend.CircularRefError("Circular Reference")
if not col.is_formula():
value = col.get_cell_value(int(record), restore=True)
result = col.method(record, table.user_table, value, self._user)
else:
result = col.method(record, table.user_table)
if self._cell_required_error:
raise self._cell_required_error # pylint: disable=raising-bad-type
self.formula_tracer(col, record)
return result
except: # pylint: disable=bare-except
# Since col.method runs untrusted user code, we use a bare except to catch all
# exceptions (even those not derived from BaseException).
# Before storing the exception value, make sure there isn't an OrderError pending.
# If there is, we will raise it after undoing any side effects.
order_error = self._cell_required_error
# Otherwise, we use sys.exc_info to recover the raised exception object.
regular_error = sys.exc_info()[1] if not order_error else None
# It is possible for formula evaluation to have side-effects that produce DocActions (e.g.
# lookupOrAddDerived() creates those). If there is an error, undo any such side-effects.
self._undo_to_checkpoint(checkpoint)
# Now we can raise the order error, if there was one. Cell evaluation will be reordered
# in response.
if order_error:
self._cell_required_error = None
raise order_error # pylint: disable=raising-bad-type
self.formula_tracer(col, record)
include_details = (node not in self._is_node_exception_reported) if node else True
if not col.is_formula():
return objtypes.RaisedException(regular_error, include_details, user_input=value)
else:
return objtypes.RaisedException(regular_error, include_details)
def convert_action_values(self, action):
"""
Given a BulkUpdateRecord or BulkAddRecord action, convert the values using the appropriate
Column objects, replacing them with the right-type value, alttext, or error objects.
"""
table_id, row_ids, column_values = action
table = self.tables[action.table_id]
new_values = {}
extra_actions = []
for col_id, values in six.iteritems(column_values):
col_obj = table.get_column(col_id)
values = [col_obj.convert(val) for val in values]
# If there are values for any PositionNumber columns, ensure PositionNumbers are ordered as
# intended but are all unique, which may require updating other positions.
nvalues, adjustments = col_obj.prepare_new_values(values,
action_summary=self.out_actions.summary)
if adjustments:
extra_actions.append(actions.BulkUpdateRecord(
action.table_id, [r for r,v in adjustments], {col_id: [v for r,v in adjustments]}))
new_values[col_id] = nvalues
if isinstance(action, (actions.BulkAddRecord, actions.ReplaceTableData)):
# Make sure we call prepare_new_values() for ALL columns when adding rows. The for-loop
# above does it for columns explicitly mentioned; this section does it for the other
# columns, using their default values as input to prepare_new_values().
ignore_data = isinstance(action, actions.ReplaceTableData)
for col_id, col_obj in six.iteritems(table.all_columns):
if col_id in column_values or column.is_virtual_column(col_id) or col_obj.is_formula():
continue
defaults = [col_obj.getdefault() for r in row_ids]
# We use defaults to get new values or adjustments. If we are replacing data, we'll make
# the adjustments without regard to the existing data.
nvalues, adjustments = col_obj.prepare_new_values(defaults, ignore_data=ignore_data,
action_summary=self.out_actions.summary)
if adjustments:
extra_actions.append(actions.BulkUpdateRecord(
action.table_id, [r for r,v in adjustments], {col_id: [v for r,v in adjustments]}))
if nvalues != defaults:
new_values[col_id] = nvalues
# Return action of the same type (e.g. BulkUpdateAction, BulkAddAction), but with new values,
# as well as any extra actions that were generated (as could happen for position adjustments).
return (type(action)(table_id, row_ids, new_values), extra_actions)
def trim_update_action(self, action):
"""
Takes a BulkUpdateAction, and returns a new BulkUpdateAction with only those rows that
actually cause any changes.
"""
table_id, row_ids, column_values = action
table = self.tables[action.table_id]
# Collect for each column the Column object and a list of new values.
cols = [(table.get_column(col_id), values) for (col_id, values) in six.iteritems(column_values)]
# In comparisons below, we rely here on Python's "==" operator to check for equality. After a
# type conversion, it may compare the new type to the old, e.g. 1 == 1.0 == True. It's
# important that such equality is acceptable also to JS and to DocStorage. So far, it seems
# just right.
# Find columns for which any value actually changed.
cols = [(col_obj, values) for (col_obj, values) in cols
if any(values[i] != col_obj.raw_get(row_id) for (i, row_id) in enumerate(row_ids))]
# Now find the indices of rows for which any value actually changed from what's in its Column.
row_subset = [i for i, row_id in enumerate(row_ids)
if any(values[i] != col_obj.raw_get(row_id) for (col_obj, values) in cols)]
# Create and return a new action with just the selected subset of rows.
return actions.BulkUpdateRecord(
action.table_id,
[row_ids[i] for i in row_subset],
{col_obj.col_id: [values[i] for i in row_subset]
for (col_obj, values) in cols}
)
def invalidate_records(self, table_id, row_ids=depend.ALL_ROWS, col_ids=None,
data_cols_to_recompute=frozenset()):
"""
Invalidate the records with the given row_ids. If col_ids is given, only those columns are
invalidated (otherwise all columns). If data_cols_to_recompute is given, then non-formula
col_ids that have an associated formula will get invalidated too, to cause recomputation.
Note that it's not just about formula columns; pure data columns need to cause invalidation of
formula columns that depend on them. Those data columns that have an associated formula may
additionally (typically on AddRecord) be themselves invalidated, to cause recomputation.
"""
table = self.tables[table_id]
columns = (table.all_columns.values()
if col_ids is None else [table.get_column(c) for c in col_ids])
for column in columns:
# If data_cols_to_recompute includes this column, compute its default formula. This
# flag is set on AddRecord and BulkAddRecord, when a default formula needs to be computed.
self.invalidate_column(column, row_ids, column.col_id in data_cols_to_recompute)
def invalidate_column(self, col_obj, row_ids=depend.ALL_ROWS, recompute_data_col=False):
# Normally, only formula columns use include_self (to recompute themselves). However, if
# recompute_data_col is set, default formulas will also be computed.
include_self = col_obj.is_formula() or (col_obj.has_formula() and recompute_data_col)
self.dep_graph.invalidate_deps(col_obj.node, row_ids, self.recompute_map,
include_self=include_self)
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
def prevent_recalc(self, node, row_ids, should_prevent):
prevented = self._prevent_recompute_map.setdefault(node, set())
if should_prevent:
prevented.update(row_ids)
else:
prevented.difference_update(row_ids)
def rebuild_usercode(self):
"""
Compiles the usercode from the schema, and updates all tables and columns to match.
"""
self.gencode.make_module(self.schema)
# Re-populate self.tables, reusing existing tables whenever possible.
old_tables = self.tables
self.tables = {}
sorted_tables = []
for table_id, user_table in six.iteritems(self.gencode.usercode.__dict__):
if not isinstance(user_table, table_module.UserTable):
continue
self.tables[table_id] = table = (
old_tables.get(table_id) or table_module.Table(table_id, self)
)
# Process non-summary tables first so that summary tables
# can read correct metadata about their source tables
key = (hasattr(user_table.Model, '_summarySourceTable'), table_id)
sorted_tables.append((key, table, user_table))
sorted_tables.sort()
# Now update the table model for each table, and tie it to its UserTable object.
for _, table, user_table in sorted_tables:
self._update_table_model(table, user_table)
user_table._set_table_impl(table)
# For any tables that are gone, use self._update_table_model to clean them up.
for table_id, table in six.iteritems(old_tables):
if table_id not in self.tables:
self._update_table_model(table, None)
# Update docmodel with references to the updated metadata tables.
self.docmodel.update_tables()
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
# Set flag to rebuild dependencies of trigger columns after any potential renames, etc.
self.trigger_columns_changed()
# Clear the cached context used for autocompletions.
# See the comment on _autocomplete_context in __init__.
self._autocomplete_context = None
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
def trigger_columns_changed(self):
self._have_trigger_columns_changed = True
def _update_table_model(self, table, user_table):
"""
Updates the given Table object to match the given user_table (from usercode module). This
builds new columns as needed, and cleans up. To clean up state for a table getting removed,
pass in user_table of None.
"""
# Save the dict of columns before the update.
old_columns = table.all_columns.copy()
if user_table is None:
new_columns = {}
else:
# Update the table's model. This also builds new columns if needed.
table._rebuild_model(user_table)
new_columns = table.all_columns
added_col_ids = six.viewkeys(new_columns) - six.viewkeys(old_columns)
deleted_col_ids = six.viewkeys(old_columns) - six.viewkeys(new_columns)
# Invalidate the columns that got added and anything that depends on them.
if added_col_ids:
self.invalidate_records(table.table_id, col_ids=added_col_ids)
for col_id in deleted_col_ids:
self.invalidate_column(old_columns[col_id])
# Schedule deleted columns for clean-up.
for c in deleted_col_ids:
self.delete_column(old_columns[c])
if user_table is None:
for c in table.get_helper_columns():
self.delete_column(c)
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
def _maybe_update_trigger_dependencies(self):
if not self._have_trigger_columns_changed:
return
self._have_trigger_columns_changed = False
# Without being very smart, if trigger-formula dependencies change for any columns, rebuild
# them for all columns. Specifically, we will create nodes and edges in the dependency graph.
for table_id, table in six.iteritems(self.tables):
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
if table_id.startswith('_grist_'):
# We can skip metadata tables, there are no trigger-formulas there.
continue
for col_id, col_obj in six.iteritems(table.all_columns):
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
if col_obj.is_formula() or not col_obj.has_formula():
continue
col_rec = self.docmodel.columns.lookupOne(tableId=table_id, colId=col_id)
out_node = depend.Node(table_id, col_id)
rel = SingleRowsIdentityRelation(table_id)
self.dep_graph.clear_dependencies(out_node)
# When we have explicit dependencies, add them to dep_graph.
if col_rec.recalcWhen == RecalcWhen.DEFAULT:
for dc in col_rec.recalcDeps:
in_node = depend.Node(table_id, dc.colId)
edge = depend.Edge(out_node, in_node, rel)
if edge not in self._recompute_edge_set:
self._recompute_edge_set.add(edge)
self.dep_graph.add_edge(*edge)
def delete_column(self, col_obj):
# Remove the column from its table.
if col_obj.table_id in self.tables:
self.tables[col_obj.table_id].delete_column(col_obj)
# Invalidate anything that depends on the column being deleted. The column may be gone from
# the table itself, so we use invalidate_column directly.
self.invalidate_column(col_obj)
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
# Remove reference to the column from the dependency graph and the recompute_map.
self.dep_graph.clear_dependencies(col_obj.node)
self.recompute_map.pop(col_obj.node, None)
# Mark the column to be destroyed at the end of applying this docaction.
self._gone_columns.append(col_obj)
def new_column_name(self, table):
"""
Invalidate anything that referenced unknown columns, in case the newly-added name fixes the
broken reference.
"""
self.dep_graph.invalidate_deps(table._new_columns_node, depend.ALL_ROWS, self.recompute_map,
include_self=False)
def update_current_time(self):
self.dep_graph.invalidate_deps(self._current_time_node, depend.ALL_ROWS, self.recompute_map,
include_self=False)
def use_current_time(self):
"""
Add a dependency on the current time to the current evaluating node,
so that calling update_current_time() will invalidate the node and cause its reevaluation.
"""
if not self._current_node:
return
table_id = self._current_node[0]
table = self.tables[table_id]
self._use_node(self._current_time_node, table._identity_relation)
_current_time_node = ("#now", None)
def mark_lookupmap_for_cleanup(self, lookup_map_column):
"""
Once a LookupMapColumn seems no longer used, it's added here. We'll check after recomputing
everything, and if still unused, will clean it up.
"""
self._unused_lookups.add(lookup_map_column)
def count_rows(self):
return sum(
table._num_rows()
for table_id, table in six.iteritems(self.tables)
if useractions.is_user_table(table_id)
)
def apply_user_actions(self, user_actions, user=None):
"""
Applies the list of user_actions. Returns an ActionGroup.
"""
# We currently recompute everything and send all calc actions back on every change. If clients
# only need a subset of data loaded, it would be better to filter calc actions, and
# include only those the clients care about. For side-effects, we might want to recompute
# everything, and only filter what we send.
self.out_actions = action_obj.ActionGroup()
self._user = User(user, self.tables) if user else None
checkpoint = self._get_undo_checkpoint()
try:
for user_action in user_actions:
self._schema_updated = False
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
# At the start of each useraction, clear exemptions. These are used to avoid recalcs of
# trigger-formula columns for which the same useractions sets an explicit value.
self._prevent_recompute_map.clear()
self.out_actions.retValues.append(self._apply_one_user_action(user_action))
# If the UserAction touched the schema, check that it is now consistent with metadata.
if self._schema_updated:
self.assert_schema_consistent()
except Exception as e:
# Save full exception info, so that we can rethrow accurately even if undo also fails.
exc_info = sys.exc_info()
# If we get an exception, we should revert all changes applied so far, to keep things
# consistent internally as well as with the clients and database outside of the sandbox
# (which won't see any changes in case of an error).
log.info("Failed to apply useractions; reverting: %r" % (e,))
self._undo_to_checkpoint(checkpoint)
# Check schema consistency again. If this fails, something is really wrong (we tried to go
# back to a good state but failed). We'll just report it loudly.
try:
if self._schema_updated:
self.assert_schema_consistent()
except Exception:
log.error("Inconsistent schema after revert on failure: %s" % traceback.format_exc())
# Re-raise the original exception
# In Python 2, 'raise' raises the most recent exception,
# which may come from the try/except just above
# Python 3 keeps track of nested exceptions better
if six.PY2:
six.reraise(*exc_info)
else:
raise
(core) Implement trigger formulas (generalizing default formulas) Summary: Trigger formulas can be calculated for new records, or for new records and updates to certain fields, or all fields. They do not recalculate on open, and they MAY be set directly by the user, including for data-cleaning. - Column metadata now includes recalcWhen and recalcDeps fields. - Trigger formulas are NOT recalculated on open or on schema changes. - When recalcWhen is "never", formula isn't calculated even for new records. - When recalcWhen is "allupdates", formula is calculated for new records and any manual (non-formula) updates to the record. - When recalcWhen is "", formula is calculated for new records, and changes to recalcDeps fields (which may be formula fields or column itself). - A column whose recalcDeps includes itself is a "data-cleaning" column; a value set by the user will still trigger the formula. - All trigger-formulas receive a "value" argument (to support the case above). Small changes - Update RefLists (used for recalcDeps) when target rows are deleted. - Add RecordList.__contains__ (for `rec in refList` or `id in refList` checks) - Clarify that Calculate action has replaced load_done() in practice, and use it in tests too, to better match reality. Left for later: - UI for setting recalcWhen / recalcDeps. - Implementation of actions such as "Recalculate for all cells". - Allowing trigger-formulas access to the current user's info. Test Plan: Added a comprehensive python-side test for various trigger combinations Reviewers: paulfitz, alexmojaki Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D2872
2021-06-25 20:34:20 +00:00
# If needed, rebuild dependencies for trigger formulas.
self._maybe_update_trigger_dependencies()
# Note that recalculations and auto-removals get included after processing all useractions.
self._bring_all_up_to_date()
# Apply any triggered record removals. If anything does get removed, recalculate what's needed.
while self.docmodel.apply_auto_removes():
self._bring_all_up_to_date()
self.out_actions.flush_calc_changes()
self.out_actions.check_sanity()
self._user = None
return self.out_actions
def acl_split(self, action_group):
"""
Splits ActionGroups, as returned e.g. from apply_user_actions, by permissions. Returns a
single ActionBundle containing of all of the original action_groups.
"""
# pylint:disable=no-self-use
return acl.acl_read_split(action_group)
def _apply_one_user_action(self, user_action):
"""
Applies a single user action to the document, without running any triggered updates.
A UserAction is a tuple whose first element is the name of the action.
"""
log.debug("applying user_action %s" % (user_action,))
return getattr(self.user_actions, user_action.__class__.__name__)(*user_action)
def apply_doc_action(self, doc_action):
"""
Applies a doc action, which is a step of a user action. It is represented by an Action object
as defined in actions.py.
"""
#log.warn("Engine.apply_doc_action %s" % (doc_action,))
self._gone_columns = []
action_name = doc_action.__class__.__name__
saved_schema = None
if action_name in actions.schema_actions:
self._schema_updated = True
# Make a copy of the schema. If a bug causes a docaction to fail after modifying schema, we
# restore it, or we'll end up with mismatching schema and metadata.
saved_schema = schema.clone_schema(self.schema)
try:
getattr(self.doc_actions, action_name)(*doc_action)
except Exception:
# Save full exception info, so that we can rethrow accurately even if this clause also fails.
exc_info = sys.exc_info()
if saved_schema:
log.info("Restoring schema and usercode on exception")
self.schema = saved_schema
try:
self.rebuild_usercode()
except Exception:
log.error("Error rebuilding usercode after restoring schema: %s" % traceback.format_exc())
# Re-raise the original exception
# In Python 2, 'raise' raises the most recent exception,
# which may come from the try/except just above
# Python 3 keeps track of nested exceptions better
if six.PY2:
six.reraise(*exc_info)
else:
raise
# If any columns got deleted, destroy them to clear _back_references in other tables, and to
# force errors if anything still uses them. Also clear them from calc actions if needed.
for col in self._gone_columns:
# Calc actions may already be generated if the column deletion was triggered by auto-removal.
actions.prune_actions(self.out_actions.calc, col.table_id, col.col_id)
col.destroy()
# We normally recompute formulas before returning to the user; but some formulas are also used
# internally in-between applying doc actions. We have this workaround to ensure that those are
# up-to-date after each doc action. See more in comments for _bring_mlookups_up_to_date.
# We check _in_update_loop to avoid a recursive call (happens when a formula produces an
# action, as for derived/summary tables).
if not self._in_update_loop:
self._bring_mlookups_up_to_date(doc_action)
def autocomplete(self, txt, table_id, column_id, user):
"""
Return a list of suggested completions of the python fragment supplied.
"""
# replace $ with rec. and add a dummy rec object
tweaked_txt = DOLLAR_REGEX.sub(r'rec.', txt)
# convert a bare $ with nothing after it also
if txt == '$':
tweaked_txt = 'rec.'
table = self.tables[table_id]
autocomplete_context = self.autocomplete_context
context = autocomplete_context.get_context()
context['rec'] = table.sample_record
# Remove values from the context that need to be recomputed.
context.pop('value', None)
context.pop('user', None)
column = table.get_column(column_id) if table.has_column(column_id) else None
if column and not column.is_formula():
# Add trigger formula completions.
context['value'] = column.sample_value()
context['user'] = User(user, self.tables, is_sample=True)
completer = rlcompleter.Completer(context)
results = []
at = 0
while True:
# Get a possible completion. Result will be None or "<tweaked_txt><extra suggestion>"
result = completer.complete(tweaked_txt, at)
at += 1
if not result:
break
if skipped_completions.search(result):
continue
results.append(autocomplete_context.process_result(result))
# If we changed the prefix (expanding the $ symbol) we now need to change it back.
if tweaked_txt != txt:
results = [txt + result[len(tweaked_txt):] for result in results]
# pylint:disable=unidiomatic-typecheck
results.sort(key=lambda r: r[0] if type(r) == tuple else r)
return results
def _get_undo_checkpoint(self):
"""
You may call _get_undo_checkpoint() and pass its result into _undo_to_checkpoint() to undo
DocActions saved since the first call; but only while in a single apply_user_actions() call.
"""
# We produce a tuple of lengths: one for each of the properties of out_actions ActionObj.
aobj = self.out_actions
return (len(aobj.calc), len(aobj.stored), len(aobj.undo), len(aobj.retValues))
def _undo_to_checkpoint(self, checkpoint):
"""
See _get_undo_checkpoint() above.
"""
# Check if out_actions ActionObj grew at all since _get_undo_checkpoint(). If yes, revert by
# applying any undo actions, and trim it back to original state (if we don't trim it, it will
# only grow further, with undo actions themselves getting applied as new doc actions).
new_checkpoint = self._get_undo_checkpoint()
if new_checkpoint != checkpoint:
(len_calc, len_stored, len_undo, len_ret) = checkpoint
undo_actions = self.out_actions.undo[len_undo:]
log.info("Reverting %d doc actions" % len(undo_actions))
self.user_actions.ApplyUndoActions([actions.get_action_repr(a) for a in undo_actions])
del self.out_actions.calc[len_calc:]
del self.out_actions.stored[len_stored:]
del self.out_actions.direct[len_stored:]
del self.out_actions.undo[len_undo:]
del self.out_actions.retValues[len_ret:]
# end