(core) Replace compute stack and frames with _current_node and _current_row_id

Summary:
This is an attempt to optimise Engine._use_node. It doesn't seem to actually improve overall performance significantly, but it shouldn't make it worse, and I think it's an improvement to the code.

It turns out that there's no need to track a stack of compute frames any more. The only time we get close to nested evaluation, we set allow_evaluation=False to prevent it actually happening. So there's only one 'frame' during actual evaluation, which means we can get rid of the concept of frames entirely. This allows simplifying the code and letting the computer do less work in general.

Test Plan: this

Reviewers: dsagal

Reviewed By: dsagal

Subscribers: dsagal

Differential Revision: https://phab.getgrist.com/D3310
This commit is contained in:
Alex Hall 2022-03-10 22:34:21 +02:00
parent f02174eb7e
commit 2d0978559b
3 changed files with 104 additions and 135 deletions

View File

@ -129,18 +129,6 @@ class Engine(object):
containing several categories of DocActions, including the results of computations. containing several categories of DocActions, including the results of computations.
""" """
class ComputeFrame(object):
"""
Represents the node and ID of the value currently being recomputed. There is a stack of
ComputeFrames, because during computation we may access other out-of-date nodes, and need to
recompute those first.
compute_frame.current_row_id gets set to each record ID as we go through them.
"""
def __init__(self, node):
self.node = node
self.current_row_id = None
def __init__(self): def __init__(self):
# The document data, including logic (formulas), and metadata (tables prefixed with "_grist_"). # The document data, including logic (formulas), and metadata (tables prefixed with "_grist_").
self.tables = {} # Maps table IDs (or names) to Table objects. self.tables = {} # Maps table IDs (or names) to Table objects.
@ -193,8 +181,9 @@ class Engine(object):
# The lists of actions of different kinds, built up while applying an action. # The lists of actions of different kinds, built up while applying an action.
self.out_actions = action_obj.ActionGroup() self.out_actions = action_obj.ActionGroup()
# Stack of compute frames. # What's currently being computed
self._compute_stack = [] self._current_node = None
self._current_row_id = None
# Certain recomputations are triggered by a particular doc action. This keep track of it. # Certain recomputations are triggered by a particular doc action. This keep track of it.
self._triggering_doc_action = None self._triggering_doc_action = None
@ -478,35 +467,15 @@ class Engine(object):
for node, dirty_rows in six.iteritems(self.recompute_map): for node, dirty_rows in six.iteritems(self.recompute_map):
log.debug(" Node %s: %s" % (node, dirty_rows)) log.debug(" Node %s: %s" % (node, dirty_rows))
@contextlib.contextmanager
def open_compute_frame(self, node):
"""
Use as: `with open_compute_frame(node) as frame:`. This automatically maintains the stack of
ComputeFrames, pushing and popping reliably.
"""
frame = Engine.ComputeFrame(node)
self._compute_stack.append(frame)
try:
yield frame
finally:
self._compute_stack.pop()
def get_current_frame(self):
"""
Returns the compute frame currently being computed, or None if there isn't one.
"""
return self._compute_stack[-1] if self._compute_stack and self._compute_stack[-1].node else None
def _use_node(self, node, relation, row_ids=[]): def _use_node(self, node, relation, row_ids=[]):
# This is used whenever a formula accesses any part of any record. It's hot code, and # This is used whenever a formula accesses any part of any record. It's hot code, and
# it's worth optimizing. # it's worth optimizing.
if self._compute_stack and self._compute_stack[-1].node: if self._current_node:
# Add an edge to indicate that the node being computed depends on the node passed in. # Add an edge to indicate that the node being computed depends on the node passed in.
# Note that during evaluation, we only *add* dependencies. We *remove* them by clearing them # Note that during evaluation, we only *add* dependencies. We *remove* them by clearing them
# whenever ALL rows for a node are invalidated (on schema changes and reloads). # whenever ALL rows for a node are invalidated (on schema changes and reloads).
current_node = self._compute_stack[-1].node edge = (self._current_node, node, relation)
edge = (current_node, node, relation)
if edge not in self._recompute_edge_set: if edge not in self._recompute_edge_set:
self.dep_graph.add_edge(*edge) self.dep_graph.add_edge(*edge)
self._recompute_edge_set.add(edge) self._recompute_edge_set.add(edge)
@ -694,7 +663,7 @@ class Engine(object):
col = table.get_column(col_id) col = table.get_column(col_id)
checkpoint = self._get_undo_checkpoint() checkpoint = self._get_undo_checkpoint()
try: try:
result = self._recompute_one_cell(None, table, col, row_id) result = self._recompute_one_cell(table, col, row_id)
# If the error is gone for a trigger formula # If the error is gone for a trigger formula
if col.has_formula() and not col.is_formula(): if col.has_formula() and not col.is_formula():
if not isinstance(result, objtypes.RaisedException): if not isinstance(result, objtypes.RaisedException):
@ -778,14 +747,14 @@ class Engine(object):
require_rows = sorted(require_rows or []) require_rows = sorted(require_rows or [])
previous_current_node = self._current_node
# Prevents dependency creation for non-formula nodes. A non-formula column may include a # Prevents dependency creation for non-formula nodes. A non-formula column may include a
# formula to eval for a newly-added record. Those shouldn't create dependencies. # formula to eval for a newly-added record. Those shouldn't create dependencies.
formula_node = node if col.is_formula() else None self._current_node = node if col.is_formula() else None
changes = None changes = None
cleaned = [] # this lists row_ids that can be removed from dirty_rows once we are no cleaned = [] # this lists row_ids that can be removed from dirty_rows once we are no
# longer iterating on it. # longer iterating on it.
with self.open_compute_frame(formula_node) as frame:
try: try:
require_count = len(require_rows) require_count = len(require_rows)
for i, row_id in enumerate(itertools.chain(require_rows, dirty_rows)): for i, row_id in enumerate(itertools.chain(require_rows, dirty_rows)):
@ -815,7 +784,7 @@ class Engine(object):
# We figure out if we've hit a cycle here. If so, we just let _recompute_on_cell # We figure out if we've hit a cycle here. If so, we just let _recompute_on_cell
# know, so it can set the cell value appropriately and do some other bookkeeping. # know, so it can set the cell value appropriately and do some other bookkeeping.
cycle = required and (node, row_id) in self._locked_cells cycle = required and (node, row_id) in self._locked_cells
value = self._recompute_one_cell(frame, table, col, row_id, cycle=cycle, node=node) value = self._recompute_one_cell(table, col, row_id, cycle=cycle, node=node)
except OrderError as e: except OrderError as e:
if not required: if not required:
# We're out of order, but for a cell we were evaluating opportunistically. # We're out of order, but for a cell we were evaluating opportunistically.
@ -864,6 +833,7 @@ class Engine(object):
dirty_rows = None dirty_rows = None
finally: finally:
self._current_node = previous_current_node
for row_id in cleaned: for row_id in cleaned:
# Usually dirty_rows refers to self.recompute_map[node], so this modifies both # Usually dirty_rows refers to self.recompute_map[node], so this modifies both
dirty_rows.discard(row_id) dirty_rows.discard(row_id)
@ -873,7 +843,7 @@ class Engine(object):
if not self.recompute_map[node]: if not self.recompute_map[node]:
self.recompute_map.pop(node) self.recompute_map.pop(node)
def _recompute_one_cell(self, frame, table, col, row_id, cycle=False, node=None): def _recompute_one_cell(self, table, col, row_id, cycle=False, node=None):
""" """
Recomputes an one formula cell and returns a value. Recomputes an one formula cell and returns a value.
The value can be: The value can be:
@ -881,8 +851,7 @@ class Engine(object):
- exception - exception
- exception with details if flag include_details is set - exception with details if flag include_details is set
""" """
if frame: self._current_row_id = row_id
frame.current_row_id = row_id
# Baffling, but keeping a reference to current generated "usercode" module protects against a # Baffling, but keeping a reference to current generated "usercode" module protects against a
# seeming garbage-collection bug: if during formula evaluation the module gets regenerated # seeming garbage-collection bug: if during formula evaluation the module gets regenerated
@ -1325,9 +1294,9 @@ class Engine(object):
# We normally recompute formulas before returning to the user; but some formulas are also used # We normally recompute formulas before returning to the user; but some formulas are also used
# internally in-between applying doc actions. We have this workaround to ensure that those are # internally in-between applying doc actions. We have this workaround to ensure that those are
# up-to-date after each doc action. See more in comments for _bring_mlookups_up_to_date. # up-to-date after each doc action. See more in comments for _bring_mlookups_up_to_date.
# We check _compute_stack to avoid a recursive call (happens when a formula produces an # We check _in_update_loop to avoid a recursive call (happens when a formula produces an
# action, as for derived/summary tables). # action, as for derived/summary tables).
if not self._compute_stack: if not self._in_update_loop:
self._bring_mlookups_up_to_date(doc_action) self._bring_mlookups_up_to_date(doc_action)
def autocomplete(self, txt, table_id, column_id, user): def autocomplete(self, txt, table_id, column_id, user):

View File

@ -577,7 +577,7 @@ def _prepare_record_dict(record, dates_as_iso=False, expand_refs=0):
table_id = record._table.table_id table_id = record._table.table_id
docmodel = record._table._engine.docmodel docmodel = record._table._engine.docmodel
columns = docmodel.get_table_rec(table_id).columns columns = docmodel.get_table_rec(table_id).columns
frame = record._table._engine.get_current_frame() current_node = record._table._engine._current_node
result = {'id': int(record)} result = {'id': int(record)}
errors = {} errors = {}
@ -590,7 +590,7 @@ def _prepare_record_dict(record, dates_as_iso=False, expand_refs=0):
# Avoid trying to access the cell being evaluated, since cycles get detected even if the # Avoid trying to access the cell being evaluated, since cycles get detected even if the
# CircularRef exception is caught. TODO This is hacky, and imperfect. If another column # CircularRef exception is caught. TODO This is hacky, and imperfect. If another column
# references a column containing the RECORD(rec) call, CircularRefError will still happen. # references a column containing the RECORD(rec) call, CircularRefError will still happen.
if frame and frame.node == (table_id, col_id): if current_node == (table_id, col_id):
continue continue
try: try:

View File

@ -125,17 +125,17 @@ class BaseLookupMapColumn(column.BaseColumn):
the current frame to the returned records. Returns an empty set if no records match. the current frame to the returned records. Returns an empty set if no records match.
""" """
key = tuple(_extract(val) for val in key) key = tuple(_extract(val) for val in key)
current_frame = self._engine.get_current_frame() engine = self._engine
if current_frame: if engine._current_node:
rel = self._get_relation(current_frame.node) rel = self._get_relation(engine._current_node)
rel._add_lookup(current_frame.current_row_id, key) rel._add_lookup(engine._current_row_id, key)
else: else:
rel = None rel = None
# The _use_node call both brings LookupMapColumn up-to-date, and creates a dependency on it. # The _use_node call both brings LookupMapColumn up-to-date, and creates a dependency on it.
# Relation of None isn't valid, but it happens to be unused when there is no current_frame. # Relation of None isn't valid, but it happens to be unused when there is no current_frame.
row_ids = self._row_key_map.lookup_right(key, set()) row_ids = self._row_key_map.lookup_right(key, set())
self._engine._use_node(self.node, rel, row_ids) engine._use_node(self.node, rel, row_ids)
if not row_ids: if not row_ids:
row_ids = self._row_key_map.lookup_right(key, set()) row_ids = self._row_key_map.lookup_right(key, set())