(core) New API to collect timing information from formula evaluation.

Summary:
- /timing/start endpoint to start collecting information
- /timing/stop endpoint to stop collecting
- /timing to retrieve data gathered so far

Timings are collected for all columns (including hidden/helpers/system)

Test Plan: Added new

Reviewers: paulfitz

Reviewed By: paulfitz

Differential Revision: https://phab.getgrist.com/D4230
This commit is contained in:
Jarosław Sadziński
2024-04-18 14:13:16 +02:00
parent c187ca3093
commit bd07e9c026
12 changed files with 530 additions and 46 deletions

View File

@@ -36,6 +36,7 @@ import sandbox
import schema
from schema import RecalcWhen
import table as table_module
from timing import DummyTiming
from user import User # pylint:disable=wrong-import-order
import useractions
import column
@@ -263,6 +264,8 @@ class Engine(object):
# make multiple different requests without needing to keep all the responses in memory.
self._cached_request_keys = set()
self._timing = DummyTiming()
@property
def autocomplete_context(self):
# See the comment on _autocomplete_context in __init__ above.
@@ -969,51 +972,53 @@ class Engine(object):
assert not cycle
record = AttributeRecorder(record, "rec", record_attributes)
value = None
try:
if cycle:
raise depend.CircularRefError("Circular Reference")
if not col.is_formula():
value = col.get_cell_value(int(record), restore=True)
with FakeStdStreams():
result = col.method(record, table.user_table, value, self._user)
else:
with FakeStdStreams():
result = col.method(record, table.user_table)
if self._cell_required_error:
raise self._cell_required_error # pylint: disable=raising-bad-type
self.formula_tracer(col, record)
return result
except MemoryError:
# Don't try to wrap memory errors.
raise
except: # pylint: disable=bare-except
# Since col.method runs untrusted user code, we use a bare except to catch all
# exceptions (even those not derived from BaseException).
with self._timing.measure(col.node):
try:
if cycle:
raise depend.CircularRefError("Circular Reference")
if not col.is_formula():
value = col.get_cell_value(int(record), restore=True)
with FakeStdStreams():
result = col.method(record, table.user_table, value, self._user)
else:
with FakeStdStreams():
result = col.method(record, table.user_table)
if self._cell_required_error:
raise self._cell_required_error # pylint: disable=raising-bad-type
self.formula_tracer(col, record)
return result
except MemoryError:
# Don't try to wrap memory errors.
raise
except: # pylint: disable=bare-except
# Since col.method runs untrusted user code, we use a bare except to catch all
# exceptions (even those not derived from BaseException).
# Before storing the exception value, make sure there isn't an OrderError pending.
# If there is, we will raise it after undoing any side effects.
order_error = self._cell_required_error
# Before storing the exception value, make sure there isn't an OrderError pending.
# If there is, we will raise it after undoing any side effects.
order_error = self._cell_required_error
# Otherwise, we use sys.exc_info to recover the raised exception object.
regular_error = sys.exc_info()[1] if not order_error else None
# Otherwise, we use sys.exc_info to recover the raised exception object.
regular_error = sys.exc_info()[1] if not order_error else None
# It is possible for formula evaluation to have side-effects that produce DocActions (e.g.
# lookupOrAddDerived() creates those). If there is an error, undo any such side-effects.
self._undo_to_checkpoint(checkpoint)
# It is possible for formula evaluation to have side-effects that produce DocActions (e.g.
# lookupOrAddDerived() creates those). If there is an error, undo any such side-effects.
self._undo_to_checkpoint(checkpoint)
# Now we can raise the order error, if there was one. Cell evaluation will be reordered
# in response.
if order_error:
self._cell_required_error = None
raise order_error # pylint: disable=raising-bad-type
# Now we can raise the order error, if there was one. Cell evaluation will be reordered
# in response.
if order_error:
self._timing.mark("order_error")
self._cell_required_error = None
raise order_error # pylint: disable=raising-bad-type
self.formula_tracer(col, record)
self.formula_tracer(col, record)
include_details = (node not in self._is_node_exception_reported) if node else True
if not col.is_formula():
return objtypes.RaisedException(regular_error, include_details, user_input=value)
else:
return objtypes.RaisedException(regular_error, include_details)
include_details = (node not in self._is_node_exception_reported) if node else True
if not col.is_formula():
return objtypes.RaisedException(regular_error, include_details, user_input=value)
else:
return objtypes.RaisedException(regular_error, include_details)
def convert_action_values(self, action):
"""

View File

@@ -5,6 +5,8 @@ and starts the grist sandbox. See engine.py for the API documentation.
import os
import random
import sys
from timing import DummyTiming, Timing
sys.path.append('thirdparty')
# pylint: disable=wrong-import-position
@@ -158,6 +160,20 @@ def run(sandbox):
def evaluate_formula(table_id, col_id, row_id):
return formula_prompt.evaluate_formula(eng, table_id, col_id, row_id)
@export
def start_timing():
  """Begins collecting timings by installing a fresh Timing collector on the engine."""
  eng._timing = Timing()
@export
def stop_timing():
  """
  Stops collecting timings and returns what the engine's current collector reports.

  The stats are read first, then the collector is swapped for a DummyTiming so that later
  evaluations are no longer measured.
  """
  collected = eng._timing.get()
  eng._timing = DummyTiming()
  return collected
@export
def get_timings():
  """
  Returns the stats reported by the engine's current timing collector, leaving it installed.

  Note that get() is called with its defaults, and Timing.get() clears the collected data by
  default.
  """
  return eng._timing.get()
export(parse_acl_formula)
export(eng.load_empty)
export(eng.load_done)

115
sandbox/grist/timing.py Normal file
View File

@@ -0,0 +1,115 @@
import contextlib
import time
import six
class Timing(object):
  """
  Accumulates wall-clock durations per key, plus optional named marks inside a measurement.

  Durations are stored in `_items` as one stats object per key. Marks added via mark() while a
  measure() block is active are turned into extra entries when that block exits; their keys are
  strings of the form "<key>@<index>=<from>:<to>".
  """

  def __init__(self):
    # Maps a key (a node tuple, or a mark string derived from one) to its stats object.
    self._items = {}
    # (mark_name, timestamp) pairs added by mark() and not yet consumed by measure().
    self._marks_stack = []

  @contextlib.contextmanager
  def measure(self, key):
    """
    Context manager that records the time spent in its body under `key`.

    If marks were added inside the body, the interval is additionally split at each mark:
    segment 0 runs from "start" to the first mark, segment i from mark i to mark i+1, and the
    last segment from the last mark to "end". Recording happens even if the body raises.
    """
    started = time.time()
    depth_on_entry = len(self._marks_stack)
    try:
      yield
    finally:
      finished = time.time()
      self._record_time(key, finished - started)

      # Only the marks pushed during this invocation belong to it; walk them newest-first.
      added = len(self._marks_stack) - depth_on_entry
      if added > 0:
        later_name, later_time = "end", finished
        for index in range(added, 0, -1):
          name, stamp = self._marks_stack.pop()
          self._record_time("{}@{}={}:{}".format(key, index, name, later_name),
                            later_time - stamp)
          later_name, later_time = name, stamp
        # Finally the segment from the start of the measurement to the earliest mark.
        self._record_time("{}@{}={}:{}".format(key, 0, "start", later_name),
                          later_time - started)

  def mark(self, mark_name):
    """Adds a named timestamp; it is consumed when the enclosing measure() block exits."""
    self._marks_stack.append((mark_name, time.time()))

  def get(self, clear=True):
    """
    Returns the collected stats as a list of dicts, sorted by the string form of the key.

    Tuple keys produce {"tableId", "colId", "sum", "count", "average", "max"}. String keys
    (marks) are appended to a "marks" list on the most recent tuple entry, each as
    {"name", "sum", "count", "average", "max"}; a mark with no preceding tuple entry is
    dropped. When `clear` is true, the collected data is reset.
    """
    # Take the snapshot before clearing, so the returned data is what was gathered so far.
    snapshot = self._items.copy()
    if clear:
      self.clear()

    stats = []
    # Sorting by string puts each node right before the marks that are prefixed with it.
    for key in sorted(snapshot, key=str):
      t = snapshot[key]
      if isinstance(key, tuple):
        stats.append({
          "tableId": key[0], "colId": key[1],
          "sum": t.sum, "count": t.count, "average": t.average, "max": t.max,
        })
      elif stats:
        mark_entry = {
          "name": key, "sum": t.sum, "count": t.count, "average": t.average, "max": t.max,
        }
        stats[-1].setdefault("marks", []).append(mark_entry)
    return stats

  def dump(self):
    """Prints one line per key (count, average, max, sum, key) and then clears the data."""
    lines = [
      "%6d, %10f, %10f, %10f, %s" % (t.count, t.average, t.max, t.sum, key)
      for key, t in sorted(self._items.items(), key=lambda item: str(item[0]))
    ]
    print("Timing\n" + "\n".join(lines))
    self.clear()

  def _record_time(self, key, time_sec):
    """Adds `time_sec` to the stats for `key`, creating the stats object on first use."""
    entry = self._items.get(key)
    if not entry:
      entry = TimingStats()
      self._items[key] = entry
    entry.add(time_sec)

  def clear(self):
    """Forgets all collected durations (pending marks are left untouched)."""
    self._items.clear()
# An implementation that adds minimal overhead.
class DummyTiming(object):
  """
  No-op collector with the same methods as Timing, used while timing is switched off.

  The instance serves as its own context manager, so measure() does not allocate a new object
  on every call and does not depend on contextlib.nested (Python 2 only) or
  contextlib.nullcontext (Python 3.7+ only).
  """
  # pylint: disable=no-self-use,unused-argument

  def measure(self, key):
    """Returns a context manager that does nothing; `key` is ignored."""
    return self

  def __enter__(self):
    # Like contextlib.nullcontext(), bind nothing for `with ... as`.
    return None

  def __exit__(self, exc_type, exc_value, traceback):
    # A falsy result lets any exception raised inside the `with` body propagate.
    return False

  def mark(self, mark_name):
    """Ignores the mark."""
    pass

  def dump(self):
    """Prints nothing, since nothing is collected."""
    pass

  def get(self, clear=True):
    """Returns an empty list of stats; `clear` is accepted for interface parity."""
    return []

  def clear(self):
    """Does nothing, since nothing is collected."""
    pass
class TimingStats(object):
  """Running aggregate of recorded values: how many, their total, and the largest one."""

  def __init__(self):
    self.count = 0
    self.sum = 0
    self.max = 0

  @property
  def average(self):
    """Mean of the recorded values, or 0 when nothing has been recorded yet."""
    if self.count > 0:
      return self.sum / self.count
    return 0

  def add(self, value):
    """Folds one more value into the aggregate."""
    self.count += 1
    self.sum += value
    # max() keeps the current maximum unless `value` is strictly greater.
    self.max = max(self.max, value)