(core) New API to collect timing information from formula evaluation.

Summary: - /timing/start endpoint to start collecting information - /timing/stop endpoint to stop collecting - /timing to retrieve data gathered so far. Timings are collected for all columns (including hidden/helpers/system). Test Plan: Added new Reviewers: paulfitz Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D4230
2026-03-02 04:09:24 +00:00 · 2024-04-18 14:13:16 +02:00
parent c187ca3093
commit bd07e9c026
12 changed files with 530 additions and 46 deletions
--- a/sandbox/grist/engine.py
+++ b/sandbox/grist/engine.py
@@ -36,6 +36,7 @@ import sandbox
 import schema
 from schema import RecalcWhen
 import table as table_module
+from timing import DummyTiming
 from user import User # pylint:disable=wrong-import-order
 import useractions
 import column
@@ -263,6 +264,8 @@ class Engine(object):
    # make multiple different requests without needing to keep all the responses in memory.
    self._cached_request_keys = set()

+    self._timing = DummyTiming()
+
  @property
  def autocomplete_context(self):
    # See the comment on _autocomplete_context in __init__ above.
@@ -969,51 +972,53 @@ class Engine(object):
      assert not cycle
      record = AttributeRecorder(record, "rec", record_attributes)
    value = None
-    try:
-      if cycle:
-        raise depend.CircularRefError("Circular Reference")
-      if not col.is_formula():
-        value = col.get_cell_value(int(record), restore=True)
-        with FakeStdStreams():
-          result = col.method(record, table.user_table, value, self._user)
-      else:
-        with FakeStdStreams():
-          result = col.method(record, table.user_table)
-      if self._cell_required_error:
-        raise self._cell_required_error  # pylint: disable=raising-bad-type
-      self.formula_tracer(col, record)
-      return result
-    except MemoryError:
-      # Don't try to wrap memory errors.
-      raise
-    except:  # pylint: disable=bare-except
-      # Since col.method runs untrusted user code, we use a bare except to catch all
-      # exceptions (even those not derived from BaseException).
+    with self._timing.measure(col.node):
+      try:
+        if cycle:
+          raise depend.CircularRefError("Circular Reference")
+        if not col.is_formula():
+          value = col.get_cell_value(int(record), restore=True)
+          with FakeStdStreams():
+            result = col.method(record, table.user_table, value, self._user)
+        else:
+          with FakeStdStreams():
+            result = col.method(record, table.user_table)
+        if self._cell_required_error:
+          raise self._cell_required_error  # pylint: disable=raising-bad-type
+        self.formula_tracer(col, record)
+        return result
+      except MemoryError:
+        # Don't try to wrap memory errors.
+        raise
+      except:  # pylint: disable=bare-except
+        # Since col.method runs untrusted user code, we use a bare except to catch all
+        # exceptions (even those not derived from BaseException).

-      # Before storing the exception value, make sure there isn't an OrderError pending.
-      # If there is, we will raise it after undoing any side effects.
-      order_error = self._cell_required_error
+        # Before storing the exception value, make sure there isn't an OrderError pending.
+        # If there is, we will raise it after undoing any side effects.
+        order_error = self._cell_required_error

-      # Otherwise, we use sys.exc_info to recover the raised exception object.
-      regular_error = sys.exc_info()[1] if not order_error else None
+        # Otherwise, we use sys.exc_info to recover the raised exception object.
+        regular_error = sys.exc_info()[1] if not order_error else None

-      # It is possible for formula evaluation to have side-effects that produce DocActions (e.g.
-      # lookupOrAddDerived() creates those). If there is an error, undo any such side-effects.
-      self._undo_to_checkpoint(checkpoint)
+        # It is possible for formula evaluation to have side-effects that produce DocActions (e.g.
+        # lookupOrAddDerived() creates those). If there is an error, undo any such side-effects.
+        self._undo_to_checkpoint(checkpoint)

-      # Now we can raise the order error, if there was one.  Cell evaluation will be reordered
-      # in response.
-      if order_error:
-        self._cell_required_error = None
-        raise order_error  # pylint: disable=raising-bad-type
+        # Now we can raise the order error, if there was one.  Cell evaluation will be reordered
+        # in response.
+        if order_error:
+          self._timing.mark("order_error")
+          self._cell_required_error = None
+          raise order_error  # pylint: disable=raising-bad-type

-      self.formula_tracer(col, record)
+        self.formula_tracer(col, record)

-      include_details = (node not in self._is_node_exception_reported) if node else True
-      if not col.is_formula():
-        return objtypes.RaisedException(regular_error, include_details, user_input=value)
-      else:
-        return objtypes.RaisedException(regular_error, include_details)
+        include_details = (node not in self._is_node_exception_reported) if node else True
+        if not col.is_formula():
+          return objtypes.RaisedException(regular_error, include_details, user_input=value)
+        else:
+          return objtypes.RaisedException(regular_error, include_details)

  def convert_action_values(self, action):
    """
--- a/sandbox/grist/main.py
+++ b/sandbox/grist/main.py
@@ -5,6 +5,8 @@ and starts the grist sandbox. See engine.py for the API documentation.
 import os
 import random
 import sys
+
+from timing import DummyTiming, Timing
 sys.path.append('thirdparty')
 # pylint: disable=wrong-import-position

@@ -158,6 +160,20 @@ def run(sandbox):
  def evaluate_formula(table_id, col_id, row_id):
    return formula_prompt.evaluate_formula(eng, table_id, col_id, row_id)

+  @export
+  def start_timing():
+    eng._timing = Timing()
+
+  @export
+  def stop_timing():
+    stats = eng._timing.get()
+    eng._timing = DummyTiming()
+    return stats
+
+  @export
+  def get_timings():
+    return eng._timing.get()
+
  export(parse_acl_formula)
  export(eng.load_empty)
  export(eng.load_done)
--- a/sandbox/grist/timing.py
+++ b/sandbox/grist/timing.py
@@ -0,0 +1,115 @@
+import contextlib
+import time
+import six
+
+
+class Timing(object):
+  def __init__(self):
+    self._items = {}
+    self._marks_stack = []
+
+  @contextlib.contextmanager
+  def measure(self, key):
+    start = time.time()
+    stack_start_len = len(self._marks_stack)
+    try:
+      yield
+    finally:
+      end = time.time()
+      self._record_time(key, end - start)
+
+      # Handle the marks added while in this invocation.
+      n = len(self._marks_stack) - stack_start_len
+      if n > 0:
+        next_mark = ("end", end)
+        while n > 0:
+          mark = self._marks_stack.pop()
+          self._record_time("{}@{}={}:{}".format(key, n, mark[0], next_mark[0]),
+              next_mark[1] - mark[1])
+          next_mark = mark
+          n -= 1
+        self._record_time("{}@{}={}:{}".format(key, n, "start", next_mark[0]), next_mark[1] - start)
+
+  def mark(self, mark_name):
+    self._marks_stack.append((mark_name, time.time()))
+
+  def get(self, clear = True):
+    # Copy it and clear immediately if requested.
+    timing_log = self._items.copy()
+    if clear:
+      self.clear()
+    # Stats will contain a json like structure with table_id, col_id, sum, count, average, max
+    # and optionally a array of marks (in similar format)
+    stats = []
+    for key, t in sorted(timing_log.items(), key=lambda x: str(x[0])):
+      # Key can be either a node (tuple with table_id and col_id) or a string with a mark.
+      # The list is sorted so, we always first get the stats for the node and then the marks.
+      # We will add marks to the last node.
+      if isinstance(key, tuple):
+        stats.append({"tableId": key[0], "colId": key[1], "sum": t.sum, "count": t.count,
+                      "average": t.average, "max": t.max})
+      else:
+        # Create a marks array for the last node or append to the existing one.
+        if stats:
+          prev = stats[-1].get("marks", [])
+          stats[-1]["marks"] = prev + [{
+            "name": key, "sum": t.sum,
+            "count": t.count, "average": t.average,
+            "max": t.max
+          }]
+    return stats
+
+  def dump(self):
+    out = []
+    for key, t in sorted(self._items.items(), key=lambda x: str(x[0])):
+      out.append("%6d, %10f, %10f, %10f, %s" % (t.count, t.average, t.max, t.sum, key))
+    print("Timing\n" + "\n".join(out))
+    self.clear()
+
+  def _record_time(self, key, time_sec):
+    t = self._items.get(key)
+    if not t:
+      t = self._items[key] = TimingStats()
+    t.add(time_sec)
+
+  def clear(self):
+    self._items.clear()
+
+
+
+# An implementation that adds minimal overhead.
+class DummyTiming(object):
+  # pylint: disable=no-self-use,unused-argument,no-member
+  def measure(self, key):
+    if six.PY2:
+      return contextlib.nested()
+    return contextlib.nullcontext()
+
+  def mark(self, mark_name):
+    pass
+
+  def dump(self):
+    pass
+
+  def get(self, clear = True):
+    return []
+
+  def clear(self):
+    pass
+
+
+class TimingStats(object):
+  def __init__(self):
+    self.count = 0
+    self.sum = 0
+    self.max = 0
+
+  @property
+  def average(self):
+    return self.sum / self.count if self.count > 0 else 0
+
+  def add(self, value):
+    self.count += 1
+    self.sum += value
+    if value > self.max:
+      self.max = value