(core) External requests

Summary:
Adds a Python function `REQUEST` which makes an HTTP GET request. Behind the scenes it:

- Raises a special exception to stop trying to evaluate the current cell and just keep the existing value.
- Notes the request arguments, which will be returned by `apply_user_actions`.
- Makes the actual request in NodeJS, which sends back the raw response data in a new action `RespondToRequests`, which reevaluates the cell(s) that made the request.
- Wraps the response data in a class which mimics the `Response` class of the `requests` library.

In certain cases, this asynchronous flow doesn't work and the sandbox will instead synchronously call an exported JS method:

- When reevaluating a single cell to get a formula error, the request is made synchronously.
- When a formula makes multiple requests, the earlier responses are retrieved synchronously from files which store responses as long as needed to complete evaluating formulas.

See https://grist.slack.com/archives/CL1LQ8AT0/p1653399747810139

Test Plan: Added Python and nbrowser tests.

Reviewers: georgegevoian

Reviewed By: georgegevoian

Subscribers: paulfitz, dsagal

Differential Revision: https://phab.getgrist.com/D3429
2026-03-02 04:09:24 +00:00 · 2022-06-17 20:49:18 +02:00
parent abebe812db
commit 9fffb491f9
17 changed files with 582 additions and 24 deletions
--- a/sandbox/grist/action_obj.py
+++ b/sandbox/grist/action_obj.py
@@ -27,6 +27,7 @@ class ActionGroup(object):
    self.undo     = []
    self.retValues = []
    self.summary = ActionSummary()
+    self.requests = {}

  def flush_calc_changes(self):
    """
--- a/sandbox/grist/engine.py
+++ b/sandbox/grist/engine.py
@@ -30,6 +30,7 @@ import match_counter
 import objtypes
 from objtypes import strict_equal
 from relation import SingleRowsIdentityRelation
+import sandbox
 import schema
 from schema import RecalcWhen
 import table as table_module
@@ -66,6 +67,16 @@ class OrderError(Exception):
    self.requiring_node = node
    self.requiring_row_id = row_id

+
+class RequestingError(Exception):
+  """
+  Internal control-flow exception, raised and caught inside the engine much like OrderError.
+  It signals that a formula called the REQUEST function, so the HTTP request has to be
+  delegated to the NodeJS server before the formula can produce a value.
+  """
+
+
 # An item of work to be done by Engine._update
 WorkItem = namedtuple('WorkItem', ('node', 'row_ids', 'locks'))

@@ -186,6 +197,7 @@ class Engine(object):
    # What's currently being computed
    self._current_node = None
    self._current_row_id = None
+    self._is_current_node_formula = False  # True for formula columns, False for trigger formulas

    # Certain recomputations are triggered by a particular doc action. This keep track of it.
    self._triggering_doc_action = None
@@ -236,6 +248,19 @@ class Engine(object):

    self._table_stats = {"meta": [], "user": []}

+    #### Attributes used by the REQUEST function:
+    # True when the formula should synchronously call the exported JS method to make the request
+    # immediately instead of reevaluating the formula later. Used when reevaluating a single
+    # formula cell to get an error traceback.
+    self._sync_request = False
+    # dict of string keys to responses, set by the RespondToRequests user action to reevaluate
+    # formulas based on a batch of completed requests.
+    self._request_responses = {}
+    # set of string keys identifying requests that are currently cached in files and can thus
+    # be fetched synchronously via the exported JS method. This allows a single formula to
+    # make multiple different requests without needing to keep all the responses in memory.
+    self._cached_request_keys = set()
+
  @property
  def autocomplete_context(self):
    # See the comment on _autocomplete_context in __init__ above.
@@ -480,7 +505,7 @@ class Engine(object):
    if self._peeking:
      return

-    if self._current_node:
+    if self._is_current_node_formula:
      # Add an edge to indicate that the node being computed depends on the node passed in.
      # Note that during evaluation, we only *add* dependencies. We *remove* them by clearing them
      # whenever ALL rows for a node are invalidated (on schema changes and reloads).
@@ -671,6 +696,8 @@ class Engine(object):
    table = self.tables[table_id]
    col = table.get_column(col_id)
    checkpoint = self._get_undo_checkpoint()
+    # Makes calls to REQUEST synchronous, since raising a RequestingError can't work here.
+    self._sync_request = True
    try:
      result = self._recompute_one_cell(table, col, row_id)
      # If the error is gone for a trigger formula
@@ -686,6 +713,7 @@ class Engine(object):
      # It is possible for formula evaluation to have side-effects that produce DocActions (e.g.
      # lookupOrAddDerived() creates those). In case of get_formula_error(), these aren't fully
      # processed (e.g. don't get applied to DocStorage), so it's important to reverse them.
+      self._sync_request = False
      self._undo_to_checkpoint(checkpoint)

  def _recompute(self, node, row_ids=None):
@@ -757,9 +785,11 @@ class Engine(object):
    require_rows = sorted(require_rows or [])

    previous_current_node = self._current_node
+    previous_is_current_node_formula = self._is_current_node_formula
+    self._current_node = node
    # Prevents dependency creation for non-formula nodes. A non-formula column may include a
    # formula to eval for a newly-added record. Those shouldn't create dependencies.
-    self._current_node = node if col.is_formula() else None
+    self._is_current_node_formula = col.is_formula()

    changes = None
    cleaned = []    # this lists row_ids that can be removed from dirty_rows once we are no
@@ -789,11 +819,16 @@ class Engine(object):
          # For common-case formulas, all cells in a column are likely to fail in the same way,
          # so don't bother trying more from this column until we've reordered.
          return
+
+        making_request = False
        try:
          # We figure out if we've hit a cycle here.  If so, we just let _recompute_on_cell
          # know, so it can set the cell value appropriately and do some other bookkeeping.
          cycle = required and (node, row_id) in self._locked_cells
          value = self._recompute_one_cell(table, col, row_id, cycle=cycle, node=node)
+        except RequestingError:
+          making_request = True
+          value = RequestingError
        except OrderError as e:
          if not required:
            # We're out of order, but for a cell we were evaluating opportunistically.
@@ -822,19 +857,24 @@ class Engine(object):
        if column.is_validation_column_name(col.col_id):
          value = (value in (True, None))

-        # Convert the value, and if needed, set, and include into the returned action.
-        value = col.convert(value)
-        previous = col.raw_get(row_id)
-        if not strict_equal(value, previous):
-          if not changes:
-            changes = self._changes_map.setdefault(node, [])
-          changes.append((row_id, previous, value))
-          col.set(row_id, value)
+        # When the formula raises a RequestingError, leave the existing value in the cell.
+        # The formula will be evaluated again soon when we have a response.
+        if not making_request:
+          # Convert the value, and if needed, set, and include into the returned action.
+          value = col.convert(value)
+          previous = col.raw_get(row_id)
+          if not strict_equal(value, previous):
+            if not changes:
+              changes = self._changes_map.setdefault(node, [])
+            changes.append((row_id, previous, value))
+            col.set(row_id, value)
+
        exclude.add(row_id)
        cleaned.append(row_id)
        self._recompute_done_counter += 1
    finally:
      self._current_node = previous_current_node
+      self._is_current_node_formula = previous_is_current_node_formula
      # Usually dirty_rows refers to self.recompute_map[node], so this modifies both
      dirty_rows -= cleaned

@@ -844,6 +884,43 @@ class Engine(object):
      if not self.recompute_map[node]:
        self.recompute_map.pop(node)

+  def _requesting(self, key, args):
+    """
+    Entry point used by the REQUEST function.
+
+    Returns the raw response for `key` when one is available: either from the batch of
+    responses supplied by the RespondToRequests action, or by synchronously asking the JS side.
+    Otherwise records the request (and the cell that made it) in `out_actions.requests` so it
+    can be made asynchronously in JS, and raises RequestingError so that the formula
+    is evaluated again later when we have a response.
+    """
+    # Registering a dependency on the current time makes the formula reevaluate periodically
+    # with the UpdateCurrentTime action, on the assumption that the response changes over time
+    # and the latest data is wanted. This probably needs reconsidering to avoid unwanted
+    # requests, including the refresh that happens when the doc is loaded (Calculate action).
+    self.use_current_time()
+
+    ready_responses = self._request_responses
+    if key in ready_responses:
+      # We are being reevaluated inside a RespondToRequests action: the response is ready.
+      return ready_responses[key]
+
+    if self._sync_request or key in self._cached_request_keys:
+      # Not always ideal, but here the best strategy is to make the request right away
+      # and block until the response arrives.
+      return sandbox.call_external("request", key, args)
+
+    # No response can be obtained now, so note the request for delegation.
+    table_id, column_id = self._current_node
+    # `out_actions.requests` is returned by apply_user_actions. The request arguments are
+    # stored under `key` unless an identical request was already noted.
+    stored_args = self.out_actions.requests.setdefault(key, args)
+    # Remember which cell made the request so that it can be invalidated later.
+    deps_by_table = stored_args.setdefault("deps", {})
+    deps_by_column = deps_by_table.setdefault(table_id, {})
+    requesting_rows = deps_by_column.setdefault(column_id, [])
+    requesting_rows.append(self._current_row_id)
+
+    # As with OrderError, note the exception so it gets raised even if the formula catches it
+    self._cell_required_error = RequestingError()
+
+    raise RequestingError()
+
  def _recompute_one_cell(self, table, col, row_id, cycle=False, node=None):
    """
    Recomputes an one formula cell and returns a value.
@@ -1198,6 +1275,11 @@ class Engine(object):

    self.out_actions = action_obj.ActionGroup()
    self._user = User(user, self.tables) if user else None
+
+    # These should usually be empty, but may be populated by the RespondToRequests action.
+    self._request_responses = {}
+    self._cached_request_keys = set()
+
    checkpoint = self._get_undo_checkpoint()
    try:
      for user_action in user_actions:
@@ -1252,6 +1334,8 @@ class Engine(object):
    self.out_actions.flush_calc_changes()
    self.out_actions.check_sanity()
    self._user = None
+    self._request_responses = {}
+    self._cached_request_keys = set()
    return self.out_actions

  def acl_split(self, action_group):
--- a/sandbox/grist/functions/info.py
+++ b/sandbox/grist/functions/info.py
@@ -3,10 +3,13 @@

 from __future__ import absolute_import
 import datetime
+import hashlib
+import json
 import math
 import numbers
 import re

+import chardet
 import six

 import column
@@ -656,3 +659,91 @@ def is_error(value):
  return ((value is _error_sentinel)
      or isinstance(value, AltText)
      or (isinstance(value, float) and math.isnan(value)))
+
+
+@unimplemented  # exclude from autocomplete while in beta
+def REQUEST(url, params=None, headers=None):
+  # Performs an HTTP GET request through an interface modelled on `requests.get`.
+  # Internally the request is (usually) carried out asynchronously, but to the formula
+  # author the call looks synchronous.
+
+  # A request is identified everywhere by a string key derived from its arguments, so that
+  # identical arguments always map to the same key and the request is only made once.
+  args = {"url": url, "params": params, "headers": headers}
+  serialized_args = json.dumps(args, sort_keys=True)
+  key = hashlib.sha256(serialized_args.encode()).hexdigest()
+
+  # Either yields the raw response data, or raises a special exception that delegates
+  # the request and causes the formula to be reevaluated later.
+  engine = docmodel.global_docmodel._engine
+  response_dict = engine._requesting(key, args)
+
+  # An "error" entry means the request could not be made at all (e.g. a connection problem).
+  # Unsuccessful status codes such as 404 or 500 do not end up here.
+  if "error" in response_dict:
+    raise HTTPError(response_dict["error"])
+
+  return Response(**response_dict)
+
+
+class HTTPError(Exception):
+  """
+  Raised by REQUEST when the request could not be made at all,
+  and by Response.raise_for_status() for unsuccessful status codes.
+  """
+
+
+class Response(object):
+  """
+  Similar to the Response class from the `requests` library.
+
+  Constructor arguments use the names of the raw response data that is passed in as keyword
+  arguments by REQUEST:
+    content:    raw response body, as bytes
+    status:     numeric status code, exposed as `status_code`
+    statusText: status text, exposed as `reason`
+    headers:    mapping of header names to values, exposed case-insensitively as `headers`
+    encoding:   encoding of `content`; when falsy, the encoding is guessed from the content
+  """
+  def __init__(self, content, status, statusText, headers, encoding):
+    self.content = content  # raw bytes
+    self.status_code = status  # e.g. 404
+    self.reason = statusText  # e.g. "Not Found"
+    self.headers = CaseInsensitiveDict(headers)
+    # May still end up as None when no encoding was given and none could be detected.
+    self.encoding = encoding or self.apparent_encoding
+
+  @property
+  def text(self):
+    """
+    The content decoded to text using `self.encoding`.
+    """
+    if not self.content:
+      # Nothing to decode (e.g. a response without a body); no encoding is needed.
+      return u""
+    if not self.encoding:
+      # Detection can fail and leave the encoding unset. Passing None to decode() would raise
+      # a TypeError, so fall back to a lenient UTF-8 decode, as `requests` does.
+      return self.content.decode("utf-8", "replace")
+    return self.content.decode(self.encoding)
+
+  def json(self, **kwargs):
+    """
+    Parses the text of the response as JSON. Keyword arguments are passed to `json.loads`.
+    """
+    return json.loads(self.text, **kwargs)
+
+  @property
+  def ok(self):
+    """
+    True if the status code is below 400.
+    """
+    return self.status_code < 400
+
+  def raise_for_status(self):
+    """
+    Raises HTTPError if the response is not `ok`, otherwise does nothing.
+    """
+    if not self.ok:
+      raise HTTPError("Request failed with status %s" % self.status_code)
+
+  @property
+  def apparent_encoding(self):
+    """
+    The encoding guessed by `chardet` from the raw content. Recomputed on every access.
+    """
+    return chardet.detect(self.content)["encoding"]
+
+  def close(self):
+    pass  # nothing to do
+
+
+class CaseInsensitiveDict(dict):
+  """
+  Similar to dict but treats all keys (which must be strings) case-insensitively,
+  e.g. `d["foo"]` and `d["FOO"]` are equivalent.
+
+  Keys are stored in lowercase. The key-based methods (`__getitem__`, `__setitem__`, `get`,
+  etc.) are replaced after the class definition with versions that lowercase the key.
+  """
+  def __init__(self, *args, **kwargs):
+    dict.__init__(self, *args, **kwargs)
+    # Iterate over a copy of the keys, since entries are removed and re-added in the loop.
+    for k in list(self):
+      # Convert key to lowercase: the item assignment goes through the overridden
+      # __setitem__, while dict.pop removes the entry under its original spelling.
+      self[k] = dict.pop(self, k)
+
+  def update(self, E=None, **F):
+    """
+    Like dict.update, but with case-insensitive keys. As with dict.update, keyword arguments
+    are applied after (and so take precedence over) the entries of `E`.
+    """
+    # Build temporary instances to normalise the incoming keys, then merge them into `self`.
+    # `self` must be passed explicitly because dict.update is called unbound here.
+    dict.update(self, self.__class__(E or {}))
+    dict.update(self, self.__class__(**F))
+
+
+def _forward_dict_method(name):
+  # Builds a replacement for the dict method `name` (whose first argument is a key)
+  # that lowercases the key before delegating to the original dict implementation.
+  def method(self, key, *args, **kwargs):
+    dict_method = getattr(dict, name)
+    return dict_method(self, key.lower(), *args, **kwargs)
+  return method
+
+# Install the lowercasing versions of all key-based methods on CaseInsensitiveDict.
+for _name in ("__getitem__", "__setitem__", "__delitem__", "__contains__",
+              "get", "setdefault", "pop", "has_key"):
+  setattr(CaseInsensitiveDict, _name, _forward_dict_method(_name))
--- a/sandbox/grist/lookup.py
+++ b/sandbox/grist/lookup.py
@@ -126,7 +126,7 @@ class BaseLookupMapColumn(column.BaseColumn):
    """
    key = tuple(_extract(val) for val in key)
    engine = self._engine
-    if engine._current_node:
+    if engine._is_current_node_formula:
      rel = self._get_relation(engine._current_node)
      rel._add_lookup(engine._current_row_id, key)
    else:
--- a/sandbox/grist/main.py
+++ b/sandbox/grist/main.py
@@ -68,10 +68,13 @@ def run(sandbox):
  @export
  def apply_user_actions(action_reprs, user=None):
    action_group = eng.apply_user_actions([useractions.from_repr(u) for u in action_reprs], user)
-    return dict(
+    result = dict(
      rowCount=eng.count_rows(),
      **eng.acl_split(action_group).to_json_obj()
    )
+    if action_group.requests:
+      result["requests"] = action_group.requests
+    return result

  @export
  def fetch_table(table_id, formulas=True, query=None):
--- a/sandbox/grist/test_requests.py
+++ b/sandbox/grist/test_requests.py
@@ -0,0 +1,136 @@
+# coding=utf-8
+import unittest
+
+import test_engine
+import testutil
+from functions import CaseInsensitiveDict, Response, HTTPError
+
+
+class TestCaseInsensitiveDict(unittest.TestCase):
+  """
+  Checks that CaseInsensitiveDict behaves the same whichever capitalisation of a key is used.
+  """
+  def test_case_insensitive_dict(self):
+    # Constructed with an uppercase key; it is expected to be stored in lowercase.
+    d = CaseInsensitiveDict({"FOO": 1})
+    # Each iteration starts and ends with d == {"foo": 1}, and exercises every key-based
+    # operation with a different capitalisation of the same key.
+    for key in ["foo", "FOO", "Foo"]:
+      self.assertEqual(d, {"foo": 1})
+      self.assertEqual(list(d), ["foo"])
+      self.assertEqual(d, CaseInsensitiveDict({key: 1}))
+      self.assertIn(key, d)
+      self.assertEqual(d[key], 1)
+      self.assertEqual(d.get(key), 1)
+      self.assertEqual(d.get(key, 2), 1)
+      # A different key is missing, so the default is returned.
+      self.assertEqual(d.get(key + "2", 2), 2)
+      self.assertEqual(d.pop(key), 1)
+      self.assertEqual(d, {})
+      # setdefault inserts when the key is missing...
+      self.assertEqual(d.setdefault(key, 3), 3)
+      self.assertEqual(d, {"foo": 3})
+      # ...and keeps the existing value when the key is present.
+      self.assertEqual(d.setdefault(key, 4), 3)
+      self.assertEqual(d, {"foo": 3})
+      del d[key]
+      self.assertEqual(d, {})
+      # Restore the initial state for the next iteration.
+      d[key] = 1
+
+
+class TestResponse(unittest.TestCase):
+  """
+  Unit tests for the Response class, constructed directly from raw values.
+  Positional arguments are: content, status, statusText, headers, encoding.
+  """
+  def test_ok_response(self):
+    # No encoding is given (None), so it is detected from the content.
+    r = Response(b"foo", 200, "OK", {"X-header": "hi"}, None)
+    self.assertEqual(r.content, b"foo")
+    self.assertEqual(r.text, u"foo")
+    self.assertEqual(r.status_code, 200)
+    self.assertEqual(r.ok, True)
+    self.assertEqual(r.reason, "OK")
+    # Header names are stored in lowercase.
+    self.assertEqual(r.headers, {"x-header": "hi"})
+    self.assertEqual(r.encoding, "ascii")
+    self.assertEqual(r.apparent_encoding, "ascii")
+    # Neither of these should raise for a successful response.
+    r.raise_for_status()
+    r.close()
+
+  def test_error_response(self):
+    # A status of 400 or above makes the response not `ok`, and raise_for_status() raises.
+    r = Response(b"foo", 500, "Server error", {}, None)
+    self.assertEqual(r.status_code, 500)
+    self.assertEqual(r.ok, False)
+    self.assertEqual(r.reason, "Server error")
+    with self.assertRaises(HTTPError) as cm:
+      r.raise_for_status()
+    self.assertEqual(str(cm.exception), "Request failed with status 500")
+
+  def test_json(self):
+    r = Response(b'{"foo": "bar"}', 200, "OK", {}, None)
+    self.assertEqual(r.json(), {"foo": "bar"})
+
+  def test_encoding_direct(self):
+    # An explicitly given encoding is kept as is, without being validated;
+    # the apparent encoding is still detected from the content.
+    r = Response(b"foo", 200, "OK", {}, "some encoding")
+    self.assertEqual(r.encoding, "some encoding")
+    self.assertEqual(r.apparent_encoding, "ascii")
+
+  def test_apparent_encoding(self):
+    text = u"编程"
+    encoding = "utf-8"
+    content = text.encode(encoding)
+    self.assertEqual(content.decode(encoding), text)
+    # An empty encoding string is falsy, so the detected encoding is used instead.
+    r = Response(content, 200, "OK", {}, "")
+    self.assertEqual(r.encoding, encoding)
+    self.assertEqual(r.apparent_encoding, encoding)
+    self.assertEqual(r.content, content)
+    self.assertEqual(r.text, text)
+
+
+class TestRequestFunction(test_engine.EngineTestCase):
+  """
+  Tests the REQUEST function end to end within the engine: a formula calling REQUEST first
+  produces a pending request, and then gets its value once RespondToRequests is applied.
+  """
+  # Two rows, with a formula column "Request" which initially just returns the row id.
+  sample = testutil.parse_test_sample({
+    "SCHEMA": [
+      [1, "Table1", [
+        [2, "Request", "Any", True, "$id", "", ""],
+        [3, "Other", "Any", True, "", "", ""],
+      ]],
+    ],
+    "DATA": {
+      "Table1": [
+        ["id"],
+        [1],
+        [2],
+      ],
+    }
+  })
+
+  def test_request_function(self):
+    self.load_sample(self.sample)
+
+    # The formula returns the attributes of the Response object, so they end up in the cell.
+    formula = """
+r = REQUEST('my_url', headers={'foo': 'bar'}, params={'b': 1, 'a': 2})
+r.__dict__
+"""
+    out_actions = self.modify_column("Table1", "Request", formula=formula)
+    # The key is the SHA-256 hex digest of the request arguments serialised as sorted JSON.
+    key = '9d305be9664924aaaf7ebb0bab2e4155d1fa1b9dcde53e417f1a9f9a2c7e09b9'
+    # Both rows made the same request, so it is noted once, with both cells listed in "deps".
+    deps = {'Table1': {'Request': [1, 2]}}
+    args = {
+      'url': 'my_url',
+      'headers': {'foo': 'bar'},
+      'params': {'a': 2, 'b': 1},
+      'deps': deps,
+    }
+    self.assertEqual(out_actions.requests, {key: args})
+    # While the request is pending, the cells keep their previous values.
+    self.assertTableData("Table1", cols="subset", data=[
+      ["id", "Request"],
+      [1, 1],
+      [2, 2],
+    ])
+
+    response = {
+      'status': 200,
+      'statusText': 'OK',
+      'content': b'body',
+      'headers': {'h1': 'h2'},
+      'encoding': 'utf16',
+      'deps': deps,
+    }
+    # A copy is passed because the action removes "deps" from the response it is given.
+    self.apply_user_action(["RespondToRequests", {key: response.copy()}, [key]])
+
+    # Translate names from JS `fetch` API to Python `requests`-style API
+    response["status_code"] = response.pop("status")
+    response["reason"] = response.pop("statusText")
+    # This is sent in the user action but not kept for the response object
+    del response["deps"]
+
+    # Both cells were reevaluated and now hold the attributes of the Response object.
+    self.assertTableData("Table1", cols="subset", data=[
+      ["id", "Request"],
+      [1, response],
+      [2, response],
+    ])
--- a/sandbox/grist/useractions.py
+++ b/sandbox/grist/useractions.py
@@ -9,6 +9,7 @@ import six
 from six.moves import xrange

 import acl
+import depend
 import gencode
 from acl_formula import parse_acl_formula_json
 import actions
@@ -331,6 +332,25 @@ class UserActions(object):
    """
    self._engine.update_current_time()

+  @useraction
+  def RespondToRequests(self, responses, cached_keys):
+    """
+    Reevaluate the formulas that called the REQUEST function, now that responses are available.
+    """
+    engine = self._engine
+
+    # Raw responses, keyed by request key, which the REQUEST function will receive.
+    engine._request_responses = responses
+    # Keys of older requests whose responses are stored in files
+    # and can therefore be retrieved synchronously.
+    engine._cached_request_keys = set(cached_keys)
+
+    # Invalidate exactly those cells which made the requests being responded to here.
+    for response in six.itervalues(responses):
+      # "deps" is only bookkeeping: take it out so it isn't passed on with the response data.
+      cells_by_table = response.pop("deps")
+      for table_id, cells_by_column in six.iteritems(cells_by_table):
+        for col_id, row_ids in six.iteritems(cells_by_column):
+          requesting_node = depend.Node(table_id, col_id)
+          engine.dep_graph.invalidate_deps(requesting_node, row_ids, engine.recompute_map)
+
  #----------------------------------------
  # User actions on records.
  #----------------------------------------