add a pyodide-based "sandbox" flavor (#437)

This adds a new `GRIST_SANDBOX_FLAVOR=pyodide` option where the version of Python used for the data engine is wasm, and so can be run by node like the rest of the back end. It still runs as a separate process.

There are a few small version changes made to packages to avoid various awkwardnesses present in the current versions. All existing tests pass.

This is very experimental. To use, you'll need something with a bash shell and make. First do:

```
cd sandbox/pyodide
make setup   # README.md and Makefile have details
cd ..
```

Then running Grist as:

```
GRIST_SANDBOX_FLAVOR=pyodide yarn start
```

should work. Adding a formula with content:

```
import sys; return sys.version
```

should return a different Python version than other sandboxes.

The motivation for this work is to have a form of sandboxing that will work on Windows for Grist Electron (for Linux we have gvisor/runsc, for Mac we have sandbox-exec, but I haven't found anything comparable for Windows). It also brings a back-end-free version of Grist a bit closer, for use-cases where that would make sense - such as serving a report (in the form of a Grist document) on a static site.
2026-03-02 04:09:24 +00:00 · 2023-03-06 16:56:25 -05:00
parent a1259139f6
commit 66643a5e6b
12 changed files with 497 additions and 53 deletions
--- a/sandbox/grist/sandbox.py
+++ b/sandbox/grist/sandbox.py
@@ -14,6 +14,32 @@ import marshal
 import sys
 import traceback

+class CarefulReader(object):
+  """
+  Wrap a pipe when reading from Pyodide, to work around marshaling
+  panicking if fewer bytes are read in a block than it was expecting.
+  Just wait for more.
+
+  `readinto` keeps reading from the wrapped file until the caller's buffer
+  is full. `write` and `read` are passed straight through, and any other
+  attribute is looked up on the wrapped file.
+  """
+
+  def __init__(self, file_):
+    # The wrapped file object; readinto() below relies on its readinto().
+    self._file = file_
+
+  def write(self, data):
+    return self._file.write(data)
+
+  def read(self, size):
+    return self._file.read(size)
+
+  def readinto(self, b):
+    """
+    Fill buffer `b` from the wrapped file, repeating the read after a short one.
+
+    Returns the number of bytes placed in `b`, or None if the very first read
+    returns None. The count is less than len(b) only when a read returns 0 or
+    None before the buffer is full.
+    """
+    result = self._file.readinto(b)
+    if not result:
+      # None or 0 on the first read: nothing to top up, pass it on unchanged.
+      return result
+    # One view is enough; slicing it gives the not-yet-filled tail without copying.
+    bview = memoryview(b)
+    while result < len(b):
+      count = self._file.readinto(bview[result:])
+      if not count:
+        # A read of 0 or None will not make progress. Stop here rather than
+        # loop forever (0) or fail adding None to the running total.
+        break
+      result += count
+    return result
+
+  def __getattr__(self, attr):
+    # Only called for attributes not found on the wrapper itself.
+    return getattr(self._file, attr)
+
 def log(msg):
  sys.stderr.write(str(msg) + "\n")
  sys.stderr.flush()
@@ -23,22 +49,25 @@ class Sandbox(object):
  This class works in conjunction with Sandbox.js to allow function calls
  between the Node process and this sandbox.

-  The sandbox provides two pipes (on fds 3 and 4) to send data to and from the sandboxed
+  The sandbox provides two pipes to send data to and from the sandboxed
  process. Data on these is serialized using `marshal` module. All messages are comprised of a
  msgCode followed immediatedly by msgBody, with the following msgCodes:
    CALL = call to the other side. The data must be an array of [func_name, arguments...]
    DATA = data must be a value to return to a call from the other side
    EXC = data must be an exception to return to a call from the other side
+
+  Optionally, a callback can be supplied instead of an output pipe.
  """

  CALL = None
  DATA = True
  EXC = False

-  def __init__(self, external_input, external_output):
+  def __init__(self, external_input, external_output, external_output_method=None):
    self._functions = {}
    self._external_input = external_input
    self._external_output = external_output
+    self._external_output_method = external_output_method

  @classmethod
  def connected_to_js_pipes(cls):
@@ -62,6 +91,14 @@ class Sandbox(object):
    sys.stdout = sys.stderr
    return Sandbox.connected_to_js_pipes()

+  @classmethod
+  def use_pyodide(cls):
+    """
+    Build a sandbox for running under Pyodide: input is read from stdin's
+    binary buffer through a CarefulReader, and output is handed to the
+    `sendFromSandbox` function of the `js` module instead of an output pipe.
+    """
+    import js  # Get pyodide object.
+
+    def send_to_host(data):
+      return js.sendFromSandbox(data)
+
+    reader = CarefulReader(sys.stdin.buffer)
+    # From here on, anything written to sys.stdout goes to stderr.
+    sys.stdout = sys.stderr
+    return cls(reader, None, send_to_host)
+
  def _send_to_js(self, msgCode, msgBody):
    # (Note that marshal version 2 is the default; we specify it explicitly for clarity. The
    # difference with version 0 is that version 2 uses a faster binary format for floats.)
@@ -70,8 +107,14 @@ class Sandbox(object):
    # It's much better to ensure the whole blob is sent as one write. We marshal the resulting
    # buffer again so that the reader can quickly tell how many bytes to expect.
    buf = marshal.dumps((msgCode, msgBody), 2)
-    marshal.dump(buf, self._external_output, 2)
-    self._external_output.flush()
+    if self._external_output:
+      marshal.dump(buf, self._external_output, 2)
+      self._external_output.flush()
+    elif self._external_output_method:
+      buf = marshal.dumps(buf, 2)
+      self._external_output_method(buf)
+    else:
+      raise Exception('no data output method')

  def call_external(self, name, *args):
    self._send_to_js(Sandbox.CALL, (name,) + args)
@@ -115,6 +158,8 @@ def get_default_sandbox():
  if default_sandbox is None:
    if os.environ.get('PIPE_MODE') == 'minimal':
      default_sandbox = Sandbox.use_common_pipes()
+    elif os.environ.get('PIPE_MODE') == 'pyodide':
+      default_sandbox = Sandbox.use_pyodide()
    else:
      default_sandbox = Sandbox.connected_to_js_pipes()
  return default_sandbox
--- a/sandbox/grist/test_renames2.py
+++ b/sandbox/grist/test_renames2.py
@@ -189,23 +189,21 @@ class TestRenames2(test_engine.EngineTestCase):

  def test_renames_b(self):
    # Rename Games.name: affects People.Games_Won, Games.win4_game_name
-    # TODO: win4_game_name isn't updated due to astroid avoidance of looking up the same attr on
-    # the same class during inference.
    out_actions = self.apply_user_action(["RenameColumn", "Games", "name", "nombre"])
    self.assertPartialOutActions(out_actions, { "stored": [
      ["RenameColumn", "Games", "name", "nombre"],
      ["ModifyColumn", "People", "Games_Won", {
        "formula": "' '.join(e.game.nombre for e in Entries.lookupRecords(person=$id, rank=1))"
      }],
-      ["BulkUpdateRecord", "_grist_Tables_column", [4, 12], {
-        "colId": ["nombre", "Games_Won"],
+      ["ModifyColumn", "Games", "win4_game_name", {"formula": "$win.win.win.win.nombre"}],
+      ["BulkUpdateRecord", "_grist_Tables_column", [4, 12, 19], {
+        "colId": ["nombre", "Games_Won", "win4_game_name"],
        "formula": [
-          "", "' '.join(e.game.nombre for e in Entries.lookupRecords(person=$id, rank=1))"]
-      }],
-      ["BulkUpdateRecord", "Games", [1, 2, 3, 4], {
-        "win4_game_name": [["E", "AttributeError"], ["E", "AttributeError"],
-          ["E", "AttributeError"], ["E", "AttributeError"]]
-      }],
+          "",
+          "' '.join(e.game.nombre for e in Entries.lookupRecords(person=$id, rank=1))",
+          "$win.win.win.win.nombre"
+        ]
+      }]
    ]})

    # Fix up things missed due to the TODOs above.
@@ -264,22 +262,16 @@ class TestRenames2(test_engine.EngineTestCase):

  def test_renames_d(self):
    # Rename People.name: affects People.N, People.ParnerNames
-    # TODO: win3_person_name ($win.win.win.name) does NOT get updated correctly with astroid
-    # because of a limitation in astroid inference: it refuses to look up the same attr on the
-    # same class during inference (in order to protect against too much recursion).
    # TODO: PartnerNames does NOT get updated correctly because astroid doesn't infer meanings of
    # lists very well.
    out_actions = self.apply_user_action(["RenameColumn", "People", "name", "nombre"])
    self.assertPartialOutActions(out_actions, { "stored": [
      ["RenameColumn", "People", "name", "nombre"],
      ["ModifyColumn", "People", "N", {"formula": "$nombre.upper()"}],
-      ["BulkUpdateRecord", "_grist_Tables_column", [2, 11], {
-        "colId": ["nombre", "N"],
-        "formula": ["", "$nombre.upper()"]
-      }],
-      ["BulkUpdateRecord", "Games", [1, 2, 3, 4], {
-        "win3_person_name": [["E", "AttributeError"], ["E", "AttributeError"],
-          ["E", "AttributeError"], ["E", "AttributeError"]]
+      ["ModifyColumn", "Games", "win3_person_name", {"formula": "$win.win.win.nombre"}],
+      ["BulkUpdateRecord", "_grist_Tables_column", [2, 11, 18], {
+        "colId": ["nombre", "N", "win3_person_name"],
+        "formula": ["", "$nombre.upper()", "$win.win.win.nombre"]
      }],
      ["BulkUpdateRecord", "People", [1, 2, 3, 4, 5], {
        "PartnerNames": [["E", "AttributeError"], ["E", "AttributeError"],
@@ -287,8 +279,7 @@ class TestRenames2(test_engine.EngineTestCase):
      }],
    ]})

-    # Fix up things missed due to the TODOs above.
-    self.modify_column("Games", "win3_person_name", formula="$win.win.win.nombre")
+    # Fix up things missed due to the TODO above.
    self.modify_column("People", "PartnerNames",
                       formula=self.partner_names.replace("name", "nombre"))

@@ -305,21 +296,14 @@ class TestRenames2(test_engine.EngineTestCase):
    self.assertPartialOutActions(out_actions, { "stored": [
      ["RenameColumn", "People", "partner", "companero"],
      ["ModifyColumn", "People", "partner4", {
-        "formula": "$companero.companero.partner.partner"
+        "formula": "$companero.companero.companero.companero"
      }],
      ["BulkUpdateRecord", "_grist_Tables_column", [14, 15], {
        "colId": ["companero", "partner4"],
-        "formula": [self.partner, "$companero.companero.partner.partner"]
-      }],
-      ["BulkUpdateRecord", "People", [1, 2, 3, 4, 5], {
-        "partner4": [["E", "AttributeError"], ["E", "AttributeError"],
-          ["E", "AttributeError"], ["E", "AttributeError"], ["E", "AttributeError"]]
-      }],
+        "formula": [self.partner, "$companero.companero.companero.companero"]
+      }]
    ]})

-    # Fix up things missed due to the TODOs above.
-    self.modify_column("People", "partner4", formula="$companero.companero.companero.companero")
-
    _replace_col_name(self.people_data, "partner", "companero")
    self.assertTableData("People", cols="subset", data=self.people_data)
    self.assertTableData("Games", cols="subset", data=self.games_data)
@@ -331,21 +315,13 @@ class TestRenames2(test_engine.EngineTestCase):
    self.assertPartialOutActions(out_actions, { "stored": [
      ["RenameColumn", "People", "win", "pwin"],
      ["ModifyColumn", "Games", "win3_person_name", {"formula": "$win.pwin.win.name"}],
-      # TODO: the omission of the 4th win's update is due to the same astroid bug mentioned above.
-      ["ModifyColumn", "Games", "win4_game_name", {"formula": "$win.pwin.win.win.name"}],
+      ["ModifyColumn", "Games", "win4_game_name", {"formula": "$win.pwin.win.pwin.name"}],
      ["BulkUpdateRecord", "_grist_Tables_column", [16, 18, 19], {
        "colId": ["pwin", "win3_person_name", "win4_game_name"],
        "formula": ["Entries.lookupOne(person=$id, rank=1).game",
-                    "$win.pwin.win.name", "$win.pwin.win.win.name"]}],
-      ["BulkUpdateRecord", "Games", [1, 2, 3, 4], {
-        "win4_game_name": [["E", "AttributeError"], ["E", "AttributeError"],
-          ["E", "AttributeError"], ["E", "AttributeError"]]
-      }],
+                    "$win.pwin.win.name", "$win.pwin.win.pwin.name"]}],
    ]})

-    # Fix up things missed due to the TODOs above.
-    self.modify_column("Games", "win4_game_name", formula="$win.pwin.win.pwin.name")
-
    _replace_col_name(self.people_data, "win", "pwin")
    self.assertTableData("People", cols="subset", data=self.people_data)
    self.assertTableData("Games", cols="subset", data=self.games_data)