(core) Add test_replay for easily replaying data sent to the sandbox purely within python

Summary: Run JS with a value for SANDBOX_BUFFERS_DIR, then run test_replay in python with the same value to replay just the python code. See test_replay.py for more info. Test Plan: Record some data, e.g. `SANDBOX_BUFFERS_DIR=manual npm start` or `SANDBOX_BUFFERS_DIR=server ./test/testrun.sh server`. Then run `SANDBOX_BUFFERS_DIR=server python -m unittest test_replay` from within `core/sandbox/grist` to replay the input from the JS. Sample of the output will look like this: ``` Checking /tmp/sandbox_buffers/server/2021-06-16T15:13:59.958Z True Checking /tmp/sandbox_buffers/server/2021-06-16T15:16:37.170Z True Checking /tmp/sandbox_buffers/server/2021-06-16T15:14:22.378Z True ``` Reviewers: paulfitz, dsagal Reviewed By: dsagal Differential Revision: https://phab.getgrist.com/D2866
2025-06-13 20:53:59 +00:00 · 2021-06-30 15:35:05 +02:00 · 2021-06-30 15:35:05 +02:00 · 84ddbc448b
commit 84ddbc448b
parent b537539b73
4 changed files with 145 additions and 20 deletions
--- a/app/server/lib/NSandbox.ts
+++ b/app/server/lib/NSandbox.ts
@ -12,6 +12,7 @@ import {ChildProcess, spawn, SpawnOptions} from 'child_process';
 import * as path from 'path';
 import {Stream, Writable} from 'stream';
 import * as _ from 'lodash';
 import * as fs from "fs";
 type SandboxMethod = (...args: any[]) => any;
@ -33,6 +34,10 @@ type ResolveRejectPair = [(value?: any) => void, (reason?: unknown) => void];
 // Type for basic message identifiers, available as constants in sandboxUtil.
 type MsgCode = null | true | false;
 // Optional root folder to store binary data sent to and from the sandbox
 // See test_replay.py
 const recordBuffersRoot = process.env.RECORD_SANDBOX_BUFFERS_DIR;
 export class NSandbox implements ISandbox {
  /**
   * Helper function to run the nacl sandbox. It takes care of most arguments, similarly to
@ -92,6 +97,9 @@ export class NSandbox implements ISandbox {
  private _throttle: Throttle | undefined;
  // Create a unique subdirectory for each sandbox process so they can be replayed separately
  private _recordBuffersDir = recordBuffersRoot ? path.resolve(recordBuffersRoot, new Date().toISOString()) : null;
  /*
   * Callers may listen to events from sandbox.childProc (a ChildProcess), e.g. 'close' and 'error'.
   * The sandbox listens for 'aboutToExit' event on the process, to properly shut down.
@ -136,6 +144,11 @@ export class NSandbox implements ISandbox {
        logMeta: this._logMeta,
      });
    }
    if (this._recordBuffersDir) {
      log.rawDebug(`Recording sandbox buffers in ${this._recordBuffersDir}`, this._logMeta);
      fs.mkdirSync(this._recordBuffersDir, {recursive: true});
    }
  }
  /**
@ -231,7 +244,11 @@ export class NSandbox implements ISandbox {
    }
    this._marshaller.marshal(msgCode);
    this._marshaller.marshal(data);
-    return this._streamToSandbox.write(this._marshaller.dumpAsBuffer());
+    const buf = this._marshaller.dumpAsBuffer();
    if (this._recordBuffersDir) {
      fs.appendFileSync(path.resolve(this._recordBuffersDir, "input"), buf);
    }
    return this._streamToSandbox.write(buf);
  }
@ -241,6 +258,9 @@ export class NSandbox implements ISandbox {
  private _onSandboxData(data: any) {
    this._unmarshaller.parse(data, buf => {
      const value = marshal.loads(buf, { bufferToString: true });
      if (this._recordBuffersDir) {
        fs.appendFileSync(path.resolve(this._recordBuffersDir, "output"), buf);
      }
      this._onSandboxMsg(value[0], value[1]);
    });
  }
--- a/sandbox/grist/main.py
+++ b/sandbox/grist/main.py
@ -13,7 +13,7 @@ import six
 from acl_formula import parse_acl_formula
 import actions
-import sandbox
+from sandbox import Sandbox
 import engine
 import migrations
 import schema
@ -23,15 +23,6 @@ import objtypes
 import logger
 log = logger.Logger(__name__, logger.INFO)
 def export(method):
  # Wrap each method so that it logs a message that it's being called.
  @functools.wraps(method)
  def wrapper(*args, **kwargs):
    log.debug("calling %s" % method.__name__)
    return method(*args, **kwargs)
  sandbox.register(method.__name__, wrapper)
 def table_data_from_db(table_name, table_data_repr):
  if table_data_repr is None:
    return actions.TableData(table_name, [], {})
@ -51,9 +42,18 @@ def _decode_db_value(value):
  else:
    return value
-def main():
+def run(sandbox):
  eng = engine.Engine()
  def export(method):
    # Wrap each method so that it logs a message that it's being called.
    @functools.wraps(method)
    def wrapper(*args, **kwargs):
      log.debug("calling %s" % method.__name__)
      return method(*args, **kwargs)
    sandbox.register(method.__name__, wrapper)
  @export
  def apply_user_actions(action_reprs):
    action_group = eng.apply_user_actions([useractions.from_repr(u) for u in action_reprs])
@ -114,5 +114,9 @@ def main():
  sandbox.run()
 def main():
  sandbox = Sandbox.connected_to_js_pipes()
  run(sandbox)
 if __name__ == "__main__":
  main()
--- a/sandbox/grist/sandbox.py
+++ b/sandbox/grist/sandbox.py
@ -11,7 +11,6 @@ Usage:
 import os
 import marshal
 import signal
 import sys
 import traceback
@ -36,10 +35,16 @@ class Sandbox(object):
  DATA = True
  EXC = False
-  def __init__(self):
+  def __init__(self, external_input, external_output):
    self._functions = {}
-    self._external_input = os.fdopen(3, "rb", 64*1024)
+    self._external_input = external_input
-    self._external_output = os.fdopen(4, "wb", 64*1024)
+    self._external_output = external_output
  @classmethod
  def connected_to_js_pipes(cls):
    external_input = os.fdopen(3, "rb", 64 * 1024)
    external_output = os.fdopen(4, "wb", 64 * 1024)
    return cls(external_input, external_output)
  def _send_to_js(self, msgCode, msgBody):
    # (Note that marshal version 2 is the default; we specify it explicitly for clarity. The
@ -87,14 +92,19 @@ class Sandbox(object):
    if break_on_response:
      raise Exception("Sandbox disconnected unexpectedly")
 default_sandbox = None
-sandbox = Sandbox()
+def get_default_sandbox():
  global default_sandbox
  if default_sandbox is None:
    default_sandbox = Sandbox.connected_to_js_pipes()
  return default_sandbox
 def call_external(name, *args):
-  return sandbox.call_external(name, *args)
+  return get_default_sandbox().call_external(name, *args)
 def register(func_name, func):
-  sandbox.register(func_name, func)
+  get_default_sandbox().register(func_name, func)
 def run():
-  sandbox.run()
+  get_default_sandbox().run()
--- a/sandbox/grist/test_replay.py
+++ b/sandbox/grist/test_replay.py
@ -0,0 +1,91 @@
 """
 Replay binary data sent from JS to reproduce behaviour in the sandbox.
 This isn't really a test and it doesn't run under normal circumstances,
 but it's convenient to run it alongside other tests to measure total coverage.
 This is a tool to directly run some python code of interest to make it easier to do things like:
 - Use a debugger within Python
 - Measure Python code coverage from JS tests
 - Rapidly iterate on Python code without having to repeatedly run the same JS
    or write a Python test from scratch.
 To use this, first set the environment variable RECORD_SANDBOX_BUFFERS_DIR to a directory path,
 then run some JS code. For example you could run some tests,
 or run `npm start` and then manually interact with a document in a way that triggers
 desired behaviour in the sandbox.
 This will store files like $RECORD_SANDBOX_BUFFERS_DIR/<subdirectory>/(input|output)
 Each subdirectory corresponds to a single sandbox process so that replays are isolated.
 JS tests can start many instances of the sandbox and thus create many subdirectories.
 `input` contains the binary data sent from JS to Python, `output` contains the data sent back.
 Currently, the name of each subdirectory is the time it was created.
 Now run this test with the same value of RECORD_SANDBOX_BUFFERS_DIR. For each subdirectory,
 it will read in `input` just as it would read the pipe from JS, and send output to a file
 `new_output` in the same subdirectory. Then it will compare the data in `output` and `new_output`.
 The outputs will usually match but there are many reasons they might differ:
 - Functions registered in JS tests (e.g. via plugins) but not in the python unit tests.
 - File paths in tracebacks.
 - Slight differences between standard and NaCl interpreters.
 - Functions involving randomness or time.
 In any case the point is usually not whether or not the outputs match, but to directly run
 just the python code of interest.
 """
 from __future__ import print_function
 import marshal
 import os
 import unittest
 from main import run
 from sandbox import Sandbox
 def marshal_load_all(path):
  result = []
  with open(path, "rb") as f:
    while True:
      try:
        result.append(marshal.load(f))
      except EOFError:
        break
  return result
 class TestReplay(unittest.TestCase):
  maxDiff = None
  def test_replay(self):
    root = os.environ.get("RECORD_SANDBOX_BUFFERS_DIR")
    if not root:
      self.skipTest("RECORD_SANDBOX_BUFFERS_DIR not set")
    for dirpath, dirnames, filenames in os.walk(root):
      if "input" not in filenames:
        continue
      print("Checking " + dirpath)
      input_path = os.path.join(dirpath, "input")
      output_path = os.path.join(dirpath, "output")
      new_output_path = os.path.join(dirpath, "new_output")
      with open(input_path, "rb") as external_input:
        with open(new_output_path, "wb") as external_output:
          sandbox = Sandbox(external_input, external_output)
          run(sandbox)
      original_output = marshal_load_all(output_path)
      # _send_to_js does two layers of marshalling,
      # and NSandbox._onSandboxData parses one of those layers before writing,
      # hence original_output is 'more parsed' than marshal_load_all(new_output_path)
      new_output = [marshal.loads(b) for b in marshal_load_all(new_output_path)]
      # It's usually not worth asserting a match, see comments at the top of the file
      print("Match:", original_output == new_output)
      # self.assertEqual(original_output, new_output)