(core) Exit more cleanly on unhandled errors, and handle errors writing to Clients.

Summary: - Node has a strong recommendation to assume bad state and exit promptly on unhandled exceptions and rejections. We follow it, and only make an effort to clean up before exiting, and to log the error in a more standard way. - The only case seen in recent month of an unhandled rejection was for attempting to write overly large JSON to a Client websocket. Ensure that's handled, and add a test case that artificially reproduces this scenario. Test Plan: Added a test case for failing write to Client, and a test case that unhandled errors indeed kill the server but with an attempt at cleanup. Reviewers: georgegevoian Reviewed By: georgegevoian Differential Revision: https://phab.getgrist.com/D4124
2026-03-02 04:09:24 +00:00 · 2023-11-30 14:08:46 -05:00
parent d89e008a75
commit 4d9bbf6263
12 changed files with 263 additions and 53 deletions
--- a/test/server/Comm.ts
+++ b/test/server/Comm.ts
@@ -11,7 +11,7 @@ import * as tmp from 'tmp';
 import {GristWSConnection, GristWSSettings} from 'app/client/components/GristWSConnection';
 import {Comm as ClientComm} from 'app/client/components/Comm';
 import * as log from 'app/client/lib/log';
-import {Comm, sendDocMessage} from 'app/server/lib/Comm';
+import {Comm} from 'app/server/lib/Comm';
 import {Client, ClientMethod} from 'app/server/lib/Client';
 import {CommClientConnect} from 'app/common/CommTypes';
 import {delay} from 'app/common/delay';
@@ -73,8 +73,8 @@ describe('Comm', function() {
      return {x: x, y: y, name: "methodAsync"};
    },
    methodSend: async function(client, docFD) {
-      sendDocMessage(client, docFD, "fooType" as any, "foo");
-      sendDocMessage(client, docFD, "barType" as any, "bar");
+      void(client.sendMessage({docFD, type: "fooType" as any, data: "foo"}));
+      void(client.sendMessage({docFD, type: "barType" as any, data: "bar"}));
    }
  };

--- a/test/server/lib/ManyFetches.ts
+++ b/test/server/lib/ManyFetches.ts
@@ -7,6 +7,7 @@ import * as log from 'app/server/lib/log';
 import {getGristConfig} from 'test/gen-server/testUtils';
 import {prepareDatabase} from 'test/server/lib/helpers/PrepareDatabase';
 import {TestServer} from 'test/server/lib/helpers/TestServer';
+import {waitForIt} from 'test/server/wait';
 import {createTestDir, EnvironmentSnapshot, setTmpLogLevel} from 'test/server/testUtils';
 import {assert} from 'chai';
 import * as cookie from 'cookie';
@@ -65,7 +66,7 @@ describe('ManyFetches', function() {
    // memory use limited in Client.ts by jsonMemoryPool.

    // Reduce the limit controlling memory for JSON responses from the default of 500MB to 50MB.
-    await docs.testingHooks.commSetClientJsonMemoryLimit(50 * 1024 * 1024);
+    await docs.testingHooks.commSetClientJsonMemoryLimits({totalSize: 50 * 1024 * 1024});

    // Create a large document where fetches would have a noticeable memory footprint.
    // 40k rows should produce ~2MB fetch response.
@@ -133,6 +134,59 @@ describe('ManyFetches', function() {
    }
  });

+  it('should cope gracefully when client messages fail', async function() {
+    // It used to be that sending data to the client could produce uncaught errors (in particular,
+    // for exceeding V8 JSON limits). This test case fakes errors to make sure they get handled.
+
+    // Create a document, initially empty. We'll add lots of rows later.
+    const {docId} = await createLargeDoc({rows: 0});
+
+    // If the server dies, testingHooks calls may hang. This wrapper prevents that.
+    const serverErrorPromise = docs.getExitPromise().then(() => { throw new Error("server exited"); });
+
+    // Make a connection.
+    const createConnectionFunc = await prepareGristWSConnection(docId);
+    const connectionA = createConnectionFunc();
+    const fetcherA = await connect(connectionA, docId);
+
+    // We'll expect 20k rows, taking up about 1MB. Set a lower limit for a fake exception.
+    const prev = await docs.testingHooks.commSetClientJsonMemoryLimits({
+      jsonResponseReservation: 100 * 1024,
+      maxReservationSize: 200 * 1024,
+    });
+
+    try {
+      // Adding lots of rows will produce an action that gets sent to the connected client.
+      // We've arranged for this send to fail. Promise.race helps notice if the server exits.
+      assert.equal(connectionA.established, true);
+      await Promise.race([serverErrorPromise, addRows(docId, 20_000, 20_000)]);
+
+      // Check that the send in fact failed, and the connection did get interrupted.
+      await waitForIt(() =>
+        assert.equal(connectionA.established, false, "Failed message should interrupt connection"),
+        1000, 100);
+
+      // Restore limits, so that fetch works below.
+      await docs.testingHooks.commSetClientJsonMemoryLimits(prev);
+
+      // Fetch data to make sure that the "addRows" call itself succeeded.
+      const connectionB = createConnectionFunc();
+      const fetcherB = await connect(connectionB, docId);
+      try {
+        fetcherB.startPausedFetch();
+        const data = await Promise.race([serverErrorPromise, fetcherB.completeFetch()]);
+        assert.lengthOf(data.tableData[2], 20_000);
+        assert.lengthOf(data.tableData[3].Num, 20_000);
+        assert.lengthOf(data.tableData[3].Text, 20_000);
+      } finally {
+        fetcherB.end();
+      }
+
+    } finally {
+      fetcherA.end();
+    }
+  });
+
  // Creates a document with the given number of rows, and about 50 bytes per row.
  async function createLargeDoc({rows}: {rows: number}): Promise<{docId: string}> {
    log.info("Preparing a doc of %s rows", rows);
@@ -142,7 +196,11 @@ describe('ManyFetches', function() {
      {id: 'Num', type: 'Numeric'},
      {id: 'Text', type: 'Text'}
    ]]]);
-    const chunk = 10_000;
+    await addRows(docId, rows);
+    return {docId};
+  }
+
+  async function addRows(docId: string, rows: number, chunk = 10_000): Promise<void> {
    for (let i = 0; i < rows; i += chunk) {
      const currentNumRows = Math.min(chunk, rows - i);
      await userApi.getDocAPI(docId).addRows('TestTable', {
@@ -152,7 +210,6 @@ describe('ManyFetches', function() {
        Text: Array.from(Array(currentNumRows), (_, n) => `Hello, world, again for the ${i + n}th time.`),
      });
    }
-    return {docId};
  }

  // Get all the info for how to create a GristWSConnection, and returns a connection-creating
--- a/test/server/lib/UnhandledErrors.ts
+++ b/test/server/lib/UnhandledErrors.ts
@@ -0,0 +1,54 @@
+import {prepareDatabase} from 'test/server/lib/helpers/PrepareDatabase';
+import {TestServer} from 'test/server/lib/helpers/TestServer';
+import {createTestDir, setTmpLogLevel} from 'test/server/testUtils';
+import {waitForIt} from 'test/server/wait';
+import {assert} from 'chai';
+import fetch from 'node-fetch';
+import {PassThrough} from 'stream';
+
+/**
+ * Grist sticks to the Node 18 default and recommendation to give up and exit on uncaught
+ * exceptions, unhandled promise rejections, and unhandled 'error' events. But it makes an effort
+ * to clean up before exiting, and to log the error in a better way.
+ */
+describe('UnhandledErrors', function() {
+  this.timeout(30000);
+
+  setTmpLogLevel('warn');
+
+  let testDir: string;
+
+  before(async function() {
+    testDir = await createTestDir('UnhandledErrors');
+    await prepareDatabase(testDir);
+  });
+
+  for (const errType of ['exception', 'rejection', 'error-event']) {
+    it(`should clean up on unhandled ${errType}`, async function() {
+      // Capture server log output, so that we can look to see how the server coped.
+      const output = new PassThrough();
+      const serverLogLines: string[] = [];
+      output.on('data', (data) => serverLogLines.push(data.toString()));
+
+      const server = await TestServer.startServer('home', testDir, errType, undefined, undefined, {output});
+
+      try {
+        assert.equal((await fetch(`${server.serverUrl}/status`)).status, 200);
+        serverLogLines.length  = 0;
+
+        // Trigger an unhandled error, and check that the server logged it and attempted cleanup.
+        await server.testingHooks.tickleUnhandledErrors(errType);
+        await waitForIt(() => {
+          assert.isTrue(serverLogLines.some(line => new RegExp(`Fake ${errType}`).test(line)));
+          assert.isTrue(serverLogLines.some(line => /Server .* cleaning up/.test(line)));
+        }, 1000, 100);
+
+        // We expect the server to be dead now.
+        await assert.isRejected(fetch(`${server.serverUrl}/status`), /failed.*ECONNREFUSED/);
+
+      } finally {
+        await server.stop();
+      }
+    });
+  }
+});
--- a/test/server/lib/helpers/TestServer.ts
+++ b/test/server/lib/helpers/TestServer.ts
@@ -9,6 +9,7 @@ import {exitPromise} from "app/server/lib/serverUtils";
 import log from "app/server/lib/log";
 import {delay} from "bluebird";
 import fetch from "node-fetch";
+import {Writable} from "stream";

 /**
 * This starts a server in a separate process.
@@ -20,10 +21,11 @@ export class TestServer {
    suitename: string,
    customEnv?: NodeJS.ProcessEnv,
    _homeUrl?: string,
+    options: {output?: Writable} = {},      // Pipe server output to the given stream
  ): Promise<TestServer> {

    const server = new TestServer(serverTypes, tempDirectory, suitename);
-    await server.start(_homeUrl, customEnv);
+    await server.start(_homeUrl, customEnv, options);
    return server;
  }

@@ -55,14 +57,18 @@ export class TestServer {
      ...process.env
    };
  }
-  public async start(_homeUrl?: string, customEnv?: NodeJS.ProcessEnv) {
+  public async start(_homeUrl?: string, customEnv?: NodeJS.ProcessEnv, options: {output?: Writable} = {}) {
    // put node logs into files with meaningful name that relate to the suite name and server type
    const fixedName = this._serverTypes.replace(/,/, '_');
    const nodeLogPath = path.join(this.rootDir, `${this._suiteName}-${fixedName}-node.log`);
    const nodeLogFd = await fse.open(nodeLogPath, 'a');
-    const serverLog = process.env.VERBOSE ? 'inherit' : nodeLogFd;
+    const serverLog = options.output ? 'pipe' : (process.env.VERBOSE ? 'inherit' : nodeLogFd);
    // use a path for socket that relates to suite name and server types
    this.testingSocket = path.join(this.rootDir, `${this._suiteName}-${fixedName}.socket`);
+    if (this.testingSocket.length >= 108) {
+      // Unix socket paths typically can't be longer than this. Who knew. Make the error obvious.
+      throw new Error(`Path of testingSocket too long: ${this.testingSocket.length} (${this.testingSocket})`);
+    }
    const env = {
      APP_HOME_URL: _homeUrl,
      GRIST_TESTING_SOCKET: this.testingSocket,
@@ -74,6 +80,11 @@ export class TestServer {
      env,
      stdio: ['inherit', serverLog, serverLog]
    });
+    if (options.output) {
+      this._server.stdout!.pipe(options.output);
+      this._server.stderr!.pipe(options.output);
+    }
+
    this._exitPromise = exitPromise(this._server);

    // Try to be more helpful when server exits by printing out the tail of its log.