(core) sanitizing redis errors

Summary: sanitazing errors output in webhooks to protect users data (not show them in logs and other places). Because redis is returing whole payload when error occur, best approach is to hijack exception as close to redis operation as posible and sanitize the data. We need to know data structure do do this corretly tho. Currently I decided to just censore everything that has "payload" key. Test Plan: Because logs that need to be sanitized come from redis, to be valid tested we should force redis to crash. It's hard to do in our integration test setup. In this moment, unit test is all we got. Reviewers: paulfitz Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D3905
2025-06-13 20:53:59 +00:00 · 2023-06-01 16:00:52 +02:00 · 2023-06-01 16:00:52 +02:00 · 90e902c10f
commit 90e902c10f
parent 0fb5092e33
3 changed files with 166 additions and 3 deletions
--- a/app/server/lib/Triggers.ts
+++ b/app/server/lib/Triggers.ts
@ -1,4 +1,5 @@
 import {LocalActionBundle} from 'app/common/ActionBundle';
+import {summarizeAction} from 'app/common/ActionSummarizer';
 import {ActionSummary, TableDelta} from 'app/common/ActionSummary';
 import {ApiError} from 'app/common/ApiError';
 import {MapWithTTL} from 'app/common/AsyncCreate';
@ -8,13 +9,13 @@ import {MetaRowRecord} from 'app/common/TableData';
 import {CellDelta} from 'app/common/TabularDiff';
 import {WebhookBatchStatus, WebhookStatus, WebhookSummary, WebhookUsage} from 'app/common/Triggers';
 import {decodeObject} from 'app/plugin/objtypes';
-import {summarizeAction} from 'app/common/ActionSummarizer';
 import {ActiveDoc} from 'app/server/lib/ActiveDoc';
 import {makeExceptionalDocSession} from 'app/server/lib/DocSession';
 import log from 'app/server/lib/log';
+import {proxyAgent} from 'app/server/lib/ProxyAgent';
 import {matchesBaseDomain} from 'app/server/lib/requestUtils';
 import {delayAbort} from 'app/server/lib/serverUtils';
-import {proxyAgent} from 'app/server/lib/ProxyAgent';
+import {LogSanitizer} from "app/server/utils/LogSanitizer";
 import {promisifyAll} from 'bluebird';
 import * as _ from 'lodash';
 import {AbortController, AbortSignal} from 'node-abort-controller';
@ -126,6 +127,7 @@ export class DocTriggers {
  private _loopAbort: AbortController|undefined;

  private _stats: WebhookStatistics;
+  private _sanitizer = new LogSanitizer();

  constructor(private _activeDoc: ActiveDoc) {
    const redisUrl = process.env.REDIS_URL;
@ -396,7 +398,15 @@ export class DocTriggers {

  private async _pushToRedisQueue(events: WebHookEvent[]) {
    const strings = events.map(e => JSON.stringify(e));
-    await this._redisClient?.rpushAsync(this._redisQueueKey, ...strings);
+    try {
+      await this._redisClient?.rpushAsync(this._redisQueueKey, ...strings);
+    }
+    catch(e){
+      // It's very hard to test this with integration tests, because it requires a redis failure.
+      // And it's not easy to simulate redis failure.
+      // So on this point we have only unit test in core/test/server/utils/LogSanitizer.ts
+      throw this._sanitizer.sanitize(e);
+    }
  }

  private async _getRedisQueue(redisClient: RedisClient) {
--- a/app/server/utils/LogSanitizer.ts
+++ b/app/server/utils/LogSanitizer.ts
@ -0,0 +1,84 @@
+/**
+ * A sanitizer interface that provides methods to sanitize log entries.
+ */
+interface ISanitizer {
+  /**
+   * Sanitizes the provided log entry. Should be called only if canSanitize returns true.
+   * @param {any} entry - The log entry to sanitize.
+   * @returns {any} The sanitized log entry.
+   */
+  sanitize(entry: any): any;
+
+  /**
+   * Checks if the sanitizer can handle the given log entry.
+   * @param {any} entry - The log entry to check.
+   * @returns {boolean} True if the sanitizer can handle the log entry, false otherwise.
+   */
+  canSanitize(entry: any): boolean;
+}
+
+/**
+ * A log sanitizer class that sanitizes logs to avoid leaking sensitive/private data.
+ * only the first applicable sanitizer (as determined by canSanitize) will be applied
+ * Currently, it is hardcoded to sanitize only logs from Redis rpush command.
+ */
+export class LogSanitizer {
+  private _sanitizers: ISanitizer[] = [
+    new RedisSanitizer(),
+  ];
+
+  /**
+   * Sanitizes the provided log entry using a predefined set of sanitizers.
+   * @param {any} log - The log entry to sanitize.
+   * @returns {any} The sanitized log entry.
+   */
+  public sanitize(log: any): any {
+    for (const sanitizer of this._sanitizers) {
+      if (sanitizer.canSanitize(log)) {
+        return sanitizer.sanitize(log);
+      }
+    }
+    return log;
+  }
+}
+
+/**
+ * A sanitizer implementation for Redis logs.
+ */
+class RedisSanitizer implements ISanitizer {
+  /**
+   * Sanitizes the Redis log entry by replacing sensitive data in the payload with "[sanitized]".
+   * @param {any} entry - The Redis log entry to sanitize.
+   * @returns {any} The sanitized Redis log entry.
+   */
+  public sanitize(entry: any): any {
+    // REDIS log structure looks like this: the first arg is the name of the queue,
+    // and the rest are the data that was pushed to the queue. Therefore, we are omitting the first arg.
+    for (let i = 1; i < entry.args.length; i++) {
+      let arg = entry.args[i];
+      let parsedArg: any = null;
+      parsedArg = JSON.parse(arg);
+      if (parsedArg?.payload) {
+        parsedArg.payload = "[sanitized]";
+      }
+      arg = JSON.stringify(parsedArg);
+      entry.args[i] = arg;
+    }
+    return entry;
+  }
+
+  /**
+   * Checks if the given log entry corresponds to a Redis rpush command.
+   * @param {any} entry - The log entry to check.
+   * @returns {boolean} True if the log entry is a Redis rpush command, false otherwise.
+   */
+  public canSanitize(entry: any): boolean {
+    // We are only interested in rpush commands
+    return (
+      typeof entry === "object" &&
+      entry.command?.toLowerCase() === "rpush".toLowerCase() &&
+      entry.args &&
+      Array.isArray(entry.args)
+    );
+  }
+}
--- a/test/server/utils/LogSanitizer.ts
+++ b/test/server/utils/LogSanitizer.ts
@ -0,0 +1,69 @@
+import {LogSanitizer} from "app/server/utils/LogSanitizer";
+import {assert} from "chai";
+
+describe("LogSanitizer", () => {
+
+  it("should return neutral logs untouched", done => {
+    const exampleLog
+      = 'DocTriggers: Webhook responded with non-200 status status=404, attempt=1, docId=8x9U6xe4hNz8WaJCzAjDBM,' +
+      ' queueLength=8, drainingQueue=false, shuttingDown=false, sending=true, redisClient=true';
+    const sanitizer = new LogSanitizer();
+    const sanitizedLog = sanitizer.sanitize(exampleLog);
+    assert.equal(sanitizedLog, exampleLog);
+    done();
+  });
+
+  it("should not crashed when empty log was passed to sanitizer", done => {
+    const exampleLog = undefined;
+    const sanitizer = new LogSanitizer();
+    const sanitizedLog = sanitizer.sanitize(exampleLog);
+    assert.equal(sanitizedLog, exampleLog);
+    done();
+  });
+
+
+
+
+  it("should sanitize redis webhooks rpush logs", done => {
+    const exampleLog = {
+      command: "RPUSH",
+      code: "NR_CLOSED",
+      args: [
+        "webhook-queue-8x9U6xe4hNz8WaJCzAjDBM",
+        // Data send to redis is kept there in string format, therefore in our solution we are stringify them before
+        // sending. we know that the payload is a json though, so here we are trying to reproduce that data structure.
+        JSON.stringify({
+          id: "f3517b07-9846-4fe3-bcb2-d26cc07e40bd",
+          payload: {
+            id: 355,
+            manualSort: 355,
+            Name: "Johny",
+            InsuranceNumber: "12345"
+          }
+        }),
+        // in thie redis those are json, but send as a strings, so we need to parse them
+        JSON.stringify({
+          id: "b3091e47-00a0-4614-a58f-cb1ae383ea43",
+          payload: {
+            id: 355,
+            manualSort: 355,
+            Name: "Mark",
+            InsuranceNumber: "65844"
+          }
+        })
+      ]
+    };
+
+    const sanitizer = new LogSanitizer();
+    const sanitizedLogObj = sanitizer.sanitize(exampleLog);
+    const sanitizedLog = JSON.stringify(sanitizedLogObj);
+
+    // tests on stringify object, to make it fast to search in.
+    assert.isTrue(sanitizedLog.includes("[sanitized]"));
+    assert.isFalse(sanitizedLog.includes("InsuranceNumber"));
+    assert.isFalse(sanitizedLog.includes("Name"));
+
+    done();
+  });
+
+});