(core) Add options to /status health-check endpoints to check DB and Redis liveness.

Summary:
- /status accepts new optional query parameters: db=1, redis=1, and timeout=<ms> (defaults to 10_000).
- These verify that the server can make trivial calls to DB/Redis, and that they return within the timeout.
- New HealthCheck test simulates DB and Redis problems.
- Added resilience to Redis reconnects (helped by a test case that simulates disconnects).
- When closing the Redis-based session store, disconnect from Redis (to avoid hanging tests).

Some associated test reorg:
- Move stripeTools out of test/nbrowser, and remove an unnecessary dependency,
  to avoid starting up a browser for gen-server tests.
- Move TcpForwarder to its own file, to use in the new test.

Test Plan: Added a new HealthCheck test that simulates DB and Redis problems.

Reviewers: georgegevoian

Reviewed By: georgegevoian

Differential Revision: https://phab.getgrist.com/D4054
This commit is contained in:
Dmitry S
2023-10-02 12:48:45 -04:00
parent 996674211d
commit fbae81648c
8 changed files with 137 additions and 76 deletions

View File

@@ -190,6 +190,9 @@ export class DocWorkerMap implements IDocWorkerMap {
this._clients = _clients || [createClient(process.env.REDIS_URL)];
this._redlock = new Redlock(this._clients);
this._client = this._clients[0]!;
this._client.on('error', (err) => log.warn(`DocWorkerMap: redisClient error`, String(err)));
this._client.on('end', () => log.warn(`DocWorkerMap: redisClient connection closed`));
this._client.on('reconnecting', () => log.warn(`DocWorkerMap: redisClient reconnecting`));
}
public async addWorker(info: DocWorkerInfo): Promise<void> {

View File

@@ -25,6 +25,8 @@ import {
} from 'typeorm/error/QueryRunnerProviderAlreadyReleasedError';
import {QueryBuilder} from 'typeorm/query-builder/QueryBuilder';
// Print a warning for transactions that take longer than this.
const SLOW_TRANSACTION_MS = 5000;
/**********************
* Patch 1
@@ -103,9 +105,21 @@ export function applyPatch() {
async function runOrRollback() {
try {
await queryRunner.startTransaction();
const result = await runInTransaction(queryRunner.manager);
await queryRunner.commitTransaction();
return result;
const start = Date.now();
const timer = setInterval(() => {
const timeMs = Date.now() - start;
log.warn(`TypeORM transaction slow: [${arg1} ${arg2}]`, {timeMs});
}, SLOW_TRANSACTION_MS);
try {
const result = await runInTransaction(queryRunner.manager);
await queryRunner.commitTransaction();
return result;
} finally {
clearInterval(timer);
}
} catch (err) {
log.debug(`TypeORM transaction error [${arg1} ${arg2}] - ${err}`);
try {