mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
(core) deal with SQLITE_BUSY: database is locked during tests
Summary: During testing, and in some dev configurations, we make connections to the Grist home database from multiple processes, and store it in Sqlite. This is a situation prone to file locks in Sqlite. We set a busy_timeout parameter to help, but transactions may still fail immediately. This diff adds retries to our patch of TypeORM for Sqlite transactions (which without the patch are already flakey for other reasons https://github.com/typeorm/typeorm/issues/1884#issuecomment-380767213). Without this change, I can tickly SQLITE_BUSY problems fairly reliably with the DocApi tests. With this change, I can't so far. This change should not affect deployments of our SaaS (which use Postgres for home db) or grist-core/grist-ee (which access home db from a single process in default configuration). Test Plan: repeated running of DocApi tests Reviewers: dsagal Reviewed By: dsagal Subscribers: dsagal Differential Revision: https://phab.getgrist.com/D3466
This commit is contained in:
parent
1c6f80f956
commit
ff77ecc6c6
@ -13,6 +13,7 @@
|
|||||||
// changed during construction of a query.
|
// changed during construction of a query.
|
||||||
|
|
||||||
import * as sqlite3 from '@gristlabs/sqlite3';
|
import * as sqlite3 from '@gristlabs/sqlite3';
|
||||||
|
import {delay} from 'app/common/delay';
|
||||||
import * as log from 'app/server/lib/log';
|
import * as log from 'app/server/lib/log';
|
||||||
import {Mutex, MutexInterface} from 'async-mutex';
|
import {Mutex, MutexInterface} from 'async-mutex';
|
||||||
import isEqual = require('lodash/isEqual');
|
import isEqual = require('lodash/isEqual');
|
||||||
@ -97,13 +98,16 @@ export function applyPatch() {
|
|||||||
}
|
}
|
||||||
const queryRunner = this.connection.createQueryRunner();
|
const queryRunner = this.connection.createQueryRunner();
|
||||||
const runInTransaction = typeof arg1 === "function" ? arg1 : arg2;
|
const runInTransaction = typeof arg1 === "function" ? arg1 : arg2;
|
||||||
|
const isSqlite = this.connection.driver.options.type === 'sqlite';
|
||||||
|
try {
|
||||||
|
async function runOrRollback() {
|
||||||
try {
|
try {
|
||||||
await queryRunner.startTransaction();
|
await queryRunner.startTransaction();
|
||||||
const result = await runInTransaction(queryRunner.manager);
|
const result = await runInTransaction(queryRunner.manager);
|
||||||
await queryRunner.commitTransaction();
|
await queryRunner.commitTransaction();
|
||||||
return result;
|
return result;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
log.debug(`SQLite transaction error [${arg1} ${arg2}] - ${err}`);
|
log.debug(`TypeORM transaction error [${arg1} ${arg2}] - ${err}`);
|
||||||
try {
|
try {
|
||||||
// we throw original error even if rollback thrown an error
|
// we throw original error even if rollback thrown an error
|
||||||
await queryRunner.rollbackTransaction();
|
await queryRunner.rollbackTransaction();
|
||||||
@ -112,12 +116,65 @@ export function applyPatch() {
|
|||||||
// tslint: disable-next-line
|
// tslint: disable-next-line
|
||||||
}
|
}
|
||||||
throw err;
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (isSqlite) {
|
||||||
|
return await callWithRetry(runOrRollback, {
|
||||||
|
// Transactions may fail immediately if there are connections from
|
||||||
|
// multiple processes, regardless of busy_timeout setting. Add a
|
||||||
|
// retry for this kind of failure. This is relevant to tests, which
|
||||||
|
// use connections from multiple processes, but not to single-process
|
||||||
|
// instances of Grist, or instances of Grist that use Postgres for the
|
||||||
|
// home server.
|
||||||
|
worthRetry: (e) => Boolean(e.message.match(/SQLITE_BUSY/)),
|
||||||
|
firstDelayMsec: 10,
|
||||||
|
factor: 1.25,
|
||||||
|
maxTotalMsec: 3000,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// When not using SQLite, don't do anything special.
|
||||||
|
return await runOrRollback();
|
||||||
|
}
|
||||||
} finally {
|
} finally {
|
||||||
await queryRunner.release();
|
await queryRunner.release();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Call an operation, and if it fails with an error that is worth retrying
|
||||||
|
* (or any error if worthRetry callback is not specified), retry it after
|
||||||
|
* a delay of firstDelayMsec. Retries are repeated with delays growing by
|
||||||
|
* the specified factor (or 2.0 if not specified). Stop if maxTotalMsec is
|
||||||
|
* specified and has passed.
|
||||||
|
*/
|
||||||
|
async function callWithRetry<T>(op: () => Promise<T>, options: {
|
||||||
|
worthRetry?: (err: Error) => boolean,
|
||||||
|
maxTotalMsec?: number,
|
||||||
|
firstDelayMsec: number,
|
||||||
|
factor?: number,
|
||||||
|
}): Promise<T> {
|
||||||
|
const startedAt = Date.now();
|
||||||
|
let dt = options.firstDelayMsec;
|
||||||
|
while (true) { // eslint-disable-line no-constant-condition
|
||||||
|
try {
|
||||||
|
return await op();
|
||||||
|
} catch (e) {
|
||||||
|
// throw if not worth retrying
|
||||||
|
if (options.worthRetry && e instanceof Error && !options.worthRetry(e)) {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
// throw if max time has expired
|
||||||
|
if (options.maxTotalMsec && Date.now() - startedAt > options.maxTotalMsec) {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
// otherwise wait a bit and retry
|
||||||
|
await delay(dt);
|
||||||
|
dt *= options.factor ?? 2.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**********************
|
/**********************
|
||||||
* Patch 2
|
* Patch 2
|
||||||
|
Loading…
Reference in New Issue
Block a user