Add option to skip Checksum verification between doc storage and Redis #751 (#767)

This check should be unnecessary for stores with strong consistency guarantees (virtually everywhere now).

---------

Co-authored-by: Florent FAYOLLE <florent.fayolle@beta.gouv.fr>
This commit is contained in:
Florent 2024-03-07 20:11:17 +01:00 committed by GitHub
parent 66f025c7df
commit 1e3e076820
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 307 additions and 266 deletions

View File

@ -291,6 +291,7 @@ COOKIE_MAX_AGE | session cookie max age, defaults to 90 days; can be set to
HOME_PORT | port number to listen on for REST API server; if set to "share", add API endpoints to regular grist port.
PORT | port number to listen on for Grist server
REDIS_URL | optional redis server for browser sessions and db query caching
GRIST_SKIP_REDIS_CHECKSUM_MISMATCH | Experimental. If set, only warn if the checksum in Redis differs from the one in your S3 backend storage. You may turn it on if your backend storage implements [read-after-write consistency](https://aws.amazon.com/fr/blogs/aws/amazon-s3-update-strong-read-after-write-consistency/). Defaults to false.
GRIST_SNAPSHOT_TIME_CAP | optional. Define the caps for tracking buckets. Usage: {"hour": 25, "day": 32, "isoWeek": 12, "month": 96, "year": 1000}
GRIST_SNAPSHOT_KEEP | optional. Number of recent snapshots to retain unconditionally for a document, regardless of when they were made
GRIST_PROMCLIENT_PORT | optional. If set, serve the Prometheus metrics on the specified port number. ⚠️ Be sure to use a port which is not publicly exposed ⚠️.

View File

@ -1,6 +1,8 @@
import {ObjMetadata, ObjSnapshot, ObjSnapshotWithMetadata} from 'app/common/DocSnapshot';
import {isAffirmative} from 'app/common/gutil';
import log from 'app/server/lib/log';
import {createTmpDir} from 'app/server/lib/uploads';
import {delay} from 'bluebird';
import * as fse from 'fs-extra';
import * as path from 'path';
@ -226,15 +228,29 @@ export class ChecksummedExternalStorage implements ExternalStorage {
const expectedChecksum = await this._options.sharedHash.load(fromKey);
// Let null docMD5s pass. Otherwise we get stuck if redis is cleared.
// Otherwise, make sure what we've got matches what we expect to get.
// S3 is eventually consistent - if you overwrite an object in it, and then read from it,
// you may get an old version for some time.
// AWS S3 was eventually consistent, but now has stronger guarantees:
// https://aws.amazon.com/blogs/aws/amazon-s3-update-strong-read-after-write-consistency/
//
// Previous to this change, if you overwrote an object in it,
// and then read from it, you may have got an old version for some time.
// We are confident this should not be the case anymore, though this has to be studied carefully.
// If a snapshotId was specified, we can skip this check.
if (expectedChecksum && expectedChecksum !== checksum) {
log.error("ext %s download: data for %s has wrong checksum: %s (expected %s)",
this.label, fromKey, checksum, expectedChecksum);
const message = `ext ${this.label} download: data for ${fromKey} has wrong checksum:` +
` ${checksum} (expected ${expectedChecksum})`;
// If GRIST_SKIP_REDIS_CHECKSUM_MISMATCH is set, issue a warning only and continue,
// rather than issuing an error and failing.
// This flag is experimental and should be removed once we are
// confident that the checksum verification is unnecessary.
if (isAffirmative(process.env.GRIST_SKIP_REDIS_CHECKSUM_MISMATCH)) {
log.warn(message);
} else {
log.error(message);
return undefined;
}
}
}
// If successful, rename the temporary file to its proper name. The destination should NOT
// exist in this case, and this should fail if it does.

View File

@ -25,7 +25,7 @@ import {createClient, RedisClient} from 'redis';
import * as sinon from 'sinon';
import {createInitialDb, removeConnection, setUpDB} from 'test/gen-server/seed';
import {createTmpDir, getGlobalPluginManager} from 'test/server/docTools';
import {setTmpLogLevel, useFixtureDoc} from 'test/server/testUtils';
import {EnvironmentSnapshot, setTmpLogLevel, useFixtureDoc} from 'test/server/testUtils';
import {waitForIt} from 'test/server/wait';
import uuidv4 from "uuid/v4";
@ -273,6 +273,17 @@ class TestStore {
private _externalStorageCreate: (purpose: 'doc'|'meta', extraPrefix: string) => ExternalStorage|undefined) {
}
/**
 * Runs `fn` between a call to `begin()` and a call to `end()`.
 *
 * `end()` is always awaited once `begin()` has succeeded, whether `fn`
 * resolves or rejects, so callers do not need to call it themselves.
 *
 * @param fn - async callback to execute after `begin()` has completed.
 * @returns the value `fn` resolved with.
 */
public async run<T>(fn: () => Promise<T>): Promise<T> {
  await this.begin();
  try {
    // `return await` (rather than a bare `return`) keeps the `finally` clause
    // from running until `fn` has actually settled.
    return await fn();
  } finally {
    await this.end();
  }
}
// Simulates doc worker startup.
public async begin() {
await this.end();
@ -366,6 +377,7 @@ describe('HostedStorageManager', function() {
describe(storage, function() {
const sandbox = sinon.createSandbox();
let oldEnv: EnvironmentSnapshot;
const workerId = 'dw17';
let cli: RedisClient;
@ -376,6 +388,7 @@ describe('HostedStorageManager', function() {
before(async function() {
if (!process.env.TEST_REDIS_URL) { this.skip(); return; }
cli = createClient(process.env.TEST_REDIS_URL);
oldEnv = new EnvironmentSnapshot();
await cli.flushdbAsync();
workers = new DocWorkerMap([cli]);
await workers.addWorker({
@ -439,6 +452,7 @@ describe('HostedStorageManager', function() {
});
afterEach(async function() {
oldEnv.restore();
sandbox.restore();
if (store) {
await store.end();
@ -468,57 +482,67 @@ describe('HostedStorageManager', function() {
assert.equal(await getRedisChecksum(docId), 'null');
// Create an empty document when checksum in redis is 'null'.
await store.begin();
const checksum = await store.run(async () => {
await store.docManager.fetchDoc(docSession, docId);
assert(await store.waitForUpdates());
const checksum = await getRedisChecksum(docId);
assert.notEqual(checksum, 'null');
await store.end();
return checksum;
});
// Check if we nobble the expected checksum then fetch eventually errors.
// Check what happens when we nobble the expected checksum.
await setRedisChecksum(docId, 'nobble');
await store.removeAll();
await store.begin();
// With GRIST_SKIP_REDIS_CHECKSUM_MISMATCH set, the fetch should work
process.env.GRIST_SKIP_REDIS_CHECKSUM_MISMATCH = 'true';
await store.run(async () => {
await assert.isFulfilled(store.docManager.fetchDoc(docSession, docId));
});
// By default, the fetch should eventually error.
delete process.env.GRIST_SKIP_REDIS_CHECKSUM_MISMATCH;
await store.run(async () => {
await assert.isRejected(store.docManager.fetchDoc(docSession, docId),
/operation failed to become consistent/);
await store.end();
});
// Check we get the document back on fresh start if checksum is correct.
await setRedisChecksum(docId, checksum);
await store.removeAll();
await store.begin();
await store.run(async () => {
await store.docManager.fetchDoc(docSession, docId);
await store.end();
});
});
it('can save modifications', async function() {
await store.begin();
await store.run(async () => {
await workers.assignDocWorker('Hello');
await useFixtureDoc('Hello.grist', store.storageManager);
await workers.assignDocWorker('Hello2');
let doc = await store.docManager.fetchDoc(docSession, 'Hello');
let doc2 = await store.docManager.fetchDoc(docSession, 'Hello2');
const doc = await store.docManager.fetchDoc(docSession, 'Hello');
const doc2 = await store.docManager.fetchDoc(docSession, 'Hello2');
await doc.docStorage.exec("update Table1 set A = 'magic_word' where id = 1");
await doc2.docStorage.exec("insert into Table1(id) values(42)");
await store.end();
return { doc, doc2 };
});
await store.removeAll();
await store.begin();
doc = await store.docManager.fetchDoc(docSession, 'Hello');
await store.run(async () => {
const doc = await store.docManager.fetchDoc(docSession, 'Hello');
let result = await doc.docStorage.get("select A from Table1 where id = 1");
assert.equal(result!.A, 'magic_word');
doc2 = await store.docManager.fetchDoc(docSession, 'Hello2');
const doc2 = await store.docManager.fetchDoc(docSession, 'Hello2');
result = await doc2.docStorage.get("select id from Table1");
assert.equal(result!.id, 42);
await store.end();
});
});
it('can save modifications with interfering backup file', async function() {
await store.begin();
await store.run(async () => {
// There was a bug where if a corrupt/truncated backup file was created, all future
// backups would fail. This tickles the condition and makes sure backups now succeed.
await fse.writeFile(path.join(tmpDir, 'Hello.grist-backup'), 'not a sqlite file');
@ -526,36 +550,37 @@ describe('HostedStorageManager', function() {
await workers.assignDocWorker('Hello');
await useFixtureDoc('Hello.grist', store.storageManager);
let doc = await store.docManager.fetchDoc(docSession, 'Hello');
const doc = await store.docManager.fetchDoc(docSession, 'Hello');
await doc.docStorage.exec("update Table1 set A = 'magic_word2' where id = 1");
await store.end(); // S3 push will happen prior to this returning.
});
// The S3 push should have happened by the time store.run() returns.
await store.removeAll();
await store.begin();
doc = await store.docManager.fetchDoc(docSession, 'Hello');
await store.run(async () => {
const doc = await store.docManager.fetchDoc(docSession, 'Hello');
const result = await doc.docStorage.get("select A from Table1 where id = 1");
assert.equal(result!.A, 'magic_word2');
await store.end();
});
});
it('survives if there is a doc marked dirty that turns out to be clean', async function() {
await store.begin();
await store.run(async () => {
await workers.assignDocWorker('Hello');
await useFixtureDoc('Hello.grist', store.storageManager);
let doc = await store.docManager.fetchDoc(docSession, 'Hello');
const doc = await store.docManager.fetchDoc(docSession, 'Hello');
await doc.docStorage.exec("update Table1 set A = 'magic_word' where id = 1");
await store.end();
});
await store.removeAll();
await store.begin();
doc = await store.docManager.fetchDoc(docSession, 'Hello');
await store.run(async () => {
const doc = await store.docManager.fetchDoc(docSession, 'Hello');
const result = await doc.docStorage.get("select A from Table1 where id = 1");
assert.equal(result!.A, 'magic_word');
store.docManager.markAsChanged(doc);
await store.end();
});
// The real test is whether this test manages to complete.
});
@ -564,15 +589,15 @@ describe('HostedStorageManager', function() {
await workers.assignDocWorker('Hello');
// put a doc in s3
await store.begin();
await store.run(async () => {
await useFixtureDoc('Hello.grist', store.storageManager);
let doc = await store.docManager.fetchDoc(docSession, 'Hello');
const doc = await store.docManager.fetchDoc(docSession, 'Hello');
await doc.docStorage.exec("update Table1 set A = 'parallel' where id = 1");
await store.end();
});
// now open it many times in parallel
await store.removeAll();
await store.begin();
await store.run(async () => {
const docs = Promise.all([
store.docManager.fetchDoc(docSession, 'Hello'),
store.docManager.fetchDoc(docSession, 'Hello'),
@ -580,15 +605,15 @@ describe('HostedStorageManager', function() {
store.docManager.fetchDoc(docSession, 'Hello'),
]);
await assert.isFulfilled(docs);
doc = (await docs)[0];
const doc = (await docs)[0];
const result = await doc.docStorage.get("select A from Table1 where id = 1");
assert.equal(result!.A, 'parallel');
await store.end();
});
// To be sure we are checking something, let's call prepareLocalDoc directly
// on storage manager and make sure it fails.
await store.removeAll();
await store.begin();
await store.run(async () => {
const preps = Promise.all([
store.storageManager.prepareLocalDoc('Hello'),
store.storageManager.prepareLocalDoc('Hello'),
@ -596,7 +621,7 @@ describe('HostedStorageManager', function() {
store.storageManager.prepareLocalDoc('Hello')
]);
await assert.isRejected(preps, /in parallel/);
await store.end();
});
});
it ('can delete a document', async function() {
@ -604,17 +629,17 @@ describe('HostedStorageManager', function() {
await workers.assignDocWorker(docId);
// Create a document
await store.begin();
let doc = await store.docManager.fetchDoc(docSession, docId);
await store.run(async () => {
const doc = await store.docManager.fetchDoc(docSession, docId);
await doc.docStorage.exec("insert into Table1(id) values(42)");
await store.end();
});
const docPath = store.getDocPath(docId);
const ext = store.storageManager.testGetExternalStorage();
// Check that the document exists on filesystem and in external store.
await store.begin();
doc = await store.docManager.fetchDoc(docSession, docId);
await store.run(async () => {
const doc = await store.docManager.fetchDoc(docSession, docId);
assert.equal(await fse.pathExists(docPath), true);
assert.equal(await fse.pathExists(docPath + '-hash-doc'), true);
await waitForIt(async () => assert.equal(await ext.exists(docId), true), 20000);
@ -626,7 +651,7 @@ describe('HostedStorageManager', function() {
assert.equal(await fse.pathExists(docPath + '-hash-doc'), false);
assert.equal(await getRedisChecksum(docId), DELETED_TOKEN);
await waitForIt(async () => assert.equal(await ext.exists(docId), false), 20000);
await store.end();
});
// As far as the underlying storage is concerned it should be
// possible to recreate a doc with the same id after deletion.
@ -634,24 +659,23 @@ describe('HostedStorageManager', function() {
// document it must exist in the db - however we'll need to watch
// out for caching.
// TODO: it could be worth tweaking fetchDoc so creation is explicit.
await store.begin();
doc = await store.docManager.fetchDoc(docSession, docId);
await store.run(async () => {
const doc = await store.docManager.fetchDoc(docSession, docId);
await doc.docStorage.exec("insert into Table1(id) values(42)");
await store.end();
});
await store.begin();
doc = await store.docManager.fetchDoc(docSession, docId);
await store.run(async () => {
await store.docManager.fetchDoc(docSession, docId);
assert.equal(await fse.pathExists(docPath), true);
assert.equal(await fse.pathExists(docPath + '-hash-doc'), true);
await store.end();
});
});
it('individual document close is orderly', async function() {
const docId = `create-${uuidv4()}`;
await workers.assignDocWorker(docId);
await store.begin();
await store.run(async () => {
let doc = await store.docManager.fetchDoc(docSession, docId);
await store.closeDoc(doc);
const checksum1 = await getRedisChecksum(docId);
@ -681,8 +705,7 @@ describe('HostedStorageManager', function() {
const checksum3 = await getRedisChecksum(docId);
assert.notEqual(checksum2, checksum3);
await asyncClose;
await store.end();
});
});
// Viewing a document should not mark it as changed (unless a document-level migration
@ -691,8 +714,7 @@ describe('HostedStorageManager', function() {
const docId = `create-${uuidv4()}`;
await workers.assignDocWorker(docId);
await store.begin();
await store.run(async () => {
const markAsChanged: {callCount: number} = store.storageManager.markAsChanged as any;
const changesInitial = markAsChanged.callCount;
@ -707,8 +729,7 @@ describe('HostedStorageManager', function() {
await store.closeDoc(doc);
const changesAfterViewing = markAsChanged.callCount;
assert.equal(changesAfterViewing, changesAfterCreation);
await store.end();
});
});
it('can fork documents', async function() {
@ -717,35 +738,35 @@ describe('HostedStorageManager', function() {
await workers.assignDocWorker(docId);
await workers.assignDocWorker(forkId);
await store.begin();
await store.run(async () => {
await useFixtureDoc('Hello.grist', store.storageManager, `${docId}.grist`);
let doc = await store.docManager.fetchDoc(docSession, docId);
const doc = await store.docManager.fetchDoc(docSession, docId);
await doc.docStorage.exec("update Table1 set A = 'trunk' where id = 1");
await store.end();
});
await store.begin();
await store.run(async () => {
await store.docManager.storageManager.prepareFork(docId, forkId);
doc = await store.docManager.fetchDoc(docSession, forkId);
const doc = await store.docManager.fetchDoc(docSession, forkId);
assert.equal('trunk', (await doc.docStorage.get("select A from Table1 where id = 1"))!.A);
await doc.docStorage.exec("update Table1 set A = 'fork' where id = 1");
await store.end();
});
await store.removeAll();
await store.begin();
doc = await store.docManager.fetchDoc(docSession, docId);
await store.run(async () => {
let doc = await store.docManager.fetchDoc(docSession, docId);
assert.equal('trunk', (await doc.docStorage.get("select A from Table1 where id = 1"))!.A);
doc = await store.docManager.fetchDoc(docSession, forkId);
assert.equal('fork', (await doc.docStorage.get("select A from Table1 where id = 1"))!.A);
await store.end();
});
// Check that the trunk can be replaced by a fork
await store.removeAll();
await store.begin();
await store.run(async () => {
await store.storageManager.replace(docId, {sourceDocId: forkId});
doc = await store.docManager.fetchDoc(docSession, docId);
const doc = await store.docManager.fetchDoc(docSession, docId);
assert.equal('fork', (await doc.docStorage.get("select A from Table1 where id = 1"))!.A);
await store.end();
});
});
it('can persist a fork with no modifications', async function() {
@ -755,16 +776,16 @@ describe('HostedStorageManager', function() {
await workers.assignDocWorker(forkId);
// Create a document.
await store.begin();
await store.run(async () => {
await useFixtureDoc('Hello.grist', store.storageManager, `${docId}.grist`);
let doc = await store.docManager.fetchDoc(docSession, docId);
const doc = await store.docManager.fetchDoc(docSession, docId);
await doc.docStorage.exec("update Table1 set A = 'trunk' where id = 1");
await store.end();
});
// Create a fork with no modifications.
await store.begin();
await store.run(async () => {
await store.docManager.storageManager.prepareFork(docId, forkId);
await store.end();
});
await store.waitForUpdates();
await store.removeAll();
@ -772,10 +793,10 @@ describe('HostedStorageManager', function() {
await fse.remove(store.getDocPath(docId));
// Make sure opening the fork works as expected.
await store.begin();
doc = await store.docManager.fetchDoc(docSession, forkId);
await store.run(async () => {
const doc = await store.docManager.fetchDoc(docSession, forkId);
assert.equal('trunk', (await doc.docStorage.get("select A from Table1 where id = 1"))!.A);
await store.end();
});
await store.removeAll();
});
@ -792,35 +813,36 @@ describe('HostedStorageManager', function() {
await workers.assignDocWorker(forkId2);
await workers.assignDocWorker(forkId3);
await store.begin();
const doc = await store.run(async () => {
await useFixtureDoc('Hello.grist', store.storageManager, `${docId}.grist`);
let doc = await store.docManager.fetchDoc(docSession, docId);
const doc = await store.docManager.fetchDoc(docSession, docId);
await doc.waitForInitialization();
for (let i = 0; i < forks; i++) {
await doc.docStorage.exec(`update Table1 set A = 'v${i}' where id = 1`);
await doc.testKeepOpen();
await store.waitForUpdates();
}
await store.end();
return doc;
});
const {snapshots} = await store.storageManager.getSnapshots(doc.docName);
assert.isAtLeast(snapshots.length, forks + 1); // May be 1 greater depending on how long
// it takes to run initial migrations.
await store.begin();
await store.run(async () => {
for (let i = forks - 1; i >= 0; i--) {
const snapshot = snapshots.shift()!;
const forkId = snapshot.docId;
await workers.assignDocWorker(forkId);
doc = await store.docManager.fetchDoc(docSession, forkId);
const doc = await store.docManager.fetchDoc(docSession, forkId);
assert.equal(`v${i}`, (await doc.docStorage.get("select A from Table1 where id = 1"))!.A);
}
await store.end();
});
});
it('can access snapshots with old schema versions', async function() {
const snapshotId = `World~v=1`;
await workers.assignDocWorker(snapshotId);
await store.begin();
await store.run(async () => {
// Pretend we have a snapshot of World-v33.grist and fetch/load it.
await useFixtureDoc('World-v33.grist', store.storageManager, `${snapshotId}.grist`);
const doc = await store.docManager.fetchDoc(docSession, snapshotId);
@ -839,14 +861,14 @@ describe('HostedStorageManager', function() {
/Snapshots cannot be replaced/);
await assert.isRejected(doc.applyUserActions(docSession, [['AddTable', 'NewTable', [{id: 'A'}]]]),
/pyCall is not available in snapshots/);
await store.end();
});
});
it('can prune snapshots', async function() {
const versions = 8;
const docId = `create-${uuidv4()}`;
await store.begin();
const doc = await store.run(async () => {
await useFixtureDoc('Hello.grist', store.storageManager, `${docId}.grist`);
const doc = await store.docManager.fetchDoc(docSession, docId);
for (let i = 0; i < versions; i++) {
@ -855,7 +877,8 @@ describe('HostedStorageManager', function() {
await store.waitForUpdates();
}
await store.storageManager.testWaitForPrunes();
await store.end();
return doc;
});
await waitForIt(async () => {
const {snapshots} = await store.storageManager.getSnapshots(doc.docName);
// Should be keeping at least five, and then maybe 1 more if the hour changed
@ -878,20 +901,20 @@ describe('HostedStorageManager', function() {
// Create a series of versions of a document, and fetch them sequentially
// so that they are potentially available as stale values.
await store.begin();
await store.run(async () => {
await useFixtureDoc('Hello.grist', store.storageManager, `${docId}.grist`);
let doc = await store.docManager.fetchDoc(docSession, docId);
await store.end();
await store.docManager.fetchDoc(docSession, docId);
});
for (let i = 0; i < 3; i++) {
await store.removeAll();
await store.begin();
doc = await store.docManager.fetchDoc(docSession, docId);
await store.run(async () => {
const doc = await store.docManager.fetchDoc(docSession, docId);
if (i > 0) {
const prev = await doc.docStorage.get("select A from Table1 where id = 1");
assert.equal(prev!.A, `magic_word${i - 1}`);
}
await doc.docStorage.exec(`update Table1 set A = 'magic_word${i}' where id = 1`);
await store.end();
});
}
// Wipe all checksums and make sure (1) we don't get any errors and (2) the
@ -903,10 +926,10 @@ describe('HostedStorageManager', function() {
// Optionally wipe all local files.
await store.removeAll();
}
await store.begin();
doc = await store.docManager.fetchDoc(docSession, docId);
await store.run(async () => {
const doc = await store.docManager.fetchDoc(docSession, docId);
result = (await doc.docStorage.get("select A from Table1 where id = 1"))?.A;
await store.end();
});
if (result !== 'magic_word2') {
throw new Error(`inconsistent result: ${result}`);
}
@ -917,7 +940,7 @@ describe('HostedStorageManager', function() {
it('can access metadata', async function() {
const docId = `create-${uuidv4()}`;
await store.begin();
const { tz, h, doc } = await store.run(async () => {
// Use a doc that's up-to-date on storage migrations, but needs a python schema migration.
await useFixtureDoc('BlobMigrationV8.grist', store.storageManager, `${docId}.grist`);
const doc = await store.docManager.fetchDoc(docSession, docId);
@ -926,7 +949,8 @@ describe('HostedStorageManager', function() {
const tz = rec.tableData[3].timezone[0];
const h = (await doc.getRecentStates(makeExceptionalDocSession('system')))[0].h;
await store.docManager.makeBackup(doc, 'hello');
await store.end();
return { tz, h, doc };
});
const {snapshots} = await store.storageManager.getSnapshots(doc.docName);
assert.equal(snapshots[0]?.metadata?.label, 'hello');
// There can be extra snapshots, depending on timing.