import * as sqlite3 from '@gristlabs/sqlite3';
import {mapGetOrSet} from 'app/common/AsyncCreate';
import {delay} from 'app/common/delay';
import {DocEntry} from 'app/common/DocListAPI';
import {DocSnapshots} from 'app/common/DocSnapshot';
import {DocumentUsage} from 'app/common/DocUsage';
import {buildUrlId, parseUrlId} from 'app/common/gristUrls';
import {KeyedOps} from 'app/common/KeyedOps';
import {DocReplacementOptions, NEW_DOCUMENT_CODE} from 'app/common/UserAPI';
import {HomeDBManager} from 'app/gen-server/lib/HomeDBManager';
import {checksumFile} from 'app/server/lib/checksumFile';
import {DocSnapshotInventory, DocSnapshotPruner} from 'app/server/lib/DocSnapshots';
import {IDocWorkerMap} from 'app/server/lib/DocWorkerMap';
import {ChecksummedExternalStorage, DELETED_TOKEN, ExternalStorage, Unchanged} from 'app/server/lib/ExternalStorage';
import {HostedMetadataManager} from 'app/server/lib/HostedMetadataManager';
import {ICreate} from 'app/server/lib/ICreate';
import {IDocStorageManager} from 'app/server/lib/IDocStorageManager';
import {LogMethods} from 'app/server/lib/LogMethods';
import {fromCallback} from 'app/server/lib/serverUtils';
import * as fse from 'fs-extra';
import * as path from 'path';
import * as uuidv4 from 'uuid/v4';
import {OpenMode, SQLiteDB} from './SQLiteDB';

// Check for a valid document id.
const docIdRegex = /^[-=_\w~%]+$/;

// Wait this long after a change to the document before trying to make a backup of it.
const GRIST_BACKUP_DELAY_SECS = parseInt(process.env.GRIST_BACKUP_DELAY_SECS || '15', 10);

// This constant controls how many pages of the database we back up in a single step.
// The larger it is, the faster the backup overall, but the slower each step is.
// Slower steps result in longer periods when the database is locked, without any
// opportunity for a waiting client to get in and make a write.
// The size of a page, as far as sqlite is concerned, is 4096 bytes.
const PAGES_TO_BACKUP_PER_STEP = 1024;  // Backup is made in 4MB chunks.

// Between steps of the backup, we pause in case a client is waiting to make a write.
// The shorter the pause, the greater the odds that the client won't be able to make
// its write, but the faster the backup will complete.
const PAUSE_BETWEEN_BACKUP_STEPS_IN_MS = 10;

function checkValidDocId(docId: string): void {
  if (!docIdRegex.test(docId)) {
    throw new Error(`Invalid docId ${docId}`);
  }
}
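
// To make the trade-off above concrete: with sqlite's 4096-byte pages, each backup
// step copies up to 1024 * 4096 bytes = 4 MiB. A 100 MiB document therefore needs
// roughly 25 steps, adding about 25 * 10 ms = 250 ms of deliberate pausing on top
// of the copying time itself (illustrative arithmetic, not a measured figure).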

export interface HostedStorageOptions {
  secondsBeforePush: number;
  secondsBeforeFirstRetry: number;
  pushDocUpdateTimes: boolean;
  // A function returning the core ExternalStorage implementation,
  // which may then be wrapped in additional layer(s) of ExternalStorage.
  // See ICreate.ExternalStorage.
  // Uses S3 by default in hosted Grist.
  externalStorageCreator?: (purpose: 'doc'|'meta') => ExternalStorage;
}

const defaultOptions: HostedStorageOptions = {
  secondsBeforePush: GRIST_BACKUP_DELAY_SECS,
  secondsBeforeFirstRetry: 3.0,
  pushDocUpdateTimes: true
};
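
// Illustrative only: a test or self-hosted setup might swap in a different backing
// store by overriding externalStorageCreator (makeLocalStorage here is a
// hypothetical factory, not part of this module):
//
//   const manager = new HostedStorageManager(docsRoot, workerId, false, workerMap,
//                                            dbManager, create, {
//     ...defaultOptions,
//     externalStorageCreator: (purpose) => makeLocalStorage(purpose),
//   });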

/**
 * HostedStorageManager manages Grist files in the hosted environment for a particular DocWorker.
 * These files are stored on S3 and synced to the local file system. It matches the interface of
 * DocStorageManager (used for standalone Grist), but is more limited, e.g. does not expose the
 * list of local files.
 *
 * In the hosted environment, documents are uniquely identified by docId, which serves as the
 * canonical docName. This ID does not change on renaming. HostedStorageManager knows nothing of
 * friendlier doc names.
 *
 * TODO: Listen (to Redis?) to find out when a local document has been deleted or renamed.
 * (In case of rename, something on the DocWorker needs to inform the client about the rename)
 * TODO: Do something about the active flag in redis DocStatus.
 * TODO: Add an explicit createFlag in DocStatus for clarity and simplification.
 */
export class HostedStorageManager implements IDocStorageManager {

  // Handles pushing doc metadata changes when the doc is updated.
  private _metadataManager: HostedMetadataManager|null = null;

  // Maps docId to the promise for when the document is present on the local filesystem.
  private _localFiles = new Map<string, Promise<boolean>>();

  // Label to put in metadata for a document. Only one label is supported per snapshot currently.
  // Holds the label that should be associated with a backup when a labeled backup is being made.
  private _labels = new Map<string, string>();

  // Time at which the document was last changed.
  private _timestamps = new Map<string, string>();

  // Access external storage.
  private _ext: ChecksummedExternalStorage;
  private _extMeta: ChecksummedExternalStorage;

  // Prune external storage.
  private _pruner: DocSnapshotPruner;

  // Access to version information about documents.
  private _inventory: DocSnapshotInventory;

  // A set of filenames currently being created or downloaded.
  private _prepareFiles = new Set<string>();

  // Ongoing and scheduled uploads for documents.
  private _uploads: KeyedOps;

  // Set once the manager has been closed.
  private _closed: boolean = false;

  private _baseStore: ExternalStorage;  // External store for documents, without checksumming.

  // Latest version ids of documents.
  private _latestVersions = new Map<string, string>();
  private _latestMetaVersions = new Map<string, string>();

  private _log = new LogMethods('HostedStorageManager ', (docId: string|null) => ({docId}));

  /**
   * Initialize with the given root directory, which should be a fully-resolved path.
   * If _disableS3 is set, external (S3) storage will not be used.
   */
  constructor(
    private _docsRoot: string,
    private _docWorkerId: string,
    private _disableS3: boolean,
    private _docWorkerMap: IDocWorkerMap,
    dbManager: HomeDBManager,
    create: ICreate,
    options: HostedStorageOptions = defaultOptions
  ) {
    const creator = options.externalStorageCreator || ((purpose) => create.ExternalStorage(purpose, ''));
    // We store documents either in a test store, or in an s3 store
    // at s3://<s3Bucket>/<s3Prefix><docId>.grist
    const externalStoreDoc = this._disableS3 ? undefined : creator('doc');
    if (!externalStoreDoc) { this._disableS3 = true; }
    const secondsBeforePush = options.secondsBeforePush;
    if (options.pushDocUpdateTimes) {
      this._metadataManager = new HostedMetadataManager(dbManager);
    }
    this._uploads = new KeyedOps(key => this._pushToS3(key), {
      delayBeforeOperationMs: secondsBeforePush * 1000,
      retry: true,
      logError: (key, failureCount, err) => {
        this._log.error(null, "error pushing %s (%d): %s", key, failureCount, err);
      }
    });

    if (!this._disableS3) {
      this._baseStore = externalStoreDoc!;
      // Whichever store we have, we use checksums to deal with
      // eventual consistency.
      this._ext = this._getChecksummedExternalStorage('doc', this._baseStore,
                                                      this._latestVersions, options);
      const baseStoreMeta = creator('meta');
      if (!baseStoreMeta) {
        throw new Error('bug: external storage should be created for "meta" if it is created for "doc"');
      }
      this._extMeta = this._getChecksummedExternalStorage('meta', baseStoreMeta,
                                                          this._latestMetaVersions,
                                                          options);

      this._inventory = new DocSnapshotInventory(
        this._ext,
        this._extMeta,
        async docId => {
          const dir = this.getAssetPath(docId);
          await fse.mkdirp(dir);
          return path.join(dir, 'meta.json');
        },
        async docId => {
          const product = await dbManager.getDocProduct(docId);
          return product?.features.snapshotWindow;
        },
      );

      // The pruner could use an inconsistent store without any real loss overall,
      // but tests are easier if it is consistent.
      this._pruner = new DocSnapshotPruner(this._inventory, {
        delayBeforeOperationMs: 0,  // prune as soon as we've made a first upload...
        minDelayBetweenOperationsMs: secondsBeforePush * 4000,  // ...but wait awhile before pruning again.
      });
    }
  }
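
  // In brief, the upload pipeline wired up above runs as follows:
  //   markAsChanged(docId)             // registers docId with this._uploads (KeyedOps)
  //     -> (secondsBeforePush delay)   // debounced while changes keep arriving
  //     -> _pushToS3(docId)            // backs up the SQLite file and uploads it
  //     -> this._inventory.add(...)    // records the new snapshot version
  //     -> this._pruner.requestPrune() // schedules pruning of old snapshots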

  /**
   * Send a document to S3, without doing anything fancy. Assumes this is the first time
   * the object is written to S3 - so no need to worry about consistency.
   */
  public async addToStorage(docId: string) {
    if (this._disableS3) { return; }
    this._uploads.addOperation(docId);
    await this._uploads.expediteOperationAndWait(docId);
  }

  public getPath(docName: string): string {
    return this.getAssetPath(docName) + '.grist';
  }

  // Where to store files related to a document locally. The document goes in <assetPath>.grist,
  // and other files go in the <assetPath>/ directory.
  public getAssetPath(docName: string): string {
    checkValidDocId(docName);
    return path.join(this._docsRoot, path.basename(docName, '.grist'));
  }
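
  // For example, assuming a _docsRoot of '/persist/docs' (illustrative), a document
  // '8x4dWQ' lives at '/persist/docs/8x4dWQ.grist', and related assets such as
  // meta.json live under '/persist/docs/8x4dWQ/'.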

  // We don't deal with sample docs.
  public getSampleDocPath(sampleDocName: string): string|null { return null; }

  /**
   * Translates a possibly non-canonical docName to a canonical one. Returns a bare docId,
   * stripping out any possible path components or .grist extension. (We don't expect these to
   * ever be used, but stripping seems better than asserting.)
   */
  public async getCanonicalDocName(altDocName: string): Promise<string> {
    return path.basename(altDocName, '.grist');
  }

  /**
   * Prepares a document for use locally. Here we sync the doc from S3 to the local filesystem.
   * Returns whether the document is new (needs to be created).
   * Calling this method multiple times in parallel for the same document is treated as a sign
   * of a bug.
   *
   * The optional srcDocName parameter is set when preparing a fork.
   */
  public async prepareLocalDoc(docName: string, srcDocName?: string): Promise<boolean> {
    // We could be reopening a document that is still closing down.
    // Wait for that to happen. TODO: we could also try to interrupt the closing-down process.
    await this.closeDocument(docName);

    if (this._prepareFiles.has(docName)) {
      throw new Error(`Tried to call prepareLocalDoc('${docName}') twice in parallel`);
    }

    try {
      this._prepareFiles.add(docName);
      const isNew = !(await this._claimDocument(docName, srcDocName));
      return isNew;
    } finally {
      this._prepareFiles.delete(docName);
    }
  }

  public async prepareToCreateDoc(docName: string): Promise<void> {
    await this.prepareLocalDoc(docName, 'new');
    if (this._inventory) {
      await this._inventory.create(docName);
      await this._onInventoryChange(docName);
    }
    this.markAsChanged(docName);
  }

  /**
   * Initialize one document from another, associating the result with the current
   * worker.
   */
  public async prepareFork(srcDocName: string, destDocName: string): Promise<string> {
    await this.prepareLocalDoc(destDocName, srcDocName);
    // Make sure the fork is actually stored in S3, even if no changes are made,
    // since we'd refuse to create it later.
    this.markAsChanged(destDocName);
    return this.getPath(destDocName);
  }

  // Gets a copy of the document, e.g. for downloading. Returns the full file path.
  // The copy won't change if edits are made to the document. It is the caller's
  // responsibility to delete the result.
  public async getCopy(docName: string): Promise<string> {
    const present = await this._claimDocument(docName);
    if (!present) {
      throw new Error('cannot copy document that does not exist yet');
    }
    return await this._prepareBackup(docName, uuidv4());
  }

  public async replace(docId: string, options: DocReplacementOptions): Promise<void> {
    // Make sure the current version of the document is flushed.
    await this.flushDoc(docId);

    // Figure out the source s3 key to copy from. For this purpose, we need to
    // remove any snapshotId embedded in the document id.
    const rawSourceDocId = options.sourceDocId || docId;
    const parts = parseUrlId(rawSourceDocId);
    const sourceDocId = buildUrlId({...parts, snapshotId: undefined});
    const snapshotId = options.snapshotId || parts.snapshotId;

    if (sourceDocId === docId && !snapshotId) { return; }

    // Basic implementation for when S3 is not available.
    if (this._disableS3) {
      if (snapshotId) {
        throw new Error('snapshots not supported without S3');
      }
      if (await fse.pathExists(this.getPath(sourceDocId))) {
        await fse.copy(this.getPath(sourceDocId), this.getPath(docId));
        return;
      } else {
        throw new Error(`cannot find ${docId}`);
      }
    }

    // While replacing, move the current version of the document aside. If a problem
    // occurs, move it back.
    const docPath = this.getPath(docId);
    const tmpPath = `${docPath}-replacing`;
    // NOTE: fse.remove succeeds also when the file does not exist.
    await fse.remove(tmpPath);
    if (await fse.pathExists(docPath)) {
      await fse.move(docPath, tmpPath);
    }
    try {
      // Fetch new content from S3.
      if (!await this._fetchFromS3(docId, {sourceDocId, snapshotId})) {
        throw new Error('Cannot fetch document');
      }
      // Make sure the new content is considered new.
      // NOTE: fse.remove succeeds also when the file does not exist.
      await fse.remove(this._getHashFile(this.getPath(docId)));
      this.markAsChanged(docId, 'edit');
      // Invalidate usage; it'll get re-computed the next time the document is opened.
      this.scheduleUsageUpdate(docId, null, true);
    } catch (err) {
      this._log.error(docId, "problem replacing doc: %s", err);
      await fse.move(tmpPath, docPath, {overwrite: true});
      throw err;
    } finally {
      // NOTE: fse.remove succeeds also when the file does not exist.
      await fse.remove(tmpPath);
    }
    // Flush the document immediately if it has been changed.
    await this.flushDoc(docId);
  }

  // We don't deal with listing documents.
  public async listDocs(): Promise<DocEntry[]> { return []; }

  public async deleteDoc(docName: string, deletePermanently?: boolean): Promise<void> {
    if (!deletePermanently) {
      throw new Error("HostedStorageManager only implements permanent deletion in deleteDoc");
    }
    await this.closeDocument(docName);
    if (!this._disableS3) {
      await this._ext.remove(docName);
      await this._extMeta.remove(docName);
    }
    // NOTE: fse.remove succeeds also when the file does not exist.
    await fse.remove(this.getPath(docName));
    await fse.remove(this._getHashFile(this.getPath(docName), 'doc'));
    await fse.remove(this._getHashFile(this.getPath(docName), 'meta'));
    await fse.remove(this.getAssetPath(docName));
  }

  // We don't implement document renames.
  public async renameDoc(oldName: string, newName: string): Promise<void> {
    throw new Error("HostedStorageManager does not implement renameDoc");
  }

  /**
   * We handle backups by syncing the current version of the file as a new object version in S3,
   * with the requested backupTag as metadata.
   */
  public async makeBackup(docName: string, backupTag: string): Promise<string> {
    if (this._labels.get(docName)) {
      await this.flushDoc(docName);
    }
    this._labels.set(docName, backupTag);
    this.markAsChanged(docName);
    await this.flushDoc(docName);
    // TODO: make an alternative way to store backups if operating without an external
    // store.
    return this._ext ?
      (this._ext.url(docName) + ' (' + this._latestVersions.get(docName) + ')') :
      'no-external-storage-enabled';
  }
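
  // The string returned by makeBackup identifies where the backup landed, along the
  // lines of '<external-url-of-doc> (<snapshot-version>)'; the exact url format
  // depends on the ExternalStorage implementation in use.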

  /**
   * Electron version only. Shows the given doc in the file explorer.
   */
  public async showItemInFolder(docName: string): Promise<void> {
    throw new Error("HostedStorageManager does not implement showItemInFolder");
  }

  /**
   * Close the storage manager. Make sure any pending changes reach S3 first.
   */
  public async closeStorage(): Promise<void> {
    await this._uploads.wait(() => this._log.info(null, 'waiting for closeStorage to finish'));

    // Close metadata manager.
    if (this._metadataManager) { await this._metadataManager.close(); }

    // Finish up everything incoming. This is most relevant for tests.
    // Wait for any downloads to wind up, since there's no easy way to cancel them.
    while (this._prepareFiles.size > 0) { await delay(100); }
    await Promise.all(this._localFiles.values());

    this._closed = true;
    if (this._ext) { await this._ext.close(); }
    if (this._pruner) { await this._pruner.close(); }
  }

  /**
   * Allow the storage manager to be used again - used in tests.
   */
  public testReopenStorage() {
    this._closed = false;
  }

  public async testWaitForPrunes() {
    if (this._pruner) { await this._pruner.wait(); }
  }

  /**
   * Get direct access to the external store - used in tests.
   */
  public testGetExternalStorage(): ExternalStorage {
    return this._baseStore;
  }

  // Returns true if the document and its inventory are backed up to the external store (if attached).
  public isAllSaved(docName: string): boolean {
    return !this._uploads.hasPendingOperation(docName) &&
      (this._inventory ? this._inventory.isSaved(docName) : true);
  }

  // Pick up the pace of pushing to s3, from leisurely to urgent.
  public prepareToCloseStorage() {
    if (this._pruner) {
      this._pruner.close().catch(e => this._log.error(null, "pruning error %s", e));
    }
    this._uploads.expediteOperations();
  }

  /**
   * Finalize any operations involving the named document.
   */
  public async closeDocument(docName: string): Promise<void> {
    if (this._localFiles.has(docName)) {
      await this._localFiles.get(docName);
    }
    this._localFiles.delete(docName);
    return this.flushDoc(docName);
  }

  /**
   * Make sure the document is backed up to s3.
   */
  public async flushDoc(docName: string): Promise<void> {
    while (!this.isAllSaved(docName)) {
      this._log.info(docName, 'waiting for document to finish');
      await this._uploads.expediteOperationAndWait(docName);
      await this._inventory?.flush(docName);
      if (!this.isAllSaved(docName)) {
        // Throttle slightly in case this operation ends up looping excessively.
        await delay(1000);
      }
    }
  }

  /**
   * This is called when a document may have been changed, via edits or migrations etc.
   */
  public markAsChanged(docName: string, reason?: string): void {
    const timestamp = new Date().toISOString();
    this._timestamps.set(docName, timestamp);
    try {
      if (parseUrlId(docName).snapshotId) { return; }
      if (this._localFiles.has(docName)) {
        // Make sure the file is marked as locally present (it may be newly created).
        this._localFiles.set(docName, Promise.resolve(true));
      }
      if (this._disableS3) { return; }
      if (this._closed) { throw new Error("HostedStorageManager.markAsChanged called after closing"); }
      this._uploads.addOperation(docName);
    } finally {
      if (reason === 'edit') {
        this._markAsEdited(docName, timestamp);
      }
    }
  }

  /**
   * Schedule an update to a document's usage column.
   *
   * If `minimizeDelay` is true, HostedMetadataManager will attempt to
   * minimize delays by scheduling the update to occur as soon as possible.
   */
  public scheduleUsageUpdate(
    docName: string,
    docUsage: DocumentUsage|null,
    minimizeDelay = false
  ): void {
    const {forkId, snapshotId} = parseUrlId(docName);
    if (!this._metadataManager || forkId || snapshotId) { return; }

    this._metadataManager.scheduleUpdate(
      docName,
      {usage: docUsage},
      minimizeDelay
    );
  }

  /**
   * Check if there is a pending change to be pushed to S3.
   */
  public needsUpdate(): boolean {
    return this._uploads.hasPendingOperations();
  }

  public async removeSnapshots(docName: string, snapshotIds: string[]): Promise<void> {
    if (this._disableS3) { return; }
    await this._pruner.prune(docName, snapshotIds);
  }

  public async getSnapshots(docName: string, skipMetadataCache?: boolean): Promise<DocSnapshots> {
    if (this._disableS3) {
      return {
        snapshots: [{
          snapshotId: 'current',
          lastModified: new Date().toISOString(),
          docId: docName,
        }]
      };
    }
    const versions = skipMetadataCache ?
      await this._ext.versions(docName) :
      await this._inventory.versions(docName, this._latestVersions.get(docName) || null);
    const parts = parseUrlId(docName);
    return {
      snapshots: versions
        .map(v => {
          return {
            ...v,
            docId: buildUrlId({...parts, snapshotId: v.snapshotId}),
          };
        })
    };
  }

  /**
   * This is called when a document was edited by the user.
   */
  private _markAsEdited(docName: string, timestamp: string): void {
    if (parseUrlId(docName).snapshotId || !this._metadataManager) { return; }
    // Schedule a metadata update for the modified doc.
    this._metadataManager.scheduleUpdate(docName, {updatedAt: timestamp});
  }
2020-07-21 13:20:51 +00:00
|
|
|
/**
|
2021-01-12 15:48:40 +00:00
|
|
|
* Makes sure a document is assigned to this worker, adding an
|
|
|
|
* assignment if it has none. If the document is present in
|
|
|
|
* external storage, fetch it. Return true if the document was
|
|
|
|
* fetched.
|
|
|
|
*
|
|
|
|
* The document can optionally be copied from an alternative
|
|
|
|
* source (srcDocName). This is useful for forking.
|
|
|
|
*
|
|
|
|
* If srcDocName is 'new', checks for the document in external storage
|
|
|
|
* are skipped.
|
2020-07-21 13:20:51 +00:00
|
|
|
*/
|
2021-01-12 15:48:40 +00:00
|
|
|
private async _claimDocument(docName: string,
|
|
|
|
srcDocName?: string): Promise<boolean> {
|
2020-07-21 13:20:51 +00:00
|
|
|
// AsyncCreate.mapGetOrSet ensures we don't start multiple promises to talk to S3/Redis
|
|
|
|
// and that we clean up the failed key in case of failure.
|
|
|
|
return mapGetOrSet(this._localFiles, docName, async () => {
|
|
|
|
if (this._closed) { throw new Error("HostedStorageManager._ensureDocumentIsPresent called after closing"); }
|
|
|
|
checkValidDocId(docName);
|
|
|
|
|
2021-01-12 15:48:40 +00:00
|
|
|
const {trunkId, forkId, snapshotId} = parseUrlId(docName);
|
2020-07-21 13:20:51 +00:00
|
|
|
|
2021-01-12 15:48:40 +00:00
|
|
|
const canCreateFork = Boolean(srcDocName);
|
2020-07-21 13:20:51 +00:00
|
|
|
|
|
|
|
const docStatus = await this._docWorkerMap.getDocWorkerOrAssign(docName, this._docWorkerId);
|
|
|
|
if (!docStatus.isActive) { throw new Error(`Doc is not active on a DocWorker: ${docName}`); }
|
|
|
|
if (docStatus.docWorker.id !== this._docWorkerId) {
|
|
|
|
throw new Error(`Doc belongs to a different DocWorker (${docStatus.docWorker.id}): ${docName}`);
|
|
|
|
}
|
|
|
|
|
2021-01-12 15:48:40 +00:00
|
|
|
if (srcDocName === 'new') { return false; }
|
|
|
|
|
2020-07-21 13:20:51 +00:00
|
|
|
if (this._disableS3) {
|
|
|
|
// skip S3, just use file system
|
|
|
|
let present: boolean = await fse.pathExists(this.getPath(docName));
|
2021-01-12 15:48:40 +00:00
|
|
|
if ((forkId || snapshotId) && !present) {
|
2020-07-21 13:20:51 +00:00
|
|
|
if (!canCreateFork) { throw new Error(`Cannot create fork`); }
|
|
|
|
if (snapshotId && snapshotId !== 'current') {
|
|
|
|
throw new Error(`cannot find snapshot ${snapshotId} of ${docName}`);
|
|
|
|
}
|
|
|
|
if (await fse.pathExists(this.getPath(trunkId))) {
|
|
|
|
await fse.copy(this.getPath(trunkId), this.getPath(docName));
|
|
|
|
present = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return present;
|
|
|
|
}
|
|
|
|
|
|
|
|
const existsLocally = await fse.pathExists(this.getPath(docName));
|
|
|
|
if (existsLocally) {
|
|
|
|
if (!docStatus.docMD5 || docStatus.docMD5 === DELETED_TOKEN) {
|
2022-03-07 14:27:43 +00:00
|
|
|
// New doc appears to already exist, but may not exist in S3.
|
|
|
|
// Let's check.
|
|
|
|
const head = await this._ext.head(docName);
|
|
|
|
const lastLocalVersionSeen = this._latestVersions.get(docName);
|
|
|
|
if (head && lastLocalVersionSeen !== head.snapshotId) {
|
|
|
|
// Exists in S3, with a version not known to be latest seen
|
|
|
|
// by this worker - so wipe local version and defer to S3.
|
|
|
|
await this._wipeCache(docName);
|
|
|
|
} else {
|
|
|
|
// Go ahead and use local version.
|
|
|
|
return true;
|
|
|
|
}
|
2020-07-21 13:20:51 +00:00
|
|
|
} else {
|
|
|
|
// Doc exists locally and in S3 (according to redis).
|
|
|
|
// Make sure the checksum matches.
|
|
|
|
const checksum = await this._getHash(await this._prepareBackup(docName));
|
|
|
|
if (checksum === docStatus.docMD5) {
|
|
|
|
// Fine, accept the doc as existing on our file system.
|
|
|
|
return true;
|
|
|
|
} else {
|
2021-10-25 13:29:06 +00:00
|
|
|
this._log.info(docName, "Local hash does not match redis: %s vs %s", checksum, docStatus.docMD5);
|
2020-07-21 13:20:51 +00:00
|
|
|
// The file that exists locally does not match S3. But S3 is the canonical version.
|
|
|
|
// On the assumption that the local file is outdated, delete it.
|
|
|
|
// TODO: may want to be more careful in case the local file has modifications that
|
|
|
|
// simply never made it to S3 due to some kind of breakage.
|
2022-03-07 14:27:43 +00:00
|
|
|
await this._wipeCache(docName);
|
2020-07-21 13:20:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return this._fetchFromS3(docName, {
|
2021-01-12 15:48:40 +00:00
|
|
|
sourceDocId: srcDocName,
|
2020-07-21 13:20:51 +00:00
|
|
|
trunkId: forkId ? trunkId : undefined, snapshotId, canCreateFork
|
|
|
|
});
|
|
|
|
});
|
|
|
|
}
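
  // To summarize _claimDocument's contract: it resolves to true when a copy of the
  // document ends up present on the local filesystem (already cached, fetched from
  // external storage, or copied from a trunk), and false when the caller should
  // create a brand-new document (srcDocName of 'new', or no material found anywhere).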

  /**
   * Remove local version of a document, and state related to it.
   */
  private async _wipeCache(docName: string) {
    // NOTE: fse.remove succeeds also when the file does not exist.
    await fse.remove(this.getPath(docName));
    await fse.remove(this._getHashFile(this.getPath(docName), 'doc'));
    await fse.remove(this._getHashFile(this.getPath(docName), 'meta'));
    await this._inventory.clear(docName);
    this._latestVersions.delete(docName);
    this._latestMetaVersions.delete(docName);
  }

  /**
   * Fetch a document from s3 and save it locally as destId.grist
   *
   * If the document is not present in s3:
   *   + If it has a trunk:
   *     - If we do not have permission to create a fork, we throw an error
   *     - Else we fetch the document from the trunk instead
   *   + Otherwise return false
   *
   * Forks of forks will not spark joy at this time. An attempt to
   * fork a fork will result in a new fork of the original trunk.
   */
  private async _fetchFromS3(destId: string, options: {sourceDocId?: string,
                                                       trunkId?: string,
                                                       snapshotId?: string,
                                                       canCreateFork?: boolean}): Promise<boolean> {
    const destIdWithoutSnapshot = buildUrlId({...parseUrlId(destId), snapshotId: undefined});
    let sourceDocId = options.sourceDocId || destIdWithoutSnapshot;
    if (!await this._ext.exists(destIdWithoutSnapshot)) {
      if (!options.trunkId) { return false; }  // Document not found in S3.
      // No such fork in s3 yet, try from the trunk (if we are allowed to create the fork).
      if (!options.canCreateFork) { throw new Error('Cannot create fork'); }
      // The special NEW_DOCUMENT_CODE trunk means we should create an empty document.
      if (options.trunkId === NEW_DOCUMENT_CODE) { return false; }
      if (!await this._ext.exists(options.trunkId)) { throw new Error('Cannot find original'); }
      sourceDocId = options.trunkId;
    }
    await this._ext.downloadTo(sourceDocId, destId, this.getPath(destId), options.snapshotId);
    return true;
  }

  /**
   * Get a checksum for the given file (absolute path).
   */
  private _getHash(srcPath: string): Promise<string> {
    return checksumFile(srcPath, 'md5');
  }

  /**
   * We save hashes in files with the suffix -hash-<family>.
   */
  private _getHashFile(docPath: string, family: string = 'doc'): string {
    return docPath + `-hash-${family}`;
  }
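
  // For example, _getHashFile('/persist/docs/8x4dWQ.grist', 'meta') returns
  // '/persist/docs/8x4dWQ.grist-hash-meta' (illustrative path).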

  /**
   * Makes a copy of a document to a file with the given suffix (-backup by default).
   * The copy is made using Sqlite's backup API. The backup is made incrementally so
   * the db is never locked for long by the backup. The backup process will survive
   * transient locks on the db.
   */
  private async _prepareBackup(docId: string, postfix: string = 'backup'): Promise<string> {
    const docPath = this.getPath(docId);
    const tmpPath = `${docPath}-${postfix}`;
    return backupSqliteDatabase(docPath, tmpPath, undefined, postfix, {docId});
  }

  /**
   * Send a document to S3.
   */
  private async _pushToS3(docId: string): Promise<void> {
    let tmpPath: string|null = null;

    try {
      if (this._prepareFiles.has(docId)) {
        throw new Error('too soon to consider pushing');
      }
      tmpPath = await this._prepareBackup(docId);
      const docMetadata = await this._getDocMetadata(tmpPath);
      const label = this._labels.get(docId);
      const t = this._timestamps.get(docId) || new Date().toISOString();
      this._labels.delete(docId);
      // Keep metadata keys simple, short, and lowercase.
      const metadata = {
        ...docMetadata,
        ...label && {label},
        t,
      };
      const prevSnapshotId = this._latestVersions.get(docId) || null;
      const newSnapshotId = await this._ext.upload(docId, tmpPath, metadata);
      if (newSnapshotId === Unchanged) {
        // Nothing uploaded because nothing changed.
        return;
      }
      if (!newSnapshotId) {
        // This is unexpected.
        throw new Error('No snapshotId allocated after upload');
      }
      const snapshot = {
        lastModified: t,
        snapshotId: newSnapshotId,
        metadata
      };
      await this._inventory.add(docId, snapshot, prevSnapshotId);
      await this._onInventoryChange(docId);
    } finally {
      // Clean up the backup.
      // NOTE: fse.remove succeeds also when the file does not exist.
      if (tmpPath) { await fse.remove(tmpPath); }
    }
  }
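
  // The metadata attached to each uploaded snapshot ends up looking something like
  // (illustrative values):
  //   {h: '28fb4a...', n: '42', tz: 'America/New_York', label: 'mytag', t: '2022-05-16T17:41:12.000Z'}
  // where h/n come from the document's action history, tz from _grist_DocInfo,
  // label from makeBackup(), and t is the last-change timestamp.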

  // Make sure inventory change is followed up on.
  private async _onInventoryChange(docId: string) {
    const scheduled = this._pruner.requestPrune(docId);
    if (!scheduled) {
      await this._inventory.flush(docId);
    }
  }

  // Extract actionHash, actionNum, and timezone from a document backup.
  private async _getDocMetadata(fname: string): Promise<{[key: string]: string}> {
    const result: Record<string, string> = {};
    const db = await SQLiteDB.openDBRaw(fname, OpenMode.OPEN_READONLY);
    try {
      const actionQuery = await db.get('select actionHash, actionNum from _gristsys_ActionHistoryBranch as b ' +
                                       'left join _gristsys_ActionHistory as h on h.id = b.actionRef ' +
                                       'where b.name = ?', 'shared');
      const h = actionQuery?.actionHash;
      if (h) { result.h = h; }
      const n = actionQuery?.actionNum;
      if (n) { result.n = String(n); }
    } catch (e) {
      // Tolerate files that don't have _gristsys_* yet (although we don't need to).
    }
    try {
      const tzQuery = await db.get('select timezone from _grist_DocInfo where id = 1');
      const tz = tzQuery?.timezone;
      if (tz) { result.tz = tz; }
    } catch (e) {
      // Tolerate files that don't have _grist_DocInfo yet.
    }
    await db.close();
    return result;
  }

  // Wrap external storage in a checksum-aware decorator that will retry until
  // consistency.
  private _getChecksummedExternalStorage(family: string, core: ExternalStorage,
                                         versions: Map<string, string>,
                                         options: HostedStorageOptions) {
    return new ChecksummedExternalStorage(family, core, {
      maxRetries: 4,
      initialDelayMs: options.secondsBeforeFirstRetry * 1000,
      computeFileHash: this._getHash.bind(this),
      sharedHash: {
        save: async (key, checksum) => {
          await this._docWorkerMap.updateChecksum(family, key, checksum);
        },
        load: async (key) => {
          return await this._docWorkerMap.getChecksum(family, key);
        }
      },
      localHash: {
        save: async (key, checksum) => {
          const fname = this._getHashFile(this.getPath(key), family);
          await fse.writeFile(fname, checksum);
        },
        load: async (key) => {
          const fname = this._getHashFile(this.getPath(key), family);
          if (!await fse.pathExists(fname)) { return null; }
          return await fse.readFile(fname, 'utf8');
        }
      },
      latestVersion: {
        save: async (key, ver) => {
          versions.set(key, ver);
        },
        load: async (key) => versions.get(key) || null
      }
    });
  }
}

/**
 * Make a copy of a sqlite database safely and without locking it for long periods, using the
 * sqlite backup api.
 * @param src: database to copy
 * @param dest: file to which we copy the database
 * @param testProgress: a callback used for test purposes to monitor detailed timing of backup.
 * @param label: a tag to add to log messages
 * @return dest
 */
export async function backupSqliteDatabase(src: string, dest: string,
                                           testProgress?: (e: BackupEvent) => void,
                                           label?: string,
                                           logMeta: object = {}): Promise<string> {
  const _log = new LogMethods<null>('backupSqliteDatabase: ', () => logMeta);
  _log.debug(null, `starting copy of ${src} (${label})`);
  let db: sqlite3.DatabaseWithBackup|null = null;
  let success: boolean = false;
  let maxStepTimeMs: number = 0;
  let numSteps: number = 0;
  try {
    // NOTE: fse.remove succeeds also when the file does not exist.
    await fse.remove(dest);  // Just in case some previous process terminated very badly.
                             // Sqlite will try to open any existing material at this
                             // path prior to overwriting it.
    await fromCallback(cb => { db = new sqlite3.Database(dest, cb) as sqlite3.DatabaseWithBackup; });
    // Turn off protections that can slow backup steps. If the app or OS
    // crashes, the backup may be corrupt. In Grist's use case, if the app or OS
    // crashes, no use will be made of the backup, so we're OK.
    // This sets flags matching the --async option to .backup in the sqlite3
    // shell program: https://www.sqlite.org/src/info/7b6a605b1883dfcb
    await fromCallback(cb => db!.exec("PRAGMA synchronous=OFF; PRAGMA journal_mode=OFF;", cb));
    if (testProgress) { testProgress({action: 'open', phase: 'before'}); }
    const backup: sqlite3.Backup = db!.backup(src, 'main', 'main', false);
    if (testProgress) { testProgress({action: 'open', phase: 'after'}); }
    let remaining: number = -1;
    let prevError: Error|null = null;
    let errorMsgTime: number = 0;
    let restartMsgTime: number = 0;
    for (;;) {
      // For diagnostic purposes, issue a message if the backup appears to have been
      // restarted by sqlite. The symptom of a restart we use is that the number of
      // pages remaining in the backup increases rather than decreases. That number
      // is reported by backup.remaining (after an initial period where sqlite
      // doesn't yet know how many pages there are and reports -1).
      // So as not to spam the log if the user is making a burst of changes, we report
      // this message at most once a second.
      // See https://www.sqlite.org/c3ref/backup_finish.html and
      // https://github.com/mapbox/node-sqlite3/pull/1116 for api details.
      numSteps++;
      const stepStart = Date.now();
      if (remaining >= 0 && backup.remaining > remaining && stepStart - restartMsgTime > 1000) {
        _log.info(null, `copy of ${src} (${label}) restarted`);
        restartMsgTime = stepStart;
      }
      remaining = backup.remaining;
      if (testProgress) { testProgress({action: 'step', phase: 'before'}); }
      let isCompleted: boolean = false;
      try {
        isCompleted = Boolean(await fromCallback(cb => backup.step(PAGES_TO_BACKUP_PER_STEP, cb)));
      } catch (err) {
        if (String(err) !== String(prevError) || Date.now() - errorMsgTime > 1000) {
          _log.info(null, `error (${src} ${label}): ${err}`);
          errorMsgTime = Date.now();
        }
        prevError = err;
        if (backup.failed) { throw new Error(`backupSqliteDatabase (${src} ${label}): internal copy failed`); }
      } finally {
        const stepTimeMs = Date.now() - stepStart;
        if (stepTimeMs > maxStepTimeMs) { maxStepTimeMs = stepTimeMs; }
      }
      if (testProgress) { testProgress({action: 'step', phase: 'after'}); }
      if (isCompleted) {
        _log.info(null, `copy of ${src} (${label}) completed successfully`);
        success = true;
        break;
      }
      await delay(PAUSE_BETWEEN_BACKUP_STEPS_IN_MS);
    }
  } finally {
    if (testProgress) { testProgress({action: 'close', phase: 'before'}); }
    try {
      if (db) { await fromCallback(cb => db!.close(cb)); }
    } catch (err) {
      _log.debug(null, `problem stopping copy of ${src} (${label}): ${err}`);
    }
    if (!success) {
      // Something went wrong, remove the backup if it was started.
      try {
        // NOTE: fse.remove succeeds also when the file does not exist.
        await fse.remove(dest);
      } catch (err) {
        _log.debug(null, `problem removing copy of ${src} (${label}): ${err}`);
      }
    }
    if (testProgress) { testProgress({action: 'close', phase: 'after'}); }
    _log.rawLog('debug', null, `stopped copy of ${src} (${label})`, {maxStepTimeMs, numSteps});
  }
  return dest;
}

/**
 * A summary of an event during a backup. Emitted for test purposes, to check timing.
 */
export interface BackupEvent {
  action: 'step' | 'close' | 'open';
  phase: 'before' | 'after';
}
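
// A minimal usage sketch of backupSqliteDatabase (paths and callback are
// illustrative):
//
//   const dest = await backupSqliteDatabase(
//     '/persist/docs/8x4dWQ.grist', '/tmp/8x4dWQ-backup',
//     (e) => console.log(`backup ${e.action} ${e.phase}`),  // test-only timing hook
//     'example', {docId: '8x4dWQ'});
//   // dest === '/tmp/8x4dWQ-backup'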