log periodic per-document statistics about snapshot generation

This is to facilitate alerting to detect if snapshot generation were to
stall for a document.
This commit is contained in:
Paul Fitzpatrick 2024-07-01 10:24:16 -04:00
parent 3082fe0f01
commit 4815a007ed
6 changed files with 137 additions and 30 deletions

View File

@ -0,0 +1,27 @@
import moment from 'moment';
/**
* Output an ISO8601 format datetime string, with timezone.
* Any string fed in without timezone is expected to be in UTC.
*
* When connected to postgres, dates will be extracted as Date objects,
* with timezone information. The normalization done here is not
* really needed in this case.
*
* Timestamps in SQLite are stored as UTC, and read as strings
* (without timezone information). The normalization here is
* pretty important in this case.
*/
export function normalizedDateTimeString(dateTime: any): string {
if (!dateTime) { return dateTime; }
if (dateTime instanceof Date) {
return moment(dateTime).toISOString();
}
if (typeof dateTime === 'string' || typeof dateTime === 'number') {
// When SQLite returns a string, it will be in UTC.
// Need to make sure it actually have timezone info in it
// (will not by default).
return moment.utc(dateTime).toISOString();
}
throw new Error(`normalizedDateTimeString cannot handle ${dateTime}`);
}

View File

@ -1,6 +1,7 @@
import { ApiError } from 'app/common/ApiError'; import { ApiError } from 'app/common/ApiError';
import { delay } from 'app/common/delay'; import { delay } from 'app/common/delay';
import { buildUrlId } from 'app/common/gristUrls'; import { buildUrlId } from 'app/common/gristUrls';
import { normalizedDateTimeString } from 'app/common/normalizedDateTimeString';
import { BillingAccount } from 'app/gen-server/entity/BillingAccount'; import { BillingAccount } from 'app/gen-server/entity/BillingAccount';
import { Document } from 'app/gen-server/entity/Document'; import { Document } from 'app/gen-server/entity/Document';
import { Organization } from 'app/gen-server/entity/Organization'; import { Organization } from 'app/gen-server/entity/Organization';
@ -16,7 +17,6 @@ import log from 'app/server/lib/log';
import { IPermitStore } from 'app/server/lib/Permit'; import { IPermitStore } from 'app/server/lib/Permit';
import { optStringParam, stringParam } from 'app/server/lib/requestUtils'; import { optStringParam, stringParam } from 'app/server/lib/requestUtils';
import * as express from 'express'; import * as express from 'express';
import moment from 'moment';
import fetch from 'node-fetch'; import fetch from 'node-fetch';
import * as Fetch from 'node-fetch'; import * as Fetch from 'node-fetch';
import { EntityManager } from 'typeorm'; import { EntityManager } from 'typeorm';
@ -416,32 +416,6 @@ export class Housekeeper {
} }
} }
/**
* Output an ISO8601 format datetime string, with timezone.
* Any string fed in without timezone is expected to be in UTC.
*
* When connected to postgres, dates will be extracted as Date objects,
* with timezone information. The normalization done here is not
* really needed in this case.
*
* Timestamps in SQLite are stored as UTC, and read as strings
* (without timezone information). The normalization here is
* pretty important in this case.
*/
function normalizedDateTimeString(dateTime: any): string {
if (!dateTime) { return dateTime; }
if (dateTime instanceof Date) {
return moment(dateTime).toISOString();
}
if (typeof dateTime === 'string') {
// When SQLite returns a string, it will be in UTC.
// Need to make sure it actually have timezone info in it
// (will not by default).
return moment.utc(dateTime).toISOString();
}
throw new Error(`normalizedDateTimeString cannot handle ${dateTime}`);
}
/** /**
* Call callback(item) for each item on the list, sleeping periodically to allow other works to * Call callback(item) for each item on the list, sleeping periodically to allow other works to
* happen. Any time work takes more than SYNC_WORK_LIMIT_MS, will sleep for SYNC_WORK_BREAK_MS. * happen. Any time work takes more than SYNC_WORK_LIMIT_MS, will sleep for SYNC_WORK_BREAK_MS.

View File

@ -69,6 +69,7 @@ import {commonUrls, parseUrlId} from 'app/common/gristUrls';
import {byteString, countIf, retryOnce, safeJsonParse, timeoutReached} from 'app/common/gutil'; import {byteString, countIf, retryOnce, safeJsonParse, timeoutReached} from 'app/common/gutil';
import {InactivityTimer} from 'app/common/InactivityTimer'; import {InactivityTimer} from 'app/common/InactivityTimer';
import {Interval} from 'app/common/Interval'; import {Interval} from 'app/common/Interval';
import {normalizedDateTimeString} from 'app/common/normalizedDateTimeString';
import { import {
compilePredicateFormula, compilePredicateFormula,
getPredicateFormulaProperties, getPredicateFormulaProperties,
@ -2496,6 +2497,23 @@ export class ActiveDoc extends EventEmitter {
} }
} }
private _logSnapshotProgress(docSession: OptDocSession) {
const snapshotProgress = this._docManager.storageManager.getSnapshotProgress(this.docName);
const lastWindowTime = (snapshotProgress.lastWindowStartedAt &&
snapshotProgress.lastWindowDoneAt &&
snapshotProgress.lastWindowDoneAt > snapshotProgress.lastWindowStartedAt) ?
snapshotProgress.lastWindowDoneAt : Date.now();
const delay = snapshotProgress.lastWindowStartedAt ?
lastWindowTime - snapshotProgress.lastWindowStartedAt : null;
this._log.debug(docSession, 'snapshot status', {
...snapshotProgress,
lastChangeAt: normalizedDateTimeString(snapshotProgress.lastChangeAt),
lastWindowStartedAt: normalizedDateTimeString(snapshotProgress.lastWindowStartedAt),
lastWindowDoneAt: normalizedDateTimeString(snapshotProgress.lastWindowDoneAt),
delay,
});
}
private _logDocMetrics(docSession: OptDocSession, triggeredBy: 'docOpen' | 'interval'| 'docClose') { private _logDocMetrics(docSession: OptDocSession, triggeredBy: 'docOpen' | 'interval'| 'docClose') {
this.logTelemetryEvent(docSession, 'documentUsage', { this.logTelemetryEvent(docSession, 'documentUsage', {
limited: { limited: {
@ -2513,6 +2531,9 @@ export class ActiveDoc extends EventEmitter {
...this._getCustomWidgetMetrics(), ...this._getCustomWidgetMetrics(),
}, },
}); });
// Log progress on making snapshots periodically, to catch anything
// excessively slow.
this._logSnapshotProgress(docSession);
} }
private _getAccessRuleMetrics() { private _getAccessRuleMetrics() {

View File

@ -11,7 +11,7 @@ import * as gutil from 'app/common/gutil';
import {Comm} from 'app/server/lib/Comm'; import {Comm} from 'app/server/lib/Comm';
import * as docUtils from 'app/server/lib/docUtils'; import * as docUtils from 'app/server/lib/docUtils';
import {GristServer} from 'app/server/lib/GristServer'; import {GristServer} from 'app/server/lib/GristServer';
import {IDocStorageManager} from 'app/server/lib/IDocStorageManager'; import {IDocStorageManager, SnapshotProgress} from 'app/server/lib/IDocStorageManager';
import {IShell} from 'app/server/lib/IShell'; import {IShell} from 'app/server/lib/IShell';
import log from 'app/server/lib/log'; import log from 'app/server/lib/log';
import uuidv4 from "uuid/v4"; import uuidv4 from "uuid/v4";
@ -257,6 +257,10 @@ export class DocStorageManager implements IDocStorageManager {
throw new Error('removeSnapshots not implemented'); throw new Error('removeSnapshots not implemented');
} }
public getSnapshotProgress(): SnapshotProgress {
throw new Error('getSnapshotProgress not implemented');
}
public async replace(docName: string, options: any): Promise<void> { public async replace(docName: string, options: any): Promise<void> {
throw new Error('replacement not implemented'); throw new Error('replacement not implemented');
} }

View File

@ -15,7 +15,7 @@ import {IDocWorkerMap} from 'app/server/lib/DocWorkerMap';
import {ChecksummedExternalStorage, DELETED_TOKEN, ExternalStorage, Unchanged} from 'app/server/lib/ExternalStorage'; import {ChecksummedExternalStorage, DELETED_TOKEN, ExternalStorage, Unchanged} from 'app/server/lib/ExternalStorage';
import {HostedMetadataManager} from 'app/server/lib/HostedMetadataManager'; import {HostedMetadataManager} from 'app/server/lib/HostedMetadataManager';
import {ICreate} from 'app/server/lib/ICreate'; import {ICreate} from 'app/server/lib/ICreate';
import {IDocStorageManager} from 'app/server/lib/IDocStorageManager'; import {IDocStorageManager, SnapshotProgress} from 'app/server/lib/IDocStorageManager';
import {LogMethods} from "app/server/lib/LogMethods"; import {LogMethods} from "app/server/lib/LogMethods";
import {fromCallback} from 'app/server/lib/serverUtils'; import {fromCallback} from 'app/server/lib/serverUtils';
import * as fse from 'fs-extra'; import * as fse from 'fs-extra';
@ -94,6 +94,9 @@ export class HostedStorageManager implements IDocStorageManager {
// Time at which document was last changed. // Time at which document was last changed.
private _timestamps = new Map<string, string>(); private _timestamps = new Map<string, string>();
// Statistics related to snapshot generation.
private _snapshotProgress = new Map<string, SnapshotProgress>();
// Access external storage. // Access external storage.
private _ext: ChecksummedExternalStorage; private _ext: ChecksummedExternalStorage;
private _extMeta: ChecksummedExternalStorage; private _extMeta: ChecksummedExternalStorage;
@ -223,6 +226,25 @@ export class HostedStorageManager implements IDocStorageManager {
return path.basename(altDocName, '.grist'); return path.basename(altDocName, '.grist');
} }
/**
* Read some statistics related to generating snapshots.
*/
public getSnapshotProgress(docName: string): SnapshotProgress {
let snapshotProgress = this._snapshotProgress.get(docName);
if (!snapshotProgress) {
snapshotProgress = {
pushes: 0,
skippedPushes: 0,
errors: 0,
changes: 0,
windowsStarted: 0,
windowsDone: 0,
};
this._snapshotProgress.set(docName, snapshotProgress);
}
return snapshotProgress;
}
/** /**
* Prepares a document for use locally. Here we sync the doc from S3 to the local filesystem. * Prepares a document for use locally. Here we sync the doc from S3 to the local filesystem.
* Returns whether the document is new (needs to be created). * Returns whether the document is new (needs to be created).
@ -476,7 +498,11 @@ export class HostedStorageManager implements IDocStorageManager {
* This is called when a document may have been changed, via edits or migrations etc. * This is called when a document may have been changed, via edits or migrations etc.
*/ */
public markAsChanged(docName: string, reason?: string): void { public markAsChanged(docName: string, reason?: string): void {
const timestamp = new Date().toISOString(); const now = new Date();
const snapshotProgress = this.getSnapshotProgress(docName);
snapshotProgress.lastChangeAt = now.getTime();
snapshotProgress.changes++;
const timestamp = now.toISOString();
this._timestamps.set(docName, timestamp); this._timestamps.set(docName, timestamp);
try { try {
if (parseUrlId(docName).snapshotId) { return; } if (parseUrlId(docName).snapshotId) { return; }
@ -486,6 +512,10 @@ export class HostedStorageManager implements IDocStorageManager {
} }
if (this._disableS3) { return; } if (this._disableS3) { return; }
if (this._closed) { throw new Error("HostedStorageManager.markAsChanged called after closing"); } if (this._closed) { throw new Error("HostedStorageManager.markAsChanged called after closing"); }
if (!this._uploads.hasPendingOperation(docName)) {
snapshotProgress.lastWindowStartedAt = now.getTime();
snapshotProgress.windowsStarted++;
}
this._uploads.addOperation(docName); this._uploads.addOperation(docName);
} finally { } finally {
if (reason === 'edit') { if (reason === 'edit') {
@ -729,6 +759,7 @@ export class HostedStorageManager implements IDocStorageManager {
private async _pushToS3(docId: string): Promise<void> { private async _pushToS3(docId: string): Promise<void> {
let tmpPath: string|null = null; let tmpPath: string|null = null;
const snapshotProgress = this.getSnapshotProgress(docId);
try { try {
if (this._prepareFiles.has(docId)) { if (this._prepareFiles.has(docId)) {
throw new Error('too soon to consider pushing'); throw new Error('too soon to consider pushing');
@ -748,14 +779,18 @@ export class HostedStorageManager implements IDocStorageManager {
await this._inventory.uploadAndAdd(docId, async () => { await this._inventory.uploadAndAdd(docId, async () => {
const prevSnapshotId = this._latestVersions.get(docId) || null; const prevSnapshotId = this._latestVersions.get(docId) || null;
const newSnapshotId = await this._ext.upload(docId, tmpPath as string, metadata); const newSnapshotId = await this._ext.upload(docId, tmpPath as string, metadata);
snapshotProgress.lastWindowDoneAt = Date.now();
snapshotProgress.windowsDone++;
if (newSnapshotId === Unchanged) { if (newSnapshotId === Unchanged) {
// Nothing uploaded because nothing changed // Nothing uploaded because nothing changed
snapshotProgress.skippedPushes++;
return { prevSnapshotId }; return { prevSnapshotId };
} }
if (!newSnapshotId) { if (!newSnapshotId) {
// This is unexpected. // This is unexpected.
throw new Error('No snapshotId allocated after upload'); throw new Error('No snapshotId allocated after upload');
} }
snapshotProgress.pushes++;
const snapshot = { const snapshot = {
lastModified: t, lastModified: t,
snapshotId: newSnapshotId, snapshotId: newSnapshotId,
@ -767,6 +802,10 @@ export class HostedStorageManager implements IDocStorageManager {
if (changeMade) { if (changeMade) {
await this._onInventoryChange(docId); await this._onInventoryChange(docId);
} }
} catch (e) {
snapshotProgress.errors++;
// Snapshot window completion time deliberately not set.
throw e;
} finally { } finally {
// Clean up backup. // Clean up backup.
// NOTE: fse.remove succeeds also when the file does not exist. // NOTE: fse.remove succeeds also when the file does not exist.

View File

@ -36,6 +36,8 @@ export interface IDocStorageManager {
// Metadata may not be returned in this case. // Metadata may not be returned in this case.
getSnapshots(docName: string, skipMetadataCache?: boolean): Promise<DocSnapshots>; getSnapshots(docName: string, skipMetadataCache?: boolean): Promise<DocSnapshots>;
removeSnapshots(docName: string, snapshotIds: string[]): Promise<void>; removeSnapshots(docName: string, snapshotIds: string[]): Promise<void>;
// Get information about how snapshot generation is going.
getSnapshotProgress(docName: string): SnapshotProgress;
replace(docName: string, options: DocReplacementOptions): Promise<void>; replace(docName: string, options: DocReplacementOptions): Promise<void>;
} }
@ -66,5 +68,45 @@ export class TrivialDocStorageManager implements IDocStorageManager {
public async flushDoc() {} public async flushDoc() {}
public async getSnapshots(): Promise<never> { throw new Error('no'); } public async getSnapshots(): Promise<never> { throw new Error('no'); }
public async removeSnapshots(): Promise<never> { throw new Error('no'); } public async removeSnapshots(): Promise<never> { throw new Error('no'); }
public getSnapshotProgress(): SnapshotProgress { throw new Error('no'); }
public async replace(): Promise<never> { throw new Error('no'); } public async replace(): Promise<never> { throw new Error('no'); }
} }
/**
* Some summary information about how snapshot generation is going.
* Any times are in ms.
* All information is within the lifetime of a doc worker, not global.
*/
export interface SnapshotProgress {
// The last time the document was marked as having changed.
lastChangeAt?: number;
// The last time a save window started for the document (checking to see
// if it needs to be pushed, and pushing it if so, possibly waiting
// quite some time to bundle any other changes).
lastWindowStartedAt?: number;
// The last time the document was either pushed or determined to not
// actually need to be pushed, after having been marked as changed.
lastWindowDoneAt?: number;
// Number of times the document was pushed.
pushes: number;
// Number of times the document was not pushed because no change found.
skippedPushes: number;
// Number of times there was an error trying to push.
errors: number;
// Number of times the document was marked as changed.
// Will generally be a lot greater than saves.
changes: number;
// Number of times a save window was started.
windowsStarted: number;
// Number of times a save window was completed.
windowsDone: number;
}