(core) Enforcing data size limit

Summary:
Track 'data size' in ActiveDoc alongside row count. As before, measure it at most once every 5 minutes (triggered by a change); once it becomes high enough to matter, measure it after every change instead.

A document is now considered to be approaching/exceeding 'the data limit' if either the data size or the row count is approaching/exceeding its own limit.

Unrelated: tweaked teamFreeFeatures.snapshotWindow based on Quip comments

Test Plan: Tested manually that data size is now logged after every change once it gets high enough, but only if the row count isn't also too high. Still too early for automated tests.

Reviewers: georgegevoian

Reviewed By: georgegevoian

Differential Revision: https://phab.getgrist.com/D3341
This commit is contained in:
Alex Hall 2022-03-30 13:45:37 +02:00
parent 8269c33d01
commit 21b0ac3eff
5 changed files with 105 additions and 52 deletions

View File

@ -1,6 +1,6 @@
export interface SnapshotWindow { export interface SnapshotWindow {
count: number; count: number;
unit: 'month' | 'year'; unit: 'days' | 'month' | 'year';
} }
// A product is essentially a list of flags and limits that we may enforce/support. // A product is essentially a list of flags and limits that we may enforce/support.
@ -46,6 +46,7 @@ export interface Features {
// number of rows (total) in a single document. // number of rows (total) in a single document.
// Actual max for a document may be higher. // Actual max for a document may be higher.
baseMaxApiUnitsPerDocumentPerDay?: number; // Similar for api calls. baseMaxApiUnitsPerDocumentPerDay?: number; // Similar for api calls.
baseMaxDataSizePerDocument?: number; // Similar maximum for number of bytes of 'normal' data in a document
gracePeriodDays?: number; // Duration of the grace period in days, before entering delete-only mode gracePeriodDays?: number; // Duration of the grace period in days, before entering delete-only mode
} }

View File

@ -35,9 +35,10 @@ export const teamFreeFeatures: Features = {
maxSharesPerWorkspace: 0, // all workspace shares need to be org members. maxSharesPerWorkspace: 0, // all workspace shares need to be org members.
maxSharesPerDoc: 2, maxSharesPerDoc: 2,
maxDocsPerOrg: 20, maxDocsPerOrg: 20,
snapshotWindow: { count: 1, unit: 'month' }, snapshotWindow: { count: 30, unit: 'days' },
baseMaxRowsPerDocument: 5000, baseMaxRowsPerDocument: 5000,
baseMaxApiUnitsPerDocumentPerDay: 5000, baseMaxApiUnitsPerDocumentPerDay: 5000,
baseMaxDataSizePerDocument: 5000 * 2 * 1024, // 2KB per row
gracePeriodDays: 14, gracePeriodDays: 14,
}; };

View File

@ -115,6 +115,9 @@ const MEMORY_MEASUREMENT_INTERVAL_MS = 60 * 1000;
// A hook for dependency injection. // A hook for dependency injection.
export const Deps = {ACTIVEDOC_TIMEOUT}; export const Deps = {ACTIVEDOC_TIMEOUT};
// Ratio of the row/data size limit where we tell users that they're approaching the limit
const APPROACHING_LIMIT_RATIO = 0.9;
/** /**
* Represents an active document with the given name. The document isn't actually open until * Represents an active document with the given name. The document isn't actually open until
* either .loadDoc() or .createEmptyDoc() is called. * either .loadDoc() or .createEmptyDoc() is called.
@ -161,8 +164,10 @@ export class ActiveDoc extends EventEmitter {
// initialized. True on success. // initialized. True on success.
private _fullyLoaded: boolean = false; // Becomes true once all columns are loaded/computed. private _fullyLoaded: boolean = false; // Becomes true once all columns are loaded/computed.
private _lastMemoryMeasurement: number = 0; // Timestamp when memory was last measured. private _lastMemoryMeasurement: number = 0; // Timestamp when memory was last measured.
private _lastDataSizeMeasurement: number = 0; // Timestamp when dbstat data size was last measured.
private _fetchCache = new MapWithTTL<string, Promise<TableDataAction>>(DEFAULT_CACHE_TTL); private _fetchCache = new MapWithTTL<string, Promise<TableDataAction>>(DEFAULT_CACHE_TTL);
private _rowCount?: number; private _rowCount?: number;
private _dataSize?: number;
private _productFeatures?: Features; private _productFeatures?: Features;
private _gracePeriodStart: Date|null = null; private _gracePeriodStart: Date|null = null;
@ -219,15 +224,20 @@ export class ActiveDoc extends EventEmitter {
public get isShuttingDown(): boolean { return this._shuttingDown; } public get isShuttingDown(): boolean { return this._shuttingDown; }
public async getRowCount(docSession: OptDocSession): Promise<number | undefined> { public get rowLimitRatio() {
if (await this._granularAccess.canReadEverything(docSession)) { return this._rowLimit && this._rowCount ? this._rowCount / this._rowLimit : 0;
return this._rowCount;
}
} }
public async getDataLimitStatus(): Promise<DataLimitStatus> { public get dataSizeLimitRatio() {
if (this._rowLimit && this._rowCount) { return this._dataSizeLimit && this._dataSize ? this._dataSize / this._dataSizeLimit : 0;
const ratio = this._rowCount / this._rowLimit; }
public get dataLimitRatio() {
return Math.max(this.rowLimitRatio, this.dataSizeLimitRatio);
}
public get dataLimitStatus(): DataLimitStatus {
const ratio = this.dataLimitRatio;
if (ratio > 1) { if (ratio > 1) {
const start = this._gracePeriodStart; const start = this._gracePeriodStart;
const days = this._productFeatures?.gracePeriodDays; const days = this._productFeatures?.gracePeriodDays;
@ -236,13 +246,23 @@ export class ActiveDoc extends EventEmitter {
} else { } else {
return 'gracePeriod'; return 'gracePeriod';
} }
} else if (ratio > 0.9) { } else if (ratio > APPROACHING_LIMIT_RATIO) {
return 'approachingLimit'; return 'approachingLimit';
} }
}
return null; return null;
} }
/**
 * Returns the stored row count, but only to sessions that are allowed to read everything
 * in the document. Other sessions get undefined.
 */
public async getRowCount(docSession: OptDocSession): Promise<number | undefined> {
  const hasFullReadAccess = await this._granularAccess.canReadEverything(docSession);
  return hasFullReadAccess ? this._rowCount : undefined;
}
/**
 * Async accessor for the document's current data limit status.
 * Currently returns the same value for every caller.
 */
public async getDataLimitStatus(): Promise<DataLimitStatus> {
  // TODO filter based on session permissions
  const status: DataLimitStatus = this.dataLimitStatus;
  return status;
}
public async getUserOverride(docSession: OptDocSession) { public async getUserOverride(docSession: OptDocSession) {
return this._granularAccess.getUserOverride(docSession); return this._granularAccess.getUserOverride(docSession);
} }
@ -926,7 +946,7 @@ export class ActiveDoc extends EventEmitter {
await this.waitForInitialization(); await this.waitForInitialization();
if ( if (
await this.getDataLimitStatus() === "deleteOnly" && this.dataLimitStatus === "deleteOnly" &&
!actions.every(action => [ !actions.every(action => [
'RemoveTable', 'RemoveColumn', 'RemoveRecord', 'BulkRemoveRecord', 'RemoveTable', 'RemoveColumn', 'RemoveRecord', 'BulkRemoveRecord',
'RemoveViewSection', 'RemoveView', 'ApplyUndoActions', 'RemoveViewSection', 'RemoveView', 'ApplyUndoActions',
@ -1254,11 +1274,6 @@ export class ActiveDoc extends EventEmitter {
} }
const user = docSession ? await this._granularAccess.getCachedUser(docSession) : undefined; const user = docSession ? await this._granularAccess.getCachedUser(docSession) : undefined;
sandboxActionBundle = await this._rawPyCall('apply_user_actions', normalActions, user?.toJSON()); sandboxActionBundle = await this._rawPyCall('apply_user_actions', normalActions, user?.toJSON());
log.rawInfo('Sandbox row count', {
...this.getLogMeta(docSession),
rowCount: sandboxActionBundle.rowCount
});
await this._updateRowCount(sandboxActionBundle.rowCount);
await this._reportDataEngineMemory(); await this._reportDataEngineMemory();
} else { } else {
// Create default SandboxActionBundle to use if the data engine is not called. // Create default SandboxActionBundle to use if the data engine is not called.
@ -1337,6 +1352,39 @@ export class ActiveDoc extends EventEmitter {
return this._granularAccess; return this._granularAccess;
} }
/**
 * Stores the given row count, logs it, re-checks the overall data limit, and then decides
 * whether the data size should be measured now.
 *
 * @param rowCount - The row count to record for this document.
 * @param docSession - Used for log metadata and passed on to the data size check; may be null.
 */
public async updateRowCount(rowCount: number, docSession: OptDocSession | null) {
  this._rowCount = rowCount;
  log.rawInfo('Sandbox row count', {...this.getLogMeta(docSession), rowCount});
  await this._checkDataLimitRatio();

  // Calculating data size is potentially expensive, so by default measure it at most once every 5 minutes.
  // Measure it after every change if the user is currently being warned specifically about
  // approaching or exceeding the data size limit but not the row count limit,
  // because we don't need to warn about both limits at the same time.
  const warnedOnlyAboutDataSize =
    (this.dataSizeLimitRatio > APPROACHING_LIMIT_RATIO && this.rowLimitRatio <= APPROACHING_LIMIT_RATIO) ||
    (this.dataSizeLimitRatio > 1.0 && this.rowLimitRatio <= 1.0);
  const checkDataSizePeriod = warnedOnlyAboutDataSize ? 0 : 5 * 60;  // in seconds

  const now = Date.now();
  if (now - this._lastDataSizeMeasurement > checkDataSizePeriod * 1000) {
    // Record the timestamp before measuring so that further changes arriving while a
    // background measurement is still running don't start another one.
    this._lastDataSizeMeasurement = now;

    // When the data size isn't critically high so we're only measuring it infrequently,
    // do it in the background so we don't delay responding to the client.
    // When it's being measured after every change, wait for it to finish to avoid race conditions
    // from multiple measurements and updates happening concurrently.
    if (checkDataSizePeriod === 0) {
      await this._checkDataSizeLimitRatio(docSession);
    } else {
      // Report background failures through the server logger rather than the raw console,
      // consistent with the rest of the logging in this code.
      this._checkDataSizeLimitRatio(docSession)
        .catch(e => log.error(`Error in _checkDataSizeLimitRatio: ${e}`));
    }
  }
}
/** /**
* Loads an open document from DocStorage. Returns a list of the tables it contains. * Loads an open document from DocStorage. Returns a list of the tables it contains.
*/ */
@ -1490,21 +1538,33 @@ export class ActiveDoc extends EventEmitter {
return this._productFeatures?.baseMaxRowsPerDocument; return this._productFeatures?.baseMaxRowsPerDocument;
} }
/**
 * The data size limit taken from the product features (baseMaxDataSizePerDocument),
 * or undefined when no features are set or the feature is absent.
 */
private get _dataSizeLimit(): number | undefined {
  const features = this._productFeatures;
  return features?.baseMaxDataSizePerDocument;
}
private async _updateGracePeriodStart(gracePeriodStart: Date | null) { private async _updateGracePeriodStart(gracePeriodStart: Date | null) {
this._gracePeriodStart = gracePeriodStart; this._gracePeriodStart = gracePeriodStart;
await this.getHomeDbManager()?.setDocGracePeriodStart(this.docName, gracePeriodStart); await this.getHomeDbManager()?.setDocGracePeriodStart(this.docName, gracePeriodStart);
} }
private async _updateRowCount(rowCount: number) { private async _checkDataLimitRatio() {
this._rowCount = rowCount; const exceedingDataLimit = this.dataLimitRatio > 1;
const exceedingRowLimit = this._rowLimit && rowCount > this._rowLimit; if (exceedingDataLimit && !this._gracePeriodStart) {
if (exceedingRowLimit && !this._gracePeriodStart) {
await this._updateGracePeriodStart(new Date()); await this._updateGracePeriodStart(new Date());
} else if (!exceedingRowLimit && this._gracePeriodStart) { } else if (!exceedingDataLimit && this._gracePeriodStart) {
await this._updateGracePeriodStart(null); await this._updateGracePeriodStart(null);
} }
} }
/**
 * Measures the data size via docStorage.getDataSize(), stores it, logs both the size and
 * how long the measurement took (in milliseconds), and then calls _checkDataLimitRatio().
 */
private async _checkDataSizeLimitRatio(docSession: OptDocSession | null) {
  const startedAt = Date.now();
  const measuredSize = await this.docStorage.getDataSize();
  const elapsedMs = Date.now() - startedAt;

  this._dataSize = measuredSize;
  log.rawInfo('Data size from dbstat...', {
    ...this.getLogMeta(docSession),
    dataSize: measuredSize,
    timeToMeasure: elapsedMs,
  });

  await this._checkDataLimitRatio();
}
/** /**
* Prepares a single attachment by adding it DocStorage and returns a UserAction to apply. * Prepares a single attachment by adding it DocStorage and returns a UserAction to apply.
*/ */

View File

@ -624,9 +624,6 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
// tables (obtained from auto-generated schema.js). // tables (obtained from auto-generated schema.js).
private _docSchema: {[tableId: string]: {[colId: string]: string}}; private _docSchema: {[tableId: string]: {[colId: string]: string}};
// The last time _logDataSize ran fully
private _lastLoggedDataSize: number = Date.now();
public constructor(public storageManager: IDocStorageManager, public docName: string) { public constructor(public storageManager: IDocStorageManager, public docName: string) {
this.docPath = this.storageManager.getPath(docName); this.docPath = this.storageManager.getPath(docName);
this._db = null; this._db = null;
@ -918,7 +915,6 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
}); });
} }
}); });
this._logDataSize().catch(e => log.error(`Error in _logDataSize: ${e}`));
} }
/** /**
@ -1329,6 +1325,18 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
} }
} }
/**
 * Returns the total number of bytes in use (page size minus unused bytes, summed over the
 * rows of the dbstat virtual table), skipping entries whose names match 'sqlite_%' or
 * '_gristsys_%'.
 *
 * Resolves to 0 when the query yields no row or when the sum is NULL (SQLite's SUM returns
 * NULL if no rows match), so that the result is always a number as the signature promises.
 *
 * NOTE(review): LIKE treats '_' as a single-character wildcard and is case-insensitive for
 * ASCII, so these patterns may match more names than the literal prefixes suggest -- confirm
 * that this is acceptable.
 */
public async getDataSize(): Promise<number> {
  const result = await this.get(`
    SELECT SUM(pgsize - unused) AS totalSize
    FROM dbstat
    WHERE NOT (
      name LIKE 'sqlite_%' OR
      name LIKE '_gristsys_%'
    );
  `);
  return result?.totalSize ?? 0;
}
private async _markAsChanged<T>(promise: Promise<T>): Promise<T> { private async _markAsChanged<T>(promise: Promise<T>): Promise<T> {
try { try {
return await promise; return await promise;
@ -1546,25 +1554,6 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
`${joinClauses} ${whereClause} ${limitClause}`; `${joinClauses} ${whereClause} ${limitClause}`;
return sql; return sql;
} }
private async _logDataSize() {
// To reduce overhead, don't query and log data size more than once in 5 minutes
const now = Date.now();
if (now - this._lastLoggedDataSize < 5 * 60 * 1000) {
return;
}
this._lastLoggedDataSize = now;
const result = await this.get(`
SELECT SUM(pgsize - unused) AS totalSize
FROM dbstat
WHERE NOT (
name LIKE 'sqlite_%' OR
name LIKE '_gristsys_%'
);
`);
log.rawInfo("Data size from dbstat...", {docId: this.docName, dataSize: result!.totalSize});
}
} }
interface RebuildResult { interface RebuildResult {

View File

@ -300,6 +300,8 @@ export class Sharing {
const actionSummary = await this._activeDoc.handleTriggers(localActionBundle); const actionSummary = await this._activeDoc.handleTriggers(localActionBundle);
await this._activeDoc.updateRowCount(sandboxActionBundle.rowCount, docSession);
// Broadcast the action to connected browsers. // Broadcast the action to connected browsers.
const actionGroup = asActionGroup(this._actionHistory, localActionBundle, { const actionGroup = asActionGroup(this._actionHistory, localActionBundle, {
clientId: client?.clientId, clientId: client?.clientId,