(core) Limit total attachment file size per document

Summary:
- Add a new parameter `Features.baseMaxAttachmentsBytesPerDocument` and set it to 1GB for the free team product.
- Add a method to DocStorage to calculate the total size of existing and used attachments.
- Add a migration to DocStorage adding an index to make the query in the above method fast.
- Check in ActiveDoc if uploading attachment(s) would exceed the product limit on that document.

Test Plan: Added test in `limits.ts` testing enforcement of the attachment limit.

Reviewers: georgegevoian

Reviewed By: georgegevoian

Differential Revision: https://phab.getgrist.com/D3374
pull/203/head
Alex Hall 2 years ago
parent c1af5a9803
commit 64a5c79dbc

@ -47,6 +47,8 @@ export interface Features {
// Actual max for a document may be higher.
baseMaxApiUnitsPerDocumentPerDay?: number; // Similar for api calls.
baseMaxDataSizePerDocument?: number; // Similar maximum for number of bytes of 'normal' data in a document
baseMaxAttachmentsBytesPerDocument?: number; // Similar maximum for total number of bytes used
// for attached files in a document
gracePeriodDays?: number; // Duration of the grace period in days, before entering delete-only mode
}

@ -394,6 +394,9 @@ export interface DocAPI {
* @param title Name of the spreadsheet that will be created (should use a Grist document's title)
*/
sendToDrive(code: string, title: string): Promise<{url: string}>;
// Upload a single attachment and return the resulting metadata row ID.
// The arguments are passed to FormData.append.
uploadAttachment(value: string | Blob, filename?: string): Promise<number>;
}
// Operations that are supported by a doc worker.
@ -869,6 +872,20 @@ export class DocAPIImpl extends BaseAPI implements DocAPI {
return this.requestJson(url.href);
}
public async uploadAttachment(value: string | Blob, filename?: string): Promise<number> {
const formData = this.newFormData();
formData.append('upload', value, filename);
const response = await this.requestAxios(`${this._url}/attachments`, {
method: 'POST',
data: formData,
// On browser, it is important not to set Content-Type so that the browser takes care
// of setting HTTP headers appropriately. Outside browser, requestAxios has logic
// for setting the HTTP headers.
headers: {...this.defaultHeadersWithoutContentType()},
});
return response.data[0];
}
private _getRecords(tableId: string, endpoint: 'data' | 'records', options?: GetRowsParams): Promise<any> {
const url = new URL(`${this._url}/tables/${tableId}/${endpoint}`);
if (options?.filters) {

@ -39,6 +39,7 @@ export const teamFreeFeatures: Features = {
baseMaxRowsPerDocument: 5000,
baseMaxApiUnitsPerDocumentPerDay: 5000,
baseMaxDataSizePerDocument: 5000 * 2 * 1024, // 2KB per row
baseMaxAttachmentsBytesPerDocument: 1 * 1024 * 1024 * 1024, // 1GB
gracePeriodDays: 14,
};

@ -96,6 +96,7 @@ import {findOrAddAllEnvelope, Sharing} from './Sharing';
import cloneDeep = require('lodash/cloneDeep');
import flatten = require('lodash/flatten');
import remove = require('lodash/remove');
import sum = require('lodash/sum');
import without = require('lodash/without');
import zipObject = require('lodash/zipObject');
@ -650,6 +651,7 @@ export class ActiveDoc extends EventEmitter {
const userId = getDocSessionUserId(docSession);
const upload: UploadInfo = globalUploadSet.getUploadInfo(uploadId, this.makeAccessId(userId));
try {
await this._checkDocAttachmentsLimit(upload);
const userActions: UserAction[] = await Promise.all(
upload.files.map(file => this._prepAttachment(docSession, file)));
const result = await this.applyUserActions(docSession, userActions);
@ -1307,11 +1309,12 @@ export class ActiveDoc extends EventEmitter {
* 'soft deleting' them so that they get cleaned up automatically from _gristsys_Files after enough time has passed.
* Set timeDeleted to null on used attachments that were previously soft deleted,
* so that undo can 'undelete' attachments.
* Returns true if any changes were made, i.e. some row(s) of _grist_Attachments were updated.
*/
public async updateUsedAttachments() {
const changes = await this.docStorage.scanAttachmentsForUsageChanges();
if (!changes.length) {
return;
return false;
}
const rowIds = changes.map(r => r.id);
const now = Date.now() / 1000;
@ -1319,6 +1322,7 @@ export class ActiveDoc extends EventEmitter {
const action: BulkUpdateRecord = ["BulkUpdateRecord", "_grist_Attachments", rowIds, {timeDeleted}];
// Don't use applyUserActions which may block the update action in delete-only mode
await this._applyUserActions(makeExceptionalDocSession('system'), [action]);
return true;
}
/**
@ -1886,6 +1890,40 @@ export class ActiveDoc extends EventEmitter {
},
});
}
/**
* Throw an error if the provided upload would exceed the total attachment filesize limit for this document.
*/
private async _checkDocAttachmentsLimit(upload: UploadInfo) {
const maxSize = this._productFeatures?.baseMaxAttachmentsBytesPerDocument;
if (!maxSize) {
// This document has no limit, nothing to check.
return;
}
// Minor flaw: while we don't double-count existing duplicate files in the total size,
// we don't check here if any of the uploaded files already exist and could be left out of the calculation.
const totalAddedSize = sum(upload.files.map(f => f.size));
// Returns true if this upload won't bring the total over the limit.
const isOK = async () => (await this.docStorage.getTotalAttachmentFileSizes()) + totalAddedSize <= maxSize;
if (await isOK()) {
return;
}
// Looks like the limit is being exceeded.
// Check if any attachments are unused and can be soft-deleted to reduce the existing total size.
// We could do this from the beginning, but updateUsedAttachments is potentially expensive,
// so this optimises the common case of not exceeding the limit.
// updateUsedAttachments returns true if there were any changes. Otherwise there's no point checking isOK again.
if (await this.updateUsedAttachments() && await isOK()) {
return;
}
// TODO probably want a nicer error message here.
throw new Error("Exceeded attachments limit for document");
}
}
// Helper to initialize a sandbox action bundle with no values.

@ -376,6 +376,18 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
}
},
async function(db: SQLiteDB): Promise<void> {
// Storage version 8.
// Migration to add an index to _grist_Attachments.fileIdent for fast joining against _gristsys_Files.ident.
const tables = await db.all(`SELECT * FROM sqlite_master WHERE type='table' AND name='_grist_Attachments'`);
if (!tables.length) {
// _grist_Attachments is created in the first Python migration.
// For the sake of migration tests on ancient documents, just skip this migration if the table doesn't exist.
return;
}
await db.exec(`CREATE INDEX _grist_Attachments_fileIdent ON _grist_Attachments(fileIdent)`);
},
]
};
@ -1228,6 +1240,31 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
});
}
/**
* Returns the total number of bytes used for storing attachments that haven't been soft-deleted.
* May be stale if ActiveDoc.updateUsedAttachments isn't called first.
*/
public async getTotalAttachmentFileSizes() {
const result = await this.get(`
SELECT SUM(len) AS total
FROM (
-- Using MAX(LENGTH()) instead of just LENGTH() is needed in the presence of GROUP BY
-- to make LENGTH() quickly read the stored length instead of actually reading the blob data.
-- We use LENGTH() in the first place instead of _grist_Attachments.fileSize because the latter can
-- be changed by users.
SELECT MAX(LENGTH(files.data)) AS len
FROM _gristsys_Files AS files
JOIN _grist_Attachments AS meta
ON meta.fileIdent = files.ident
WHERE meta.timeDeleted IS NULL -- Don't count soft-deleted attachments
-- Duplicate attachments (i.e. identical file contents) are only stored once in _gristsys_Files
-- but can be duplicated in _grist_Attachments, so the GROUP BY prevents adding up duplicated sizes.
GROUP BY meta.fileIdent
)
`);
return result!.total as number;
}
/**
* Returns an array of objects where:
* - `id` is a row ID of _grist_Attachments

Binary file not shown.
Loading…
Cancel
Save