From 09da815c0c7fdd78bc72dbb5a8514a2c1d3d76bd Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Tue, 12 Apr 2022 16:33:48 +0200 Subject: [PATCH] (core) Add /attachments/removeUnused DocApi endpoint to hard delete all unused attachments in document Summary: Adds methods to delete metadata rows based on timeDeleted. The flag expiredOnly determines if it only deletes attachments that were soft-deleted 7 days ago, or just all soft-deleted rows. Then any actual file data that doesn't have matching metadata is deleted. Test Plan: DocApi test Reviewers: paulfitz Reviewed By: paulfitz Subscribers: dsagal Differential Revision: https://phab.getgrist.com/D3364 --- app/server/lib/ActiveDoc.ts | 15 ++++++++++ app/server/lib/DocApi.ts | 13 +++++++++ app/server/lib/DocStorage.ts | 40 +++++++++++++++++++++++++++ test/server/lib/DocApi.ts | 53 +++++++++++++++++++++++++++++++++++- 4 files changed, 120 insertions(+), 1 deletion(-) diff --git a/app/server/lib/ActiveDoc.ts b/app/server/lib/ActiveDoc.ts index 312e8011..fd4fcdd5 100644 --- a/app/server/lib/ActiveDoc.ts +++ b/app/server/lib/ActiveDoc.ts @@ -24,6 +24,7 @@ import { import {ApiError} from 'app/common/ApiError'; import {mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate'; import { + BulkRemoveRecord, BulkUpdateRecord, CellValue, DocAction, @@ -1320,6 +1321,20 @@ export class ActiveDoc extends EventEmitter { await this._applyUserActions(makeExceptionalDocSession('system'), [action]); } + /** + * Delete unused attachments from _grist_Attachments and gristsys_Files. + * @param expiredOnly: if true, only delete attachments that were soft-deleted sufficiently long ago. + */ + public async removeUnusedAttachments(expiredOnly: boolean) { + await this.updateUsedAttachments(); + const rowIds = await this.docStorage.getSoftDeletedAttachmentIds(expiredOnly); + if (rowIds.length) { + const action: BulkRemoveRecord = ["BulkRemoveRecord", "_grist_Attachments", rowIds]; + await this.applyUserActions(makeExceptionalDocSession('system'), [action]); + } + await this.docStorage.removeUnusedAttachments(); + } + // Needed for test/server/migrations.js tests public async testGetVersionFromDataEngine() { return this._pyCall('get_version'); diff --git a/app/server/lib/DocApi.ts b/app/server/lib/DocApi.ts index 33786a3a..9347b7fd 100644 --- a/app/server/lib/DocApi.ts +++ b/app/server/lib/DocApi.ts @@ -56,6 +56,7 @@ import {ServerColumnGetters} from 'app/server/lib/ServerColumnGetters'; import {localeFromRequest} from "app/server/lib/ServerLocale"; import {allowedEventTypes, isUrlAllowed, WebhookAction, WebHookSecret} from "app/server/lib/Triggers"; import {handleOptionalUpload, handleUpload} from "app/server/lib/uploads"; +import * as assert from 'assert'; import * as contentDisposition from 'content-disposition'; import {Application, NextFunction, Request, RequestHandler, Response} from "express"; import * as _ from "lodash"; @@ -235,6 +236,18 @@ export class DocWorkerApi { await activeDoc.updateUsedAttachments(); res.json(null); })); + this._app.post('/api/docs/:docId/attachments/removeUnused', isOwner, withDoc(async (activeDoc, req, res) => { + const expiredOnly = isAffirmative(req.query.expiredonly); + const verifyFiles = isAffirmative(req.query.verifyfiles); + await activeDoc.removeUnusedAttachments(expiredOnly); + if (verifyFiles) { + assert.deepStrictEqual( + await activeDoc.docStorage.all(`SELECT DISTINCT fileIdent AS ident FROM _grist_Attachments ORDER BY ident`), + await activeDoc.docStorage.all(`SELECT ident FROM _gristsys_Files ORDER BY ident`), + ); + } + res.json(null); + })); // Adds records given in a column oriented format, // returns an array of row IDs diff --git a/app/server/lib/DocStorage.ts b/app/server/lib/DocStorage.ts index 61b4a4b6..fbadee97 100644 --- a/app/server/lib/DocStorage.ts +++ b/app/server/lib/DocStorage.ts @@ -39,6 +39,11 @@ const maxSQLiteVariables = 500; // Actually could be 999, so this is playing const PENDING_VALUE = [GristObjCode.Pending]; +// Number of days that soft-deleted attachments are kept in file storage before being completely deleted. +// Once a file is deleted it can't be restored by undo, so we want it to be impossible or at least extremely unlikely +// that someone would delete a reference to an attachment and then undo that action this many days later. +export const ATTACHMENTS_EXPIRY_DAYS = 7; + export class DocStorage implements ISQLiteDB, OnDemandStorage { // ====================================================================== @@ -1267,6 +1272,41 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage { return (await this.all(sql)) as any[]; } + /** + * Return row IDs of unused attachments in _grist_Attachments. + * Uses the timeDeleted column which is updated in ActiveDoc.updateUsedAttachments. + * @param expiredOnly: if true, only return attachments where timeDeleted is at least + * ATTACHMENTS_EXPIRY_DAYS days ago. + */ + public async getSoftDeletedAttachmentIds(expiredOnly: boolean): Promise { + const condition = expiredOnly + ? `datetime(timeDeleted, 'unixepoch') < datetime('now', '-${ATTACHMENTS_EXPIRY_DAYS} days')` + : "timeDeleted IS NOT NULL"; + + const rows = await this.all(` + SELECT id + FROM _grist_Attachments + WHERE ${condition} + `); + return rows.map(r => r.id); + } + + /** + * Delete attachments from _gristsys_Files that have no matching metadata row in _grist_Attachments. + */ + public async removeUnusedAttachments() { + await this.run(` + DELETE FROM _gristsys_Files + WHERE ident IN ( + SELECT ident + FROM _gristsys_Files + LEFT JOIN _grist_Attachments + ON fileIdent = ident + WHERE fileIdent IS NULL + ) + `); + } + public all(sql: string, ...args: any[]): Promise { return this._getDB().all(sql, ...args); } diff --git a/test/server/lib/DocApi.ts b/test/server/lib/DocApi.ts index 422306f5..4b2c4432 100644 --- a/test/server/lib/DocApi.ts +++ b/test/server/lib/DocApi.ts @@ -1562,7 +1562,7 @@ function testDocApi() { let resp = await axios.post(`${docUrl}/apply`, actions, chimpy); assert.equal(resp.status, 200); - resp = await axios.post(`${docUrl}/attachments/updateUsed`, actions, chimpy); + resp = await axios.post(`${docUrl}/attachments/updateUsed`, null, chimpy); assert.equal(resp.status, 200); resp = await axios.get(`${docUrl}/tables/Table1/records`, chimpy); @@ -1670,6 +1670,57 @@ function testDocApi() { _.range(totalAttachments).map(index => ({id: index + 1, deleted: index >= totalUsedAttachments})), ); }); + + it("POST /docs/{did}/attachments/removeUnused removes unused attachments", async function() { + const wid = await getWorkspaceId(userApi, 'Private'); + const docId = await userApi.newDoc({name: 'TestDoc3'}, wid); + const docUrl = `${serverUrl}/api/docs/${docId}`; + + const formData = new FormData(); + formData.append('upload', 'foobar', "hello.doc"); + formData.append('upload', '123456', "world.jpg"); + formData.append('upload', 'foobar', "hello2.doc"); + let resp = await axios.post(`${docUrl}/attachments`, formData, + defaultsDeep({headers: formData.getHeaders()}, chimpy)); + assert.equal(resp.status, 200); + assert.deepEqual(resp.data, [1, 2, 3]); + + async function checkAttachmentIds(ids: number[]) { + resp = await axios.get( + `${docUrl}/tables/_grist_Attachments/records`, + chimpy, + ); + assert.equal(resp.status, 200); + assert.deepEqual(resp.data.records.map((r: any) => r.id), ids); + } + + resp = await axios.patch( + `${docUrl}/tables/_grist_Attachments/records`, + { + records: [ + {id: 1, fields: {timeDeleted: Date.now() / 1000 - 8 * 24 * 60 * 60}}, // 8 days ago, i.e. expired + {id: 2, fields: {timeDeleted: Date.now() / 1000 - 6 * 24 * 60 * 60}}, // 6 days ago, i.e. not expired + ] + }, + chimpy, + ); + assert.equal(resp.status, 200); + await checkAttachmentIds([1, 2, 3]); + + // Remove the expired attachment (1) + // It has a duplicate (3) that hasn't expired and thus isn't removed, + // although they share the same fileIdent and row in _gristsys_Files. + // So for now only the metadata is removed. + resp = await axios.post(`${docUrl}/attachments/removeUnused?verifyfiles=1&expiredonly=1`, null, chimpy); + assert.equal(resp.status, 200); + await checkAttachmentIds([2, 3]); + + // Remove the not expired attachments (2 and 3). + // We didn't set a timeDeleted for 3, but it gets set automatically by updateUsedAttachments. + resp = await axios.post(`${docUrl}/attachments/removeUnused?verifyfiles=1`, null, chimpy); + await checkAttachmentIds([]); + }); + }); it("GET /docs/{did}/download serves document", async function() {