(core) Add /attachments/removeUnused DocApi endpoint to hard delete all unused attachments in document

Summary: Adds methods to delete metadata rows based on timeDeleted. The flag expiredOnly determines if it only deletes attachments that were soft-deleted 7 days ago, or just all soft-deleted rows. Then any actual file data that doesn't have matching metadata is deleted.

Test Plan: DocApi test

Reviewers: paulfitz

Reviewed By: paulfitz

Subscribers: dsagal

Differential Revision: https://phab.getgrist.com/D3364
pull/203/head
Alex Hall 2 years ago
parent 4401ec4d79
commit 09da815c0c

@ -24,6 +24,7 @@ import {
import {ApiError} from 'app/common/ApiError';
import {mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate';
import {
BulkRemoveRecord,
BulkUpdateRecord,
CellValue,
DocAction,
@ -1320,6 +1321,20 @@ export class ActiveDoc extends EventEmitter {
await this._applyUserActions(makeExceptionalDocSession('system'), [action]);
}
/**
* Delete unused attachments from _grist_Attachments and gristsys_Files.
* @param expiredOnly: if true, only delete attachments that were soft-deleted sufficiently long ago.
*/
public async removeUnusedAttachments(expiredOnly: boolean) {
await this.updateUsedAttachments();
const rowIds = await this.docStorage.getSoftDeletedAttachmentIds(expiredOnly);
if (rowIds.length) {
const action: BulkRemoveRecord = ["BulkRemoveRecord", "_grist_Attachments", rowIds];
await this.applyUserActions(makeExceptionalDocSession('system'), [action]);
}
await this.docStorage.removeUnusedAttachments();
}
// Needed for test/server/migrations.js tests
public async testGetVersionFromDataEngine() {
return this._pyCall('get_version');

@ -56,6 +56,7 @@ import {ServerColumnGetters} from 'app/server/lib/ServerColumnGetters';
import {localeFromRequest} from "app/server/lib/ServerLocale";
import {allowedEventTypes, isUrlAllowed, WebhookAction, WebHookSecret} from "app/server/lib/Triggers";
import {handleOptionalUpload, handleUpload} from "app/server/lib/uploads";
import * as assert from 'assert';
import * as contentDisposition from 'content-disposition';
import {Application, NextFunction, Request, RequestHandler, Response} from "express";
import * as _ from "lodash";
@ -235,6 +236,18 @@ export class DocWorkerApi {
await activeDoc.updateUsedAttachments();
res.json(null);
}));
this._app.post('/api/docs/:docId/attachments/removeUnused', isOwner, withDoc(async (activeDoc, req, res) => {
const expiredOnly = isAffirmative(req.query.expiredonly);
const verifyFiles = isAffirmative(req.query.verifyfiles);
await activeDoc.removeUnusedAttachments(expiredOnly);
if (verifyFiles) {
assert.deepStrictEqual(
await activeDoc.docStorage.all(`SELECT DISTINCT fileIdent AS ident FROM _grist_Attachments ORDER BY ident`),
await activeDoc.docStorage.all(`SELECT ident FROM _gristsys_Files ORDER BY ident`),
);
}
res.json(null);
}));
// Adds records given in a column oriented format,
// returns an array of row IDs

@ -39,6 +39,11 @@ const maxSQLiteVariables = 500; // Actually could be 999, so this is playing
const PENDING_VALUE = [GristObjCode.Pending];
// Number of days that soft-deleted attachments are kept in file storage before being completely deleted.
// Once a file is deleted it can't be restored by undo, so we want it to be impossible or at least extremely unlikely
// that someone would delete a reference to an attachment and then undo that action this many days later.
export const ATTACHMENTS_EXPIRY_DAYS = 7;
export class DocStorage implements ISQLiteDB, OnDemandStorage {
// ======================================================================
@ -1267,6 +1272,41 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
return (await this.all(sql)) as any[];
}
/**
* Return row IDs of unused attachments in _grist_Attachments.
* Uses the timeDeleted column which is updated in ActiveDoc.updateUsedAttachments.
* @param expiredOnly: if true, only return attachments where timeDeleted is at least
* ATTACHMENTS_EXPIRY_DAYS days ago.
*/
public async getSoftDeletedAttachmentIds(expiredOnly: boolean): Promise<number[]> {
const condition = expiredOnly
? `datetime(timeDeleted, 'unixepoch') < datetime('now', '-${ATTACHMENTS_EXPIRY_DAYS} days')`
: "timeDeleted IS NOT NULL";
const rows = await this.all(`
SELECT id
FROM _grist_Attachments
WHERE ${condition}
`);
return rows.map(r => r.id);
}
/**
* Delete attachments from _gristsys_Files that have no matching metadata row in _grist_Attachments.
*/
public async removeUnusedAttachments() {
await this.run(`
DELETE FROM _gristsys_Files
WHERE ident IN (
SELECT ident
FROM _gristsys_Files
LEFT JOIN _grist_Attachments
ON fileIdent = ident
WHERE fileIdent IS NULL
)
`);
}
public all(sql: string, ...args: any[]): Promise<ResultRow[]> {
return this._getDB().all(sql, ...args);
}

@ -1562,7 +1562,7 @@ function testDocApi() {
let resp = await axios.post(`${docUrl}/apply`, actions, chimpy);
assert.equal(resp.status, 200);
resp = await axios.post(`${docUrl}/attachments/updateUsed`, actions, chimpy);
resp = await axios.post(`${docUrl}/attachments/updateUsed`, null, chimpy);
assert.equal(resp.status, 200);
resp = await axios.get(`${docUrl}/tables/Table1/records`, chimpy);
@ -1670,6 +1670,57 @@ function testDocApi() {
_.range(totalAttachments).map(index => ({id: index + 1, deleted: index >= totalUsedAttachments})),
);
});
it("POST /docs/{did}/attachments/removeUnused removes unused attachments", async function() {
const wid = await getWorkspaceId(userApi, 'Private');
const docId = await userApi.newDoc({name: 'TestDoc3'}, wid);
const docUrl = `${serverUrl}/api/docs/${docId}`;
const formData = new FormData();
formData.append('upload', 'foobar', "hello.doc");
formData.append('upload', '123456', "world.jpg");
formData.append('upload', 'foobar', "hello2.doc");
let resp = await axios.post(`${docUrl}/attachments`, formData,
defaultsDeep({headers: formData.getHeaders()}, chimpy));
assert.equal(resp.status, 200);
assert.deepEqual(resp.data, [1, 2, 3]);
async function checkAttachmentIds(ids: number[]) {
resp = await axios.get(
`${docUrl}/tables/_grist_Attachments/records`,
chimpy,
);
assert.equal(resp.status, 200);
assert.deepEqual(resp.data.records.map((r: any) => r.id), ids);
}
resp = await axios.patch(
`${docUrl}/tables/_grist_Attachments/records`,
{
records: [
{id: 1, fields: {timeDeleted: Date.now() / 1000 - 8 * 24 * 60 * 60}}, // 8 days ago, i.e. expired
{id: 2, fields: {timeDeleted: Date.now() / 1000 - 6 * 24 * 60 * 60}}, // 6 days ago, i.e. not expired
]
},
chimpy,
);
assert.equal(resp.status, 200);
await checkAttachmentIds([1, 2, 3]);
// Remove the expired attachment (1)
// It has a duplicate (3) that hasn't expired and thus isn't removed,
// although they share the same fileIdent and row in _gristsys_Files.
// So for now only the metadata is removed.
resp = await axios.post(`${docUrl}/attachments/removeUnused?verifyfiles=1&expiredonly=1`, null, chimpy);
assert.equal(resp.status, 200);
await checkAttachmentIds([2, 3]);
// Remove the not expired attachments (2 and 3).
// We didn't set a timeDeleted for 3, but it gets set automatically by updateUsedAttachments.
resp = await axios.post(`${docUrl}/attachments/removeUnused?verifyfiles=1`, null, chimpy);
await checkAttachmentIds([]);
});
});
it("GET /docs/{did}/download serves document", async function() {

Loading…
Cancel
Save