(core) Add /attachments/removeUnused DocApi endpoint to hard delete all unused attachments in document

Summary: Adds methods to delete metadata rows based on timeDeleted. The expiredOnly flag determines whether only attachments that were soft-deleted at least 7 days ago are deleted, or all soft-deleted rows. Afterwards, any actual file data that doesn't have matching metadata is deleted.

Test Plan: DocApi test

Reviewers: paulfitz

Reviewed By: paulfitz

Subscribers: dsagal

Differential Revision: https://phab.getgrist.com/D3364
This commit is contained in:
Alex Hall
2022-04-12 16:33:48 +02:00
parent 4401ec4d79
commit 09da815c0c
4 changed files with 120 additions and 1 deletions

View File

@@ -24,6 +24,7 @@ import {
import {ApiError} from 'app/common/ApiError';
import {mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate';
import {
BulkRemoveRecord,
BulkUpdateRecord,
CellValue,
DocAction,
@@ -1320,6 +1321,20 @@ export class ActiveDoc extends EventEmitter {
await this._applyUserActions(makeExceptionalDocSession('system'), [action]);
}
/**
 * Hard-delete unused attachments: first the soft-deleted metadata rows in _grist_Attachments,
 * then any file data in _gristsys_Files that is left without a matching metadata row.
 * @param expiredOnly: if true, only metadata rows that were soft-deleted sufficiently long ago
 *   are removed; otherwise every soft-deleted row is removed.
 */
public async removeUnusedAttachments(expiredOnly: boolean) {
  // Refresh the soft-deletion bookkeeping before querying it.
  await this.updateUsedAttachments();

  const staleRowIds = await this.docStorage.getSoftDeletedAttachmentIds(expiredOnly);
  if (staleRowIds.length > 0) {
    const removeRows: BulkRemoveRecord = ["BulkRemoveRecord", "_grist_Attachments", staleRowIds];
    await this.applyUserActions(makeExceptionalDocSession('system'), [removeRows]);
  }

  // With the metadata rows gone, drop file data that no metadata row refers to any more.
  await this.docStorage.removeUnusedAttachments();
}
// Needed for test/server/migrations.js tests
public async testGetVersionFromDataEngine() {
return this._pyCall('get_version');

View File

@@ -56,6 +56,7 @@ import {ServerColumnGetters} from 'app/server/lib/ServerColumnGetters';
import {localeFromRequest} from "app/server/lib/ServerLocale";
import {allowedEventTypes, isUrlAllowed, WebhookAction, WebHookSecret} from "app/server/lib/Triggers";
import {handleOptionalUpload, handleUpload} from "app/server/lib/uploads";
import * as assert from 'assert';
import * as contentDisposition from 'content-disposition';
import {Application, NextFunction, Request, RequestHandler, Response} from "express";
import * as _ from "lodash";
@@ -235,6 +236,18 @@ export class DocWorkerApi {
await activeDoc.updateUsedAttachments();
res.json(null);
}));
// Hard-deletes unused attachments in the document (owners only).
// Query flags: `expiredonly` restricts removal to attachments soft-deleted long enough ago;
// `verifyfiles` additionally asserts that stored files and attachment metadata agree afterwards.
this._app.post('/api/docs/:docId/attachments/removeUnused', isOwner, withDoc(async (activeDoc, req, res) => {
  const onlyExpired = isAffirmative(req.query.expiredonly);
  const shouldVerify = isAffirmative(req.query.verifyfiles);

  await activeDoc.removeUnusedAttachments(onlyExpired);

  if (shouldVerify) {
    // The distinct file identifiers referenced by metadata must equal the identifiers of stored files.
    const referencedIdents = await activeDoc.docStorage.all(
      `SELECT DISTINCT fileIdent AS ident FROM _grist_Attachments ORDER BY ident`
    );
    const storedIdents = await activeDoc.docStorage.all(
      `SELECT ident FROM _gristsys_Files ORDER BY ident`
    );
    assert.deepStrictEqual(referencedIdents, storedIdents);
  }

  res.json(null);
}));
// Adds records given in a column oriented format,
// returns an array of row IDs

View File

@@ -39,6 +39,11 @@ const maxSQLiteVariables = 500; // Actually could be 999, so this is playing
const PENDING_VALUE = [GristObjCode.Pending];
// Number of days that soft-deleted attachments are kept in file storage before being completely deleted.
// Once a file is deleted it can't be restored by undo, so we want it to be impossible or at least extremely unlikely
// that someone would delete a reference to an attachment and then undo that action this many days later.
// This value is interpolated into the SQL expiry condition built by DocStorage.getSoftDeletedAttachmentIds.
export const ATTACHMENTS_EXPIRY_DAYS = 7;
export class DocStorage implements ISQLiteDB, OnDemandStorage {
// ======================================================================
@@ -1267,6 +1272,41 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
return (await this.all(sql)) as any[];
}
/**
 * Look up the row IDs of soft-deleted (unused) attachments in _grist_Attachments.
 * Relies on the timeDeleted column, which is maintained by ActiveDoc.updateUsedAttachments.
 * @param expiredOnly: if true, restrict the result to rows whose timeDeleted lies at least
 *   ATTACHMENTS_EXPIRY_DAYS days in the past; if false, return every row with a timeDeleted value.
 * @returns the matching row IDs.
 */
public async getSoftDeletedAttachmentIds(expiredOnly: boolean): Promise<number[]> {
  let condition: string;
  if (expiredOnly) {
    // timeDeleted is interpreted as a unix timestamp and compared against "now minus the expiry period".
    condition = `datetime(timeDeleted, 'unixepoch') < datetime('now', '-${ATTACHMENTS_EXPIRY_DAYS} days')`;
  } else {
    condition = "timeDeleted IS NOT NULL";
  }

  const query = `
SELECT id
FROM _grist_Attachments
WHERE ${condition}
`;
  const matches = await this.all(query);
  return matches.map(row => row.id);
}
/**
 * Delete attachments from _gristsys_Files that have no matching metadata row in _grist_Attachments.
 *
 * The subquery left-joins file rows to metadata rows on fileIdent = ident and keeps only the
 * idents for which the join found no metadata (fileIdent IS NULL); exactly those file rows are
 * deleted. File data still referenced by at least one metadata row is left untouched.
 */
public async removeUnusedAttachments() {
// Single statement: select the orphaned idents and delete them.
await this.run(`
DELETE FROM _gristsys_Files
WHERE ident IN (
SELECT ident
FROM _gristsys_Files
LEFT JOIN _grist_Attachments
ON fileIdent = ident
WHERE fileIdent IS NULL
)
`);
}
/**
 * Run the given SQL on the database returned by _getDB(), forwarding any extra arguments,
 * and resolve to the resulting rows.
 */
public all(sql: string, ...args: any[]): Promise<ResultRow[]> {
  const db = this._getDB();
  return db.all(sql, ...args);
}