(core) Add /attachments/updateUsed DocApi endpoint to soft delete all unused attachments in document

Summary:
Builds on https://phab.getgrist.com/D3352

Add DocStorage.scanAttachmentsForUsageChanges, which runs a JSON query to find all attachment metadata rows whose soft deletion status needs updating.

Add ActiveDoc.updateUsedAttachments which uses the above and then applies the appropriate user action if needed to soft delete/undelete metadata rows.

Add endpoint in DocApi calling ActiveDoc method.

Test Plan: Added DocApi test

Reviewers: paulfitz

Reviewed By: paulfitz

Differential Revision: https://phab.getgrist.com/D3357
This commit is contained in:
Alex Hall
2022-04-07 14:34:50 +02:00
parent a6ba40558a
commit 64369df4c3
4 changed files with 200 additions and 2 deletions

View File

@@ -24,6 +24,7 @@ import {
import {ApiError} from 'app/common/ApiError';
import {mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate';
import {
BulkUpdateRecord,
CellValue,
DocAction,
TableDataAction,
@@ -1298,6 +1299,27 @@ export class ActiveDoc extends EventEmitter {
return sandboxActionBundle;
}
/**
 * Brings the soft-deletion state of _grist_Attachments rows in line with actual usage.
 *
 * An attachment counts as used when some cell of an Attachments type column references it.
 *  - Attachments that have become unused get timeDeleted set to the current time (in seconds),
 *    'soft deleting' them so that they get cleaned up automatically from _gristsys_Files
 *    after enough time has passed.
 *  - Attachments that are used again but were previously soft deleted get timeDeleted reset
 *    to null, so that undo can 'undelete' attachments.
 *
 * Does nothing when no row needs updating.
 */
public async updateUsedAttachments() {
  const pending = await this.docStorage.scanAttachmentsForUsageChanges();
  if (pending.length === 0) {
    return;
  }

  // A single timestamp is shared by every attachment soft deleted in this pass.
  const nowSeconds = Date.now() / 1000;
  const rowIds: number[] = [];
  const timeDeleted: Array<number | null> = [];
  for (const {id, used} of pending) {
    rowIds.push(id);
    timeDeleted.push(used ? null : nowSeconds);
  }

  const updateAction: BulkUpdateRecord = ["BulkUpdateRecord", "_grist_Attachments", rowIds, {timeDeleted}];
  // Don't use applyUserActions which may block the update action in delete-only mode
  const systemSession = makeExceptionalDocSession('system');
  await this._applyUserActions(systemSession, [updateAction]);
}
// Needed for test/server/migrations.js tests
public async testGetVersionFromDataEngine() {
return this._pyCall('get_version');

View File

@@ -230,6 +230,12 @@ export class DocWorkerApi {
.send(fileData);
}));
// Triggers ActiveDoc.updateUsedAttachments for the document and responds with JSON null.
// Mostly for testing.
this._app.post('/api/docs/:docId/attachments/updateUsed', canEdit, withDoc(async (doc, _req, res) => {
  await doc.updateUsedAttachments();
  res.json(null);
}));
// Adds records given in a column oriented format,
// returns an array of row IDs
this._app.post('/api/docs/:docId/tables/:tableId/data', canEdit,

View File

@@ -1223,6 +1223,50 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
});
}
/**
 * Finds the rows of _grist_Attachments whose soft-deletion status is out of date.
 *
 * Returns an array of objects where:
 *   - `id` is a row ID of _grist_Attachments
 *   - `used` is true if and only if `id` is in a list in a cell of type Attachments
 * Only rows whose `timeDeleted` needs to be updated are included, i.e. rows where the
 * truthiness of `timeDeleted` doesn't match `used` because either:
 *   - a used attachment is marked as deleted, OR
 *   - an unused attachment is not marked as deleted
 */
public async scanAttachmentsForUsageChanges(): Promise<{ used: boolean, id: number }[]> {
  // Array of SQL queries where attachment_ids contains JSON arrays (typically containing row IDs).
  // Below we add one query for each column of type Attachments in the document.
  // We always include this first dummy query because if the array is empty then the final SQL query
  // will just have `()` causing a syntax error.
  // We can't just return when there are no Attachments columns
  // because we may still need to delete all remaining attachments.
  const attachmentsQueries = ["SELECT '[0]' AS attachment_ids"];
  for (const [tableId, cols] of Object.entries(this._docSchema)) {
    for (const [colId, type] of Object.entries(cols)) {
      if (type === "Attachments") {
        // json_valid skips cells that don't hold JSON, which json_each could not expand.
        attachmentsQueries.push(`
          SELECT t.${quoteIdent(colId)} AS attachment_ids
          FROM ${quoteIdent(tableId)} AS t
          WHERE json_valid(attachment_ids)
        `);
      }
    }
  }

  // `UNION ALL` instead of `UNION` because duplicate values are unlikely and deduplicating is not worth the cost
  const allAttachmentsQuery = attachmentsQueries.join(' UNION ALL ');

  // NOTE(review): per SQLite's IN semantics, a NULL among the flattened ids (e.g. a cell holding `[null]`)
  // would make `used` NULL for unmatched rows, and the WHERE clause would then drop them.
  // Confirm whether such cell values can occur.
  const sql = `
    WITH all_attachment_ids(id) AS (
      SELECT json_each.value AS id
      FROM json_each(attachment_ids), (${allAttachmentsQuery})
    ) -- flatten out all the lists of IDs into a simple column of IDs
    SELECT id, id IN all_attachment_ids AS used
    FROM _grist_Attachments
    WHERE used != (timeDeleted IS NULL); -- only include rows that need updating
  `;

  const rows = await this.all(sql);
  // SQLite has no boolean type, so `id IN ...` comes back as the integer 0 or 1.
  // Convert explicitly so the result really matches the declared `used: boolean`
  // instead of hiding the mismatch behind an `any` cast.
  return rows.map(row => ({id: row.id as number, used: Boolean(row.used)}));
}
/**
 * Forwards `sql` and `args` to the `all` method of the database returned by `_getDB()`,
 * and returns its promise of result rows.
 */
public all(sql: string, ...args: any[]): Promise<ResultRow[]> {
  const db = this._getDB();
  return db.all(sql, ...args);
}