gristlabs_grist-core/app/server/lib/ActiveDoc.ts
Alex Hall 276adc5f51 (core) Starting to make webhooks more robust
Summary:
- Puts events on a queue in memory and ensures they are sent in the order they were generated.
- Makes the caller (Sharing.ts) wait until changed records have been fetched from the DB, but allows it to continue after while remaining work happens asynchronously.
- Gathers all new webhook events into an array so they can be backed up to the queue on redis in a single command (in a future diff).
- Uses changes in isReady to determine event type, no more 'existed before'

The structure of the code has changed a lot, so I think the scope of the diff needs to stop here. Lots of work is still deferred in TODOs.

Test Plan: Updated existing test. Actually dropped testing of retry on failures and slowness because it no longer made sense to keep that as part of the current test, so a new test for that will be added in a future diff.

Reviewers: paulfitz

Reviewed By: paulfitz

Differential Revision: https://phab.getgrist.com/D3074
2021-10-15 20:01:15 +02:00

1735 lines
76 KiB
TypeScript

/**
* Module to manage "active" Grist documents, i.e. those loaded in-memory, with
* clients connected to them. It handles the incoming user actions, and outgoing
* change events.
*/
import { ActionSummary } from "app/common/ActionSummary";
import { DocTriggers } from "app/server/lib/Triggers";
import * as assert from 'assert';
import {Mutex} from 'async-mutex';
import * as bluebird from 'bluebird';
import {EventEmitter} from 'events';
import {IMessage, MsgType} from 'grain-rpc';
import * as imageSize from 'image-size';
import * as moment from 'moment-timezone';
import fetch from 'node-fetch';
import * as tmp from 'tmp';
import {getEnvContent, LocalActionBundle, SandboxActionBundle, UserActionBundle} from 'app/common/ActionBundle';
import { ActionGroup, MinimalActionGroup} from 'app/common/ActionGroup';
import {
ApplyUAOptions,
ApplyUAResult,
DataSourceTransformed,
ForkResult,
ImportOptions,
ImportResult,
MergeOptions,
PermissionDataWithExtraUsers,
QueryResult,
ServerQuery,
TransformRule
} from 'app/common/ActiveDocAPI';
import {ApiError} from 'app/common/ApiError';
import {mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate';
import {
CellValue,
DocAction,
RowRecord,
TableDataAction,
TableRecordValue,
toTableDataAction,
UserAction
} from 'app/common/DocActions';
import {DocData} from 'app/common/DocData';
import {DocSnapshots} from 'app/common/DocSnapshot';
import {DocumentSettings} from 'app/common/DocumentSettings';
import {normalizeEmail} from 'app/common/emails';
import {FormulaProperties, getFormulaProperties} from 'app/common/GranularAccessClause';
import {byteString, countIf, safeJsonParse} from 'app/common/gutil';
import {InactivityTimer} from 'app/common/InactivityTimer';
import {schema, SCHEMA_VERSION} from 'app/common/schema';
import {FetchUrlOptions, UploadResult} from 'app/common/uploads';
import {DocReplacementOptions, DocState, DocStateComparison} from 'app/common/UserAPI';
import {ParseOptions} from 'app/plugin/FileParserAPI';
import {GristDocAPI} from 'app/plugin/GristAPI';
import {compileAclFormula} from 'app/server/lib/ACLFormula';
import {Authorizer} from 'app/server/lib/Authorizer';
import {checksumFile} from 'app/server/lib/checksumFile';
import {Client} from 'app/server/lib/Client';
import {DEFAULT_CACHE_TTL, DocManager} from 'app/server/lib/DocManager';
import {makeForkIds} from 'app/server/lib/idUtils';
import {GRIST_DOC_SQL, GRIST_DOC_WITH_TABLE1_SQL} from 'app/server/lib/initialDocSql';
import {ISandbox} from 'app/server/lib/ISandbox';
import * as log from 'app/server/lib/log';
import {shortDesc} from 'app/server/lib/shortDesc';
import {TableMetadataLoader} from 'app/server/lib/TableMetadataLoader';
import {fetchURL, FileUploadInfo, globalUploadSet, UploadInfo} from 'app/server/lib/uploads';
import {ActionHistory} from './ActionHistory';
import {ActionHistoryImpl} from './ActionHistoryImpl';
import {ActiveDocImport} from './ActiveDocImport';
import {DocClients} from './DocClients';
import {DocPluginManager} from './DocPluginManager';
import {
DocSession,
getDocSessionUser,
getDocSessionUserId,
makeExceptionalDocSession,
OptDocSession
} from './DocSession';
import {DocStorage} from './DocStorage';
import {expandQuery} from './ExpandedQuery';
import {GranularAccess, GranularAccessForBundle} from './GranularAccess';
import {OnDemandActions} from './OnDemandActions';
import {getLogMetaFromDocSession, supportsEngineChoices, timeoutReached} from './serverUtils';
import {findOrAddAllEnvelope, Sharing} from './Sharing';
import cloneDeep = require('lodash/cloneDeep');
import flatten = require('lodash/flatten');
import remove = require('lodash/remove');
import zipObject = require('lodash/zipObject');
import without = require('lodash/without');
bluebird.promisifyAll(tmp);
const MAX_RECENT_ACTIONS = 100;
const DEFAULT_TIMEZONE = (process.versions as any).electron ? moment.tz.guess() : "UTC";
const DEFAULT_LOCALE = "en-US";
// Number of seconds an ActiveDoc is retained without any clients.
// In dev environment, it is convenient to keep this low for quick tests.
// In production, it is reasonable to stretch it out a bit.
const ACTIVEDOC_TIMEOUT = (process.env.NODE_ENV === 'production') ? 30 : 5;
// We'll wait this long between re-measuring sandbox memory.
const MEMORY_MEASUREMENT_INTERVAL_MS = 60 * 1000;
// A hook for dependency injection.
export const Deps = {ACTIVEDOC_TIMEOUT};
/**
* Represents an active document with the given name. The document isn't actually open until
* either .loadDoc() or .createEmptyDoc() is called.
* @param {String} docName - The document's filename, without the '.grist' extension.
*/
export class ActiveDoc extends EventEmitter {
/**
* Decorator for ActiveDoc methods that prevents shutdown while the method is running, i.e.
* until the returned promise is resolved.
*/
public static keepDocOpen(target: ActiveDoc, propertyKey: string, descriptor: PropertyDescriptor) {
const origFunc = descriptor.value;
descriptor.value = function(this: ActiveDoc) {
return this._inactivityTimer.disableUntilFinish(origFunc.apply(this, arguments));
};
}
public readonly docStorage: DocStorage;
public readonly docPluginManager: DocPluginManager;
public readonly docClients: DocClients; // Only exposed for Sharing.ts
public docData: DocData|null = null;
protected _actionHistory: ActionHistory;
protected _docManager: DocManager;
protected _docName: string;
protected _sharing: Sharing;
// This lock is used to avoid reading sandbox state while it is being modified but before
// the result has been confirmed to pass granular access rules (which may depend on the
// result).
protected _modificationLock: Mutex = new Mutex();
private _triggers: DocTriggers;
private _dataEngine: Promise<ISandbox>|undefined;
private _activeDocImport: ActiveDocImport;
private _onDemandActions: OnDemandActions;
private _granularAccess: GranularAccess;
private _tableMetadataLoader: TableMetadataLoader;
private _muted: boolean = false; // If set, changes to this document should not propagate
// to outside world
private _migrating: number = 0; // If positive, a migration is in progress
private _initializationPromise: Promise<boolean>|null = null;
// If set, wait on this to be sure the ActiveDoc is fully
// initialized. True on success.
private _fullyLoaded: boolean = false; // Becomes true once all columns are loaded/computed.
private _lastMemoryMeasurement: number = 0; // Timestamp when memory was last measured.
private _fetchCache = new MapWithTTL<string, Promise<TableDataAction>>(DEFAULT_CACHE_TTL);
// Timer for shutting down the ActiveDoc a bit after all clients are gone.
private _inactivityTimer = new InactivityTimer(() => this.shutdown(), Deps.ACTIVEDOC_TIMEOUT * 1000);
private _recoveryMode: boolean = false;
private _shuttingDown: boolean = false;
constructor(docManager: DocManager, docName: string, private _options?: {
safeMode?: boolean,
docUrl?: string
}) {
super();
if (_options?.safeMode) { this._recoveryMode = true; }
this._docManager = docManager;
this._docName = docName;
this.docStorage = new DocStorage(docManager.storageManager, docName);
this.docClients = new DocClients(this);
this._triggers = new DocTriggers(this);
this._actionHistory = new ActionHistoryImpl(this.docStorage);
this.docPluginManager = new DocPluginManager(docManager.pluginManager.getPlugins(),
docManager.pluginManager.appRoot!, this, this._docManager.gristServer);
this._tableMetadataLoader = new TableMetadataLoader({
decodeBuffer: this.docStorage.decodeMarshalledData.bind(this.docStorage),
fetchTable: this.docStorage.fetchTable.bind(this.docStorage),
loadMetaTables: this._rawPyCall.bind(this, 'load_meta_tables'),
loadTable: this._rawPyCall.bind(this, 'load_table'),
});
// Our DataEngine is a separate sandboxed process (one per open document). The data engine runs
// user-defined python code including formula calculations. It maintains all document data and
// metadata, and applies translates higher-level UserActions into lower-level DocActions.
// Creation of the data engine currently needs to be deferred if we support a choice of
// engines, since we need to look at the document to see what kind of engine it needs.
// This doesn't delay loading the document, but does delay first calculation and modification.
// So if we don't offer a choice, go ahead and start creating the data engine now, so it
// is created in parallel to fetching the document from external storage (if needed).
// TODO: cache engine requirement for doc in home db so we can retain this parallelism
// when offering a choice of data engines.
if (!supportsEngineChoices()) {
this._getEngine().catch(e => this.logError({client: null}, `engine for ${docName} failed to launch: ${e}`));
}
this._activeDocImport = new ActiveDocImport(this);
// Schedule shutdown immediately. If a client connects soon (normal case), it will get
// unscheduled. If not (e.g. abandoned import, network problems after creating a doc), then
// the ActiveDoc will get cleaned up.
this._inactivityTimer.enable();
}
public get docName(): string { return this._docName; }
public get recoveryMode(): boolean { return this._recoveryMode; }
public get isShuttingDown(): boolean { return this._shuttingDown; }
public async getUserOverride(docSession: OptDocSession) {
return this._granularAccess.getUserOverride(docSession);
}
// Helpers to log a message along with metadata about the request.
public logDebug(s: OptDocSession, msg: string, ...args: any[]) { this._log('debug', s, msg, ...args); }
public logInfo(s: OptDocSession, msg: string, ...args: any[]) { this._log('info', s, msg, ...args); }
public logWarn(s: OptDocSession, msg: string, ...args: any[]) { this._log('warn', s, msg, ...args); }
public logError(s: OptDocSession, msg: string, ...args: any[]) { this._log('error', s, msg, ...args); }
// Constructs metadata for logging, given a Client or an OptDocSession.
public getLogMeta(docSession: OptDocSession, docMethod?: string): log.ILogMeta {
return {
...getLogMetaFromDocSession(docSession),
docId: this._docName,
...(docMethod ? {docMethod} : {}),
};
}
public setMuted() {
this._muted = true;
}
public get muted() {
return this._muted;
}
public isMigrating() {
return this._migrating;
}
// Note that this method is only used in tests, and should be avoided in production (see note
// in ActionHistory about getRecentActions).
public getRecentActionsDirect(maxActions?: number): Promise<LocalActionBundle[]> {
return this._actionHistory.getRecentActions(maxActions);
}
public async getRecentStates(docSession: OptDocSession, maxStates?: number): Promise<DocState[]> {
// Doc states currently don't include user content, so it seems ok to let all
// viewers have access to them.
return this._actionHistory.getRecentStates(maxStates);
}
/**
* Access specific actions identified by actionNum.
* TODO: for memory reasons on large docs, would be best not to hold many actions
* in memory at a time, so should e.g. fetch them one at a time.
*/
public getActions(actionNums: number[]): Promise<Array<LocalActionBundle|undefined>> {
return this._actionHistory.getActions(actionNums);
}
/**
* Get the most recent actions from the history. Results are ordered by
* earliest actions first, later actions later. If `summarize` is set,
* action summaries are computed and included.
*/
public async getRecentActions(docSession: OptDocSession, summarize: boolean): Promise<ActionGroup[]> {
const groups = await this._actionHistory.getRecentActionGroups(MAX_RECENT_ACTIONS,
{clientId: docSession.client?.clientId, summarize});
const permittedGroups: ActionGroup[] = [];
// Process groups serially since the work is synchronous except for some
// possible db accesses that will be serialized in any case.
for (const group of groups) {
if (await this._granularAccess.allowActionGroup(docSession, group)) {
permittedGroups.push(group);
}
}
return permittedGroups;
}
public async getRecentMinimalActions(docSession: OptDocSession): Promise<MinimalActionGroup[]> {
return this._actionHistory.getRecentMinimalActionGroups(
MAX_RECENT_ACTIONS, docSession.client?.clientId);
}
/** expose action history for tests */
public getActionHistory(): ActionHistory {
return this._actionHistory;
}
public handleTriggers(summary: ActionSummary): Promise<void> {
return this._triggers.handle(summary);
}
/**
* Adds a client of this doc to the list of connected clients.
* @param client: The client object maintaining the websocket connection.
* @param authorizer: The authorizer for the client/doc combination.
* @returns docSession
*/
public addClient(client: Client, authorizer: Authorizer): DocSession {
const docSession: DocSession = this.docClients.addClient(client, authorizer);
// If we had a shutdown scheduled, unschedule it.
if (this._inactivityTimer.isEnabled()) {
this.logInfo(docSession, "will stay open");
this._inactivityTimer.disable();
}
return docSession;
}
/**
* Shut down the ActiveDoc, and (by default) remove it from the docManager.
* @returns {Promise} Promise for when database and data engine are done shutting down.
*/
public async shutdown(removeThisActiveDoc: boolean = true): Promise<void> {
const docSession = makeExceptionalDocSession('system');
this.logDebug(docSession, "shutdown starting");
this._inactivityTimer.disable();
if (this.docClients.clientCount() > 0) {
this.logWarn(docSession, `Doc being closed with ${this.docClients.clientCount()} clients left`);
await this.docClients.broadcastDocMessage(null, 'docShutdown', null);
this.docClients.removeAllClients();
}
this._triggers.shutdown();
// Clear the MapWithTTL to remove all timers from the event loop.
this._fetchCache.clear();
if (removeThisActiveDoc) { await this._docManager.removeActiveDoc(this); }
try {
await this._docManager.storageManager.closeDocument(this.docName);
} catch (err) {
log.error('Problem shutting down document: %s %s', this.docName, err.message);
}
try {
const dataEngine = this._dataEngine ? await this._getEngine() : null;
this._shuttingDown = true; // Block creation of engine if not yet in existence.
if (dataEngine) {
// Give a small grace period for finishing initialization if we are being shut
// down while initialization is still in progress, and we don't have an easy
// way yet to cancel it cleanly. This is mainly for the benefit of automated
// tests.
await timeoutReached(3000, this.waitForInitialization());
}
await Promise.all([
this.docStorage.shutdown(),
this.docPluginManager.shutdown(),
dataEngine?.shutdown()
]);
// The this.waitForInitialization promise may not yet have resolved, but
// should do so quickly now we've killed everything it depends on.
try {
await this.waitForInitialization();
} catch (err) {
// Initialization errors do not matter at this point.
}
this.logDebug(docSession, "shutdown complete");
} catch (err) {
this.logError(docSession, "failed to shutdown some resources", err);
}
}
/**
* Create a new blank document (no "Table1") using the data engine. This is used only
* to generate the SQL saved to initialDocSql.ts
*/
@ActiveDoc.keepDocOpen
public async createEmptyDocWithDataEngine(docSession: OptDocSession): Promise<ActiveDoc> {
this.logDebug(docSession, "createEmptyDocWithDataEngine");
await this._docManager.storageManager.prepareToCreateDoc(this.docName);
await this.docStorage.createFile();
await this._rawPyCall('load_empty');
const timezone = docSession.browserSettings?.timezone ?? DEFAULT_TIMEZONE;
const locale = docSession.browserSettings?.locale ?? DEFAULT_LOCALE;
// This init action is special. It creates schema tables, and is used to init the DB, but does
// not go through other steps of a regular action (no ActionHistory or broadcasting).
const initBundle = await this._rawPyCall('apply_user_actions', [["InitNewDoc", timezone, locale]]);
await this.docStorage.execTransaction(() =>
this.docStorage.applyStoredActions(getEnvContent(initBundle.stored)));
await this._initDoc(docSession);
await this._tableMetadataLoader.clean();
// Makes sure docPluginManager is ready in case new doc is used to import new data
await this.docPluginManager.ready;
this._fullyLoaded = true;
return this;
}
/**
* Create a new blank document (no "Table1"), used as a stub when importing.
*/
@ActiveDoc.keepDocOpen
public async createEmptyDoc(docSession: OptDocSession): Promise<ActiveDoc> {
await this.loadDoc(docSession, {forceNew: true, skipInitialTable: true});
// Makes sure docPluginManager is ready in case new doc is used to import new data
await this.docPluginManager.ready;
this._fullyLoaded = true;
return this;
}
/**
* Loads an existing document from storage, fetching all data from the database via DocStorage and
* loading it into the DataEngine. User tables are not immediately loaded (see use of
* this.waitForInitialization throughout this class to wait for that).
* @returns {Promise} Promise for this ActiveDoc itself.
*/
@ActiveDoc.keepDocOpen
public async loadDoc(docSession: OptDocSession, options?: {
forceNew?: boolean, // If set, document will be created.
skipInitialTable?: boolean, // If set, and document is new, "Table1" will not be added.
}): Promise<ActiveDoc> {
const startTime = Date.now();
this.logDebug(docSession, "loadDoc");
try {
const isNew: boolean = options?.forceNew || await this._docManager.storageManager.prepareLocalDoc(this.docName);
if (isNew) {
await this._createDocFile(docSession, {skipInitialTable: options?.skipInitialTable});
} else {
await this.docStorage.openFile({
beforeMigration: async (currentVersion, newVersion) => {
return this._beforeMigration(docSession, 'storage', currentVersion, newVersion);
},
afterMigration: async (newVersion, success) => {
return this._afterMigration(docSession, 'storage', newVersion, success);
},
});
}
const tableNames = await this._loadOpenDoc(docSession);
const desiredTableNames = tableNames.filter(name => name.startsWith('_grist_'));
this._startLoadingTables(docSession, desiredTableNames);
const pendingTableNames = tableNames.filter(name => !name.startsWith('_grist_'));
await this._initDoc(docSession);
this._initializationPromise = this._finishInitialization(docSession, pendingTableNames, startTime);
} catch (err) {
await this.shutdown();
throw err;
}
return this;
}
/**
* Replace this document with another, in-place so its id and other metadata does not change.
* This operation will leave the ActiveDoc it is called for unusable. It will mute it,
* shut it down, and unlist it via the DocManager. A fresh ActiveDoc can be acquired via the
* DocManager.
*/
public async replace(source: DocReplacementOptions) {
// During replacement, it is important for all hands to be off the document. So:
// - We set the "mute" flag. Setting this means that any operations in progress
// using this ActiveDoc should be ineffective (apart from the replacement).
// In other words, the operations shouldn't ultimately result in any changes in S3,
// and any related requests should result in a failure or be retried. TODO:
// review how well we do on meeting this goal.
// - We close the ActiveDoc, retaining its listing in DocManager but shutting down
// all its component parts. We retain it in DocManager to delay another
// ActiveDoc being opened for the same document if someone is trying to operate
// on it.
// - We replace the document.
// - We remove the ActiveDoc from DocManager, opening the way for the document to be
// freshly opened.
// The "mute" flag is borrowed from worker shutdown. Note this scenario is a little
// different, since the worker is not withdrawing from service, so fresh work may get
// assigned to it at any time.
this.setMuted();
this.docClients.interruptAllClients();
try {
await this.shutdown(false);
await this._docManager.storageManager.replace(this.docName, source);
} finally {
// Whatever happened, success or failure, there is nothing further we can do
// with this ActiveDoc. Unlist it.
await this._docManager.removeActiveDoc(this);
}
}
/**
* Finish initializing ActiveDoc, by initializing ActionHistory, Sharing, and docData.
*/
public async _initDoc(docSession: OptDocSession): Promise<void> {
const metaTableData = await this._tableMetadataLoader.fetchTablesAsActions();
this.docData = new DocData(tableId => this.fetchTable(makeExceptionalDocSession('system'), tableId), metaTableData);
this._onDemandActions = new OnDemandActions(this.docStorage, this.docData);
await this._actionHistory.initialize();
this._granularAccess = new GranularAccess(this.docData, this.docClients, (query) => {
return this._fetchQueryFromDB(query, false);
}, this.recoveryMode, this.getHomeDbManager(), this.docName);
await this._granularAccess.update();
this._sharing = new Sharing(this, this._actionHistory, this._modificationLock);
// Make sure there is at least one item in action history. The document will be perfectly
// functional without it, but comparing documents would need updating if history can
// be empty. For example, comparing an empty document immediately forked with the
// original would fail. So far, we have treated documents without a common history
// as incomparible, and we'd need to weaken that to allow comparisons with a document
// with nothing in action history.
if (this._actionHistory.getNextLocalActionNum() === 1) {
await this._actionHistory.recordNextShared({
userActions: [],
undo: [],
info: [0, this._makeInfo(makeExceptionalDocSession('system'))],
actionNum: 1,
actionHash: null, // set by ActionHistory
parentActionHash: null,
stored: [],
calc: [],
envelopes: [],
});
}
}
public getHomeDbManager() {
return this._docManager.getHomeDbManager();
}
/**
* Adds a small table to start off a newly-created blank document.
*/
public addInitialTable(docSession: OptDocSession) {
// Use a non-client-specific session, so that this action is not part of anyone's undo history.
const newDocSession = makeExceptionalDocSession('nascent');
return this.applyUserActions(newDocSession, [["AddEmptyTable"]]);
}
/**
* Imports files, removes previously created temporary hidden tables and creates the new ones.
* Param `prevTableIds` is an array of hiddenTableIds as received from previous `importFiles`
* call, or empty if there was no previous call.
*/
public importFiles(docSession: DocSession, dataSource: DataSourceTransformed,
parseOptions: ParseOptions, prevTableIds: string[]): Promise<ImportResult> {
return this._activeDocImport.importFiles(docSession, dataSource, parseOptions, prevTableIds);
}
/**
* Finishes import files, creates the new tables, and cleans up temporary hidden tables and uploads.
* Param `prevTableIds` is an array of hiddenTableIds as received from previous `importFiles`
* call, or empty if there was no previous call.
*/
public finishImportFiles(docSession: DocSession, dataSource: DataSourceTransformed,
prevTableIds: string[], importOptions: ImportOptions): Promise<ImportResult> {
return this._activeDocImport.finishImportFiles(docSession, dataSource, prevTableIds, importOptions);
}
/**
* Cancels import files, cleans up temporary hidden tables and uploads.
* Param `prevTableIds` is an array of hiddenTableIds as received from previous `importFiles`
* call, or empty if there was no previous call.
*/
public cancelImportFiles(docSession: DocSession, dataSource: DataSourceTransformed,
prevTableIds: string[]): Promise<void> {
return this._activeDocImport.cancelImportFiles(docSession, dataSource, prevTableIds);
}
/**
* Returns a diff of changes that will be applied to the destination table from `transformRule`
* if the data from `hiddenTableId` is imported with the specified `mergeOptions`.
*
* The diff is returned as a `DocStateComparison` of the same doc, with the `rightChanges`
* containing the updated cell values. Old values are pulled from the destination record (if
* a match was found), and new values are the result of merging in the new cell values with
* the merge strategy from `mergeOptions`.
*
* No distinction is currently made for added records vs. updated existing records; instead,
* we treat added records as an updated record in `hiddenTableId` where all the column
* values changed from blank to the original column values from `hiddenTableId`.
*/
public generateImportDiff(_docSession: DocSession, hiddenTableId: string, transformRule: TransformRule,
mergeOptions: MergeOptions): Promise<DocStateComparison> {
return this._activeDocImport.generateImportDiff(hiddenTableId, transformRule, mergeOptions);
}
/**
* Close the current document.
*/
public async closeDoc(docSession: DocSession): Promise<void> {
// Note that it's async only to satisfy the Rpc interface that expects a promise.
this.docClients.removeClient(docSession);
// If no more clients, schedule a shutdown.
if (this.docClients.clientCount() === 0) {
this.logInfo(docSession, "will self-close in %d ms", this._inactivityTimer.getDelay());
this._inactivityTimer.enable();
}
}
/**
* Import the given upload as new tables in one step.
*/
@ActiveDoc.keepDocOpen
public async oneStepImport(docSession: OptDocSession, uploadInfo: UploadInfo): Promise<void> {
await this._activeDocImport.oneStepImport(docSession, uploadInfo);
}
/**
* This function saves attachments from a given upload and creates an entry for them in the database.
* It returns the list of rowIds for the rows created in the _grist_Attachments table.
*/
public async addAttachments(docSession: OptDocSession, uploadId: number): Promise<number[]> {
const userId = getDocSessionUserId(docSession);
const upload: UploadInfo = globalUploadSet.getUploadInfo(uploadId, this.makeAccessId(userId));
try {
const userActions: UserAction[] = await Promise.all(
upload.files.map(file => this._prepAttachment(docSession, file)));
const result = await this.applyUserActions(docSession, userActions);
return result.retValues;
} finally {
await globalUploadSet.cleanup(uploadId);
}
}
/**
* Returns the record from _grist_Attachments table for the given attachment ID,
* or throws an error if not found.
*/
public getAttachmentMetadata(attId: number|string): RowRecord {
// docData should always be available after loadDoc() or createDoc().
if (!this.docData) {
throw new Error("No doc data");
}
// Parse strings into numbers to make more convenient to call from route handlers.
const attachmentId: number = (typeof attId === 'string') ? parseInt(attId, 10) : attId;
const attRecord = this.docData.getTable('_grist_Attachments')!.getRecord(attachmentId);
if (!attRecord) {
throw new ApiError(`Attachment not found: ${attId}`, 404);
}
return attRecord;
}
/**
* Given the fileIdent of an attachment, returns a promise for the attachment data.
* @param {String} fileIdent: The unique identifier of the attachment (as stored in fileIdent
* field of the _grist_Attachments table).
* @returns {Promise<Buffer>} Promise for the data of this attachment; rejected on error.
*/
public async getAttachmentData(docSession: OptDocSession, fileIdent: string): Promise<Buffer> {
// We don't know for sure whether the attachment is available via a table the user
// has access to, but at least they are presenting a SHA1 checksum of the file content,
// and they have at least view access to the document to get to this point. So we go ahead
// and serve the attachment.
const data = await this.docStorage.getFileData(fileIdent);
if (!data) { throw new ApiError("Invalid attachment identifier", 404); }
this.logInfo(docSession, "getAttachment: %s -> %s bytes", fileIdent, data.length);
return data;
}
/**
* Fetches the meta tables to return to the client when first opening a document.
*/
public async fetchMetaTables(docSession: OptDocSession) {
this.logInfo(docSession, "fetchMetaTables");
if (!this.docData) { throw new Error("No doc data"); }
// Get metadata from local cache rather than data engine, so that we can
// still get it even if data engine is busy calculating.
const tables: {[key: string]: TableDataAction} = {};
for (const [tableId, tableData] of this.docData.getTables().entries()) {
if (!tableId.startsWith('_grist_')) { continue; }
tables[tableId] = tableData.getTableDataAction();
}
return this._granularAccess.filterMetaTables(docSession, tables);
}
/**
* Makes sure document is completely initialized. May throw if doc is broken.
*/
public async waitForInitialization() {
if (this._initializationPromise) {
if (!await this._initializationPromise) {
throw new Error('ActiveDoc initialization failed');
}
}
return true;
}
// Check if user has rights to download this doc.
public async canDownload(docSession: OptDocSession) {
return this._granularAccess.canCopyEverything(docSession);
}
// Check if it is appropriate for the user to be treated as an owner of
// the document for granular access purposes when in "prefork" mode
// (meaning a document has been opened with the intent to fork it, but
// an initial modification has not yet been made).
// Currently, we decide it is appropriate if the user has access to all
// the data in the document, either directly or via the special
// "FullCopies" permission.
public async canForkAsOwner(docSession: OptDocSession) {
return this._granularAccess.canCopyEverything(docSession);
}
// Remove cached access information for a given session.
public flushAccess(docSession: OptDocSession) {
return this._granularAccess.flushAccess(docSession);
}
/**
* Fetches a particular table from the data engine to return to the client.
* @param {String} tableId: The string identifier of the table.
* @param {Boolean} waitForFormulas: If true, wait for all data to be loaded/calculated.
* @returns {Promise} Promise for the TableData object, which is a BulkAddRecord-like array of the
* form of the form ["TableData", table_id, row_ids, column_values].
*/
public async fetchTable(docSession: OptDocSession, tableId: string,
waitForFormulas: boolean = false): Promise<TableDataAction> {
return this.fetchQuery(docSession, {tableId, filters: {}}, waitForFormulas);
}
/**
* Fetches data according to the given query, which includes tableId and filters (see Query in
* app/common/ActiveDocAPI.ts). The data is fetched from the data engine for regular tables, or
* from the DocStorage directly for onDemand tables.
* @param {Boolean} waitForFormulas: If true, wait for all data to be loaded/calculated. If false,
* special "pending" values may be returned.
*/
public async fetchQuery(docSession: OptDocSession, query: ServerQuery,
waitForFormulas: boolean = false): Promise<TableDataAction> {
this._inactivityTimer.ping(); // The doc is in active use; ping it to stay open longer.
// If user does not have rights to access what this query is asking for, fail.
const tableAccess = await this._granularAccess.getTableAccess(docSession, query.tableId);
this._granularAccess.assertCanRead(tableAccess);
if (query.tableId.startsWith('_gristsys_')) {
throw new Error('Cannot fetch _gristsys tables');
}
if (query.tableId.startsWith('_grist_') && !await this._granularAccess.canReadEverything(docSession)) {
// Metadata tables may need filtering, and this can't be done by looking at a single
// table. So we pick out the table we want from fetchMetaTables (which has applied
// filtering).
const tables = await this.fetchMetaTables(docSession);
const table = tables[query.tableId];
if (table) { return table; }
// If table not found, continue, to give a consistent error for a table not found.
}
// Some tests read _grist_ tables via the api. The _fetchQueryFromDB method
// currently cannot read those tables, so we load them from the data engine
// when ready.
// Also, if row-level access is being controlled, we wait for formula columns
// to be populated.
const wantFull = waitForFormulas || query.tableId.startsWith('_grist_') ||
this._granularAccess.getReadPermission(tableAccess) === 'mixed';
const onDemand = this._onDemandActions.isOnDemand(query.tableId);
this.logInfo(docSession, "fetchQuery %s %s", JSON.stringify(query),
onDemand ? "(onDemand)" : "(regular)");
let data: TableDataAction;
if (onDemand) {
data = await this._fetchQueryFromDB(query, onDemand);
} else if (wantFull) {
await this.waitForInitialization();
data = await this._fetchQueryFromDataEngine(query);
} else {
if (!this._fullyLoaded) {
data = await this._fetchQueryFromDB(query, false);
}
if (this._fullyLoaded) { // Already loaded or finished loading while fetching from DB
const key = JSON.stringify(query);
// TODO: cache longer if the underlying fetch takes longer to do.
data = await mapGetOrSet(this._fetchCache, key, () => this._fetchQueryFromDataEngine(query));
}
}
// If row-level access is being controlled, filter the data appropriately.
// Likewise if column-level access is being controlled.
if (this._granularAccess.getReadPermission(tableAccess) !== 'allow') {
data = cloneDeep(data!); // Clone since underlying fetch may be cached and shared.
await this._granularAccess.filterData(docSession, data);
}
this.logInfo(docSession, "fetchQuery -> %d rows, cols: %s",
data![2].length, Object.keys(data![3]).join(", "));
return data!;
}
/**
* Fetches the generated schema for a given table.
* @param {String} tableId: The string identifier of the table.
* @returns {Promise} Promise for a string representing the generated table schema.
*/
public async fetchTableSchema(docSession: DocSession): Promise<string> {
this.logInfo(docSession, "fetchTableSchema(%s)", docSession);
// Permit code view if user can read everything, or can download/copy (perhaps
// via an exceptional permission for sample documents)
if (!(await this._granularAccess.canReadEverything(docSession) ||
await this.canDownload(docSession))) {
throw new ApiError('Cannot view code, it may contain private material', 403);
}
await this.waitForInitialization();
return this._pyCall('fetch_table_schema');
}
/**
* Makes a query (documented elsewhere) and subscribes to it, so that the client receives
* docActions that affect this query's results.
*/
public async useQuerySet(docSession: OptDocSession, query: ServerQuery): Promise<QueryResult> {
this.logInfo(docSession, "useQuerySet(%s, %s)", docSession, query);
// TODO implement subscribing to the query.
// - Convert tableId+colIds to TableData/ColData references
// - Return a unique identifier for unsubscribing
// - Each call can create its own object, return own identifier.
// - Subscription should not be affected by renames (so don't hold on to query/tableId/colIds)
// - Table/column deletion should make subscription inactive, and unsubscribing an inactive
// subscription should not produce an error.
const tableData: TableDataAction = await this.fetchQuery(docSession, query);
return {querySubId: 0, tableData};
}
/**
* Removes all subscriptions to the given query from this client, so that it stops receiving
* docActions relevant only to this query.
*/
public async disposeQuerySet(docSession: DocSession, querySubId: number): Promise<void> {
this.logInfo(docSession, "disposeQuerySet(%s, %s)", docSession, querySubId);
// TODO To-be-implemented
}
/**
* Returns the most likely target column in the document for the given column.
* @param {Array} values: An array of values to search for in columns in the document.
* @param {Number} n: Number of results to return.
* @param {String} optTableId: If a valid tableId, search only that table.
* @returns {Promise} Promise for an array of colRefs describing matching columns ordered from
* best to worst. Match quality is determined by searching only a sample of column data.
* See engine.py find_col_from_values for implementation.
*/
public async findColFromValues(docSession: DocSession, values: any[], n: number,
optTableId?: string): Promise<number[]> {
// This could leak information about private tables, so check for permission.
if (!await this._granularAccess.canScanData(docSession)) { return []; }
this.logInfo(docSession, "findColFromValues(%s, %s, %s)", docSession, values, n);
await this.waitForInitialization();
return this._pyCall('find_col_from_values', values, n, optTableId);
}
/**
* Returns column metadata for all visible columns from `tableId`.
*
* @param {string} tableId Table to retrieve column metadata for.
* @returns {Promise<TableRecordValue[]>} Records containing metadata about the visible columns
* from `tableId`.
*/
public async getTableCols(docSession: OptDocSession, tableId: string): Promise<TableRecordValue[]> {
const metaTables = await this.fetchMetaTables(docSession);
const tableRef = tableIdToRef(metaTables, tableId);
const [, , colRefs, columnData] = metaTables._grist_Tables_column;
// colId is pulled out of fields and used as the root id
const fieldNames = without(Object.keys(columnData), "colId");
const columns: TableRecordValue[] = [];
(columnData.colId as string[]).forEach((id, index) => {
if (
// TODO param to include hidden columns
id === "manualSort" || id.startsWith("gristHelper_") || !id ||
// Filter columns from the requested table
columnData.parentId[index] !== tableRef
) {
return;
}
const column: TableRecordValue = { id, fields: { colRef: colRefs[index] } };
for (const key of fieldNames) {
column.fields[key] = columnData[key][index];
}
columns.push(column);
});
return columns;
}
/**
* Returns error message (traceback) for one invalid formula cell.
* @param {String} tableId - Table name
* @param {String} colId - Column name
* @param {Integer} rowId - Row number
* @returns {Promise} Promise for a error message
*/
public async getFormulaError(docSession: DocSession, tableId: string, colId: string,
rowId: number): Promise<CellValue> {
if (!await this._granularAccess.hasTableAccess(docSession, tableId)) { return null; }
this.logInfo(docSession, "getFormulaError(%s, %s, %s, %s)",
docSession, tableId, colId, rowId);
await this.waitForInitialization();
return this._pyCall('get_formula_error', tableId, colId, rowId);
}
/**
* Applies an array of user actions received from a browser client.
*
* @param {Object} docSession: The client session originating this action.
* @param {Array} action: The user action to apply, e.g. ["UpdateRecord", tableId, rowId, etc].
* @param {Object} options: See _applyUserActions for documentation
* @returns {Promise:Array[Object]} Promise that's resolved when action is applied successfully.
* The array includes the retValue objects for each
* actionGroup.
*/
public async applyUserActions(docSession: OptDocSession, actions: UserAction[],
options?: ApplyUAOptions): Promise<ApplyUAResult> {
assert(Array.isArray(actions), "`actions` parameter should be an array.");
// Be careful not to sneak into user action queue before Calculate action, otherwise
// there'll be a deadlock.
await this.waitForInitialization();
// Granular access control implemented in _applyUserActions.
return await this._applyUserActions(docSession, actions, options);
}
/**
* A variant of applyUserActions where actions are passed in by ids (actionNum, actionHash)
* rather than by value.
*
* @param docSession: The client session originating this action.
* @param actionNums: The user actions to do/undo, by actionNum.
* @param actionHashes: actionHash checksums for each listed actionNum.
* @param undo: Whether the actions are to be undone.
* @param options: As for applyUserActions.
* @returns Promise of retValues, see applyUserActions.
*/
public async applyUserActionsById(docSession: DocSession,
actionNums: number[],
actionHashes: string[],
undo: boolean,
options?: ApplyUAOptions): Promise<ApplyUAResult> {
const actionBundles = await this._actionHistory.getActions(actionNums);
for (const [index, bundle] of actionBundles.entries()) {
const actionNum = actionNums[index];
const actionHash = actionHashes[index];
if (!bundle) { throw new Error(`Could not find actionNum ${actionNum}`); }
if (actionHash !== bundle.actionHash) {
throw new Error(`Hash mismatch for actionNum ${actionNum}: ` +
`expected ${actionHash} but got ${bundle.actionHash}`);
}
}
let actions: UserAction[];
if (undo) {
actions = [['ApplyUndoActions', flatten(actionBundles.map(a => a!.undo))]];
} else {
actions = flatten(actionBundles.map(a => a!.userActions));
}
// Granular access control implemented ultimately in _applyUserActions.
// It could be that error cases and timing etc leak some info prior to this
// point.
// Undos are best effort now by default.
return this.applyUserActions(docSession, actions, {bestEffort: undo, ...(options||{})});
}
/**
* Called by Sharing class for every LocalActionBundle (of our own actions) that gets applied.
*/
public async processActionBundle(localActionBundle: LocalActionBundle): Promise<void> {
const docData = this.docData;
if (!docData) { return; } // Happens on doc creation while processing InitNewDoc action.
localActionBundle.stored.forEach(da => docData.receiveAction(da[1]));
localActionBundle.calc.forEach(da => docData.receiveAction(da[1]));
const docActions = getEnvContent(localActionBundle.stored);
if (docActions.some(docAction => this._onDemandActions.isSchemaAction(docAction))) {
const indexes = this._onDemandActions.getDesiredIndexes();
await this.docStorage.updateIndexes(indexes);
// TODO: should probably add indexes for user attribute tables.
}
}
/**
* Used by tests to force an update indexes. We don't otherwise update indexes until
* there is a schema change.
*/
public async testUpdateIndexes() {
const indexes = this._onDemandActions.getDesiredIndexes();
await this.docStorage.updateIndexes(indexes);
}
public async removeInstanceFromDoc(docSession: DocSession): Promise<void> {
await this._sharing.removeInstanceFromDoc();
}
public async renameDocTo(docSession: OptDocSession, newName: string): Promise<void> {
this.logDebug(docSession, 'renameDoc', newName);
await this.docStorage.renameDocTo(newName);
this._docName = newName;
}
/**
* Initiates user actions bandling for undo.
*/
public startBundleUserActions(docSession: OptDocSession) {
if (!docSession.shouldBundleActions) {
docSession.shouldBundleActions = true;
docSession.linkId = 0;
}
}
/**
* Stops user actions bandling for undo.
*/
public stopBundleUserActions(docSession: OptDocSession) {
docSession.shouldBundleActions = false;
docSession.linkId = 0;
}
public async autocomplete(docSession: DocSession, txt: string, tableId: string, columnId: string): Promise<string[]> {
// Autocompletion can leak names of tables and columns.
if (!await this._granularAccess.canScanData(docSession)) { return []; }
await this.waitForInitialization();
const user = await this._granularAccess.getCachedUser(docSession);
return this._pyCall('autocomplete', txt, tableId, columnId, user.toJSON());
}
public fetchURL(docSession: DocSession, url: string, options?: FetchUrlOptions): Promise<UploadResult> {
return fetchURL(url, this.makeAccessId(docSession.authorizer.getUserId()), options);
}
public async forwardPluginRpc(docSession: DocSession, pluginId: string, msg: IMessage): Promise<any> {
if (await this._granularAccess.hasNuancedAccess(docSession)) {
throw new Error('cannot confirm access to plugin');
}
const pluginRpc = this.docPluginManager.plugins[pluginId].rpc;
switch (msg.mtype) {
case MsgType.RpcCall: return pluginRpc.forwardCall(msg);
case MsgType.Custom: return pluginRpc.forwardMessage(msg);
}
throw new Error(`Invalid message type for forwardPluginRpc: ${msg.mtype}`);
}
/**
* Reload documents plugins.
*/
public async reloadPlugins(docSession: DocSession) {
// refresh the list plugins found on the system
await this._docManager.pluginManager.reloadPlugins();
const plugins = this._docManager.pluginManager.getPlugins();
// reload found plugins
await this.docPluginManager.reload(plugins);
}
/**
* Immediately close the document and data engine, to be reloaded from scratch, and cause all
* browser clients to reopen it.
*/
public async reloadDoc(docSession?: DocSession) {
return this.shutdown();
}
public isOwner(docSession: OptDocSession): Promise<boolean> {
return this._granularAccess.isOwner(docSession);
}
/**
* Fork the current document. In fact, all that requires is calculating a good
* ID for the fork. TODO: reconcile the two ways there are now of preparing a fork.
*/
public async fork(docSession: OptDocSession): Promise<ForkResult> {
const user = getDocSessionUser(docSession);
// For now, fork only if user can read everything (or is owner).
// TODO: allow forks with partial content.
if (!user || !await this.canDownload(docSession)) {
throw new ApiError('Insufficient access to document to copy it entirely', 403);
}
const userId = user.id;
const isAnonymous = this._docManager.isAnonymous(userId);
// Get fresh document metadata (the cached metadata doesn't include the urlId).
const doc = await docSession.authorizer?.getDoc();
if (!doc) { throw new Error('document id not known'); }
const trunkDocId = doc.id;
const trunkUrlId = doc.urlId || doc.id;
await this.flushDoc(); // Make sure fork won't be too out of date.
const forkIds = makeForkIds({userId, isAnonymous, trunkDocId, trunkUrlId});
// To actually create the fork, we call an endpoint. This is so the fork
// can be associated with an arbitrary doc worker, rather than tied to the
// same worker as the trunk. We use a Permit for authorization.
const permitStore = this._docManager.gristServer.getPermitStore();
const permitKey = await permitStore.setPermit({docId: forkIds.docId,
otherDocId: this.docName});
try {
const url = await this._docManager.gristServer.getHomeUrlByDocId(
forkIds.docId, `/api/docs/${forkIds.docId}/create-fork`);
const resp = await fetch(url, {
method: 'POST',
body: JSON.stringify({ srcDocId: this.docName }),
headers: {
'Permit': permitKey,
'Content-Type': 'application/json',
},
});
if (resp.status !== 200) {
throw new ApiError(resp.statusText, resp.status);
}
} finally {
await permitStore.removePermit(permitKey);
}
return forkIds;
}
/**
* Check if an ACL formula is valid. If not, will throw an error with an explanation.
*/
public async checkAclFormula(docSession: DocSession, text: string): Promise<FormulaProperties> {
// Checks can leak names of tables and columns.
if (await this._granularAccess.hasNuancedAccess(docSession)) { return {}; }
await this.waitForInitialization();
try {
const parsedAclFormula = await this._pyCall('parse_acl_formula', text);
compileAclFormula(parsedAclFormula);
// TODO We also need to check the validity of attributes, and of tables and columns
// mentioned in resources and userAttribute rules.
return getFormulaProperties(parsedAclFormula);
} catch (e) {
e.message = e.message?.replace('[Sandbox] ', '');
throw e;
}
}
/**
* Returns the full set of tableIds, with the list of colIds for each table. This is intended
* for editing ACLs. It is only available to users who can edit ACLs, and lists all resources
* regardless of rules that may block access to them.
*/
public async getAclResources(docSession: DocSession): Promise<{[tableId: string]: string[]}> {
if (!this.docData || !await this._granularAccess.hasAccessRulesPermission(docSession)) {
throw new Error('Cannot list ACL resources');
}
const result: {[tableId: string]: string[]} = {};
const tables = this.docData.getTable('_grist_Tables')!;
for (const tableId of tables.getColValues('tableId')!) {
result[tableId as string] = ['id'];
}
const columns = this.docData.getTable('_grist_Tables_column')!;
for (const col of columns.getRecords()) {
const tableId = tables.getValue(col.parentId as number, 'tableId');
result[tableId as string].push(col.colId as string);
}
return result;
}
/**
* Get users that are worth proposing to "View As" for access control purposes.
* User are drawn from the following sources:
* - Users document is shared with.
* - Users mentioned in user attribute tables keyed by email address.
* - Some predefined example users.
*
* The users the document is shared with are only available if the
* user is an owner of the document (or, in a fork, an owner of the
* trunk document). For viewers or editors, only the user calling
* the method will be included as users the document is shared with.
*
* Users mentioned in user attribute tables will be available to any user with
* the right to view access rules.
*
* Example users are always included.
*/
public async getUsersForViewAs(docSession: DocSession): Promise<PermissionDataWithExtraUsers> {
// Make sure we have rights to view access rules.
const db = this.getHomeDbManager();
if (!db || !await this._granularAccess.hasAccessRulesPermission(docSession)) {
throw new Error('Cannot list ACL users');
}
// Prepare a stub for the collected results.
const result: PermissionDataWithExtraUsers = {
users: [],
attributeTableUsers: [],
exampleUsers: [],
};
const isShared = new Set<string>();
// Collect users the document is shared with.
const userId = getDocSessionUserId(docSession);
if (!userId) { throw new Error('Cannot determine user'); }
const access = db.unwrapQueryResult(
await db.getDocAccess({userId, urlId: this.docName}, {
flatten: true, excludeUsersWithoutAccess: true,
}));
result.users = access.users;
result.users.forEach(user => isShared.add(normalizeEmail(user.email)));
// Collect users from user attribute tables. Omit duplicates with users the document is
// shared with.
const usersFromUserAttributes = await this._granularAccess.collectViewAsUsersFromUserAttributeTables();
for (const user of usersFromUserAttributes) {
if (!user.email) { continue; }
const email = normalizeEmail(user.email);
if (!isShared.has(email)) {
result.attributeTableUsers.push({email: user.email, name: user.name || '',
id: 0, access: user.access === undefined ? 'editors' : user.access});
}
}
// Add some example users.
result.exampleUsers = this._granularAccess.getExampleViewAsUsers();
return result;
}
public getGristDocAPI(): GristDocAPI {
return this.docPluginManager.gristDocAPI;
}
// Get recent actions in ActionGroup format with summaries included.
public async getActionSummaries(docSession: DocSession): Promise<ActionGroup[]> {
return this.getRecentActions(docSession, true);
}
/**
* Applies normal actions to the data engine while processing onDemand actions separately.
* Should only be called by a Sharing object, with this._modificationLock held, since the
* actions may need to be rolled back if final access control checks fail.
*/
public async applyActionsToDataEngine(
docSession: OptDocSession|null,
userActions: UserAction[]
): Promise<SandboxActionBundle> {
const [normalActions, onDemandActions] = this._onDemandActions.splitByOnDemand(userActions);
let sandboxActionBundle: SandboxActionBundle;
if (normalActions.length > 0) {
// For all but the special 'Calculate' action, we wait for full initialization.
if (normalActions[0][0] !== 'Calculate') {
await this.waitForInitialization();
}
const user = docSession ? await this._granularAccess.getCachedUser(docSession) : undefined;
sandboxActionBundle = await this._rawPyCall('apply_user_actions', normalActions, user?.toJSON());
await this._reportDataEngineMemory();
} else {
// Create default SandboxActionBundle to use if the data engine is not called.
sandboxActionBundle = createEmptySandboxActionBundle();
}
if (onDemandActions.length > 0) {
const allIndex = findOrAddAllEnvelope(sandboxActionBundle.envelopes);
await this.docStorage.execTransaction(async () => {
for (const action of onDemandActions) {
const {stored, undo, retValues} = await this._onDemandActions.processUserAction(action);
// Note: onDemand stored/undo actions are arbitrarily processed/added after normal actions
// and do not support access control.
sandboxActionBundle.stored.push(...stored.map(a => [allIndex, a] as [number, DocAction]));
sandboxActionBundle.direct.push(...stored.map(a => [allIndex, true] as [number, boolean]));
sandboxActionBundle.undo.push(...undo.map(a => [allIndex, a] as [number, DocAction]));
sandboxActionBundle.retValues.push(retValues);
}
});
}
return sandboxActionBundle;
}
// Needed for test/server/migrations.js tests
public async testGetVersionFromDataEngine() {
return this._pyCall('get_version');
}
// Needed for test/server/lib/HostedStorageManager.ts tests
public async testKeepOpen() {
this._inactivityTimer.ping();
}
public async getSnapshots(skipMetadataCache?: boolean): Promise<DocSnapshots> {
// Assume any viewer can access this list.
return this._docManager.storageManager.getSnapshots(this.docName, skipMetadataCache);
}
public async removeSnapshots(docSession: OptDocSession, snapshotIds: string[]): Promise<void> {
if (!await this.isOwner(docSession)) {
throw new Error('cannot remove snapshots, access denied');
}
return this._docManager.storageManager.removeSnapshots(this.docName, snapshotIds);
}
public async deleteActions(docSession: OptDocSession, keepN: number): Promise<void> {
if (!await this.isOwner(docSession)) {
throw new Error('cannot delete actions, access denied');
}
await this._actionHistory.deleteActions(keepN);
}
/**
* Make sure the current version of the document has been pushed to persistent
* storage.
*/
public async flushDoc(): Promise<void> {
return this._docManager.storageManager.flushDoc(this.docName);
}
public makeAccessId(userId: number|null): string|null {
return this._docManager.makeAccessId(userId);
}
/**
* Called by Sharing manager when working on modifying the document.
* Called when DocActions have been produced from UserActions, but
* before those DocActions have been applied to the DB. GranularAccessBundle
* methods can confirm that those DocActions are legal according to any
* granular access rules.
*/
public getGranularAccessForBundle(docSession: OptDocSession, docActions: DocAction[], undo: DocAction[],
userActions: UserAction[], isDirect: boolean[]): GranularAccessForBundle {
this._granularAccess.getGranularAccessForBundle(docSession, docActions, undo, userActions, isDirect);
return this._granularAccess;
}
/**
* Loads an open document from DocStorage. Returns a list of the tables it contains.
*/
protected async _loadOpenDoc(docSession: OptDocSession): Promise<string[]> {
// Check the schema version of document and sandbox, and migrate if the sandbox is newer.
const schemaVersion = SCHEMA_VERSION;
// Migrate the document if needed.
const docInfo = await this._tableMetadataLoader.fetchBulkColValuesWithoutIds('_grist_DocInfo');
const versionCol = docInfo.schemaVersion;
const docSchemaVersion = (versionCol && versionCol.length === 1 ? versionCol[0] : 0) as number;
if (docSchemaVersion < schemaVersion) {
this.logInfo(docSession, "Doc needs migration from v%s to v%s", docSchemaVersion, schemaVersion);
await this._beforeMigration(docSession, 'schema', docSchemaVersion, schemaVersion);
let success: boolean = false;
try {
await this._migrate(docSession);
success = true;
} finally {
await this._afterMigration(docSession, 'schema', schemaVersion, success);
await this._tableMetadataLoader.clean(); // _grist_DocInfo may have changed.
}
} else if (docSchemaVersion > schemaVersion) {
// We do NOT attempt to down-migrate in this case. Migration code cannot down-migrate
// directly (since it doesn't know anything about newer documents). We could revert the
// migration action, but that requires merging and still may not be safe. For now, doing
// nothing seems best, as long as we follow the recommendations in migrations.py (never
// remove/modify/rename metadata tables or columns, or change their meaning).
this.logWarn(docSession, "Doc is newer (v%s) than this version of Grist (v%s); " +
"proceeding with fingers crossed", docSchemaVersion, schemaVersion);
}
// Start loading the initial meta tables which determine the document schema.
this._tableMetadataLoader.startStreamingToEngine();
this._tableMetadataLoader.startFetchingTable('_grist_Tables');
this._tableMetadataLoader.startFetchingTable('_grist_Tables_column');
// Get names of remaining tables.
const tablesParsed = await this._tableMetadataLoader.fetchBulkColValuesWithoutIds('_grist_Tables');
const tableNames = (tablesParsed.tableId as string[])
.concat(Object.keys(schema))
.filter(tableId => tableId !== '_grist_Tables' && tableId !== '_grist_Tables_column')
.sort();
// Figure out which tables are on-demand.
const onDemandMap = zipObject(tablesParsed.tableId as string[], tablesParsed.onDemand);
const onDemandNames = remove(tableNames, (t) => onDemandMap[t]);
this.logDebug(docSession, "found %s tables: %s", tableNames.length,
tableNames.join(", "));
this.logDebug(docSession, "skipping %s on-demand tables: %s", onDemandNames.length,
onDemandNames.join(", "));
return tableNames;
}
/**
* Applies an array of user actions to the sandbox and broadcasts the results to doc's clients.
*
* @private
* @param {Object} client - The client originating this action. May be null.
* @param {Array} actions - The user actions to apply.
* @param {String} options.desc - Description of the action which overrides the default client
* description if provided. Should be used to describe bundled actions.
* @param {Int} options.otherId - Action number for the original useraction to which this undo/redo
* action applies.
* @param {Boolean} options.linkId - ActionNumber of the previous action in an undo/redo bundle.
* @returns {Promise} Promise that's resolved when all actions are applied successfully to {
* actionNum: number of the action that got recorded
* retValues: array of return values, one for each of the passed-in user actions.
* isModification: true if document was changed by one or more actions.
* }
*/
@ActiveDoc.keepDocOpen
protected async _applyUserActions(docSession: OptDocSession, actions: UserAction[],
options: ApplyUAOptions = {}): Promise<ApplyUAResult> {
const client = docSession.client;
this.logDebug(docSession, "_applyUserActions(%s, %s)", client, shortDesc(actions));
this._inactivityTimer.ping(); // The doc is in active use; ping it to stay open longer.
if (options?.bestEffort) {
actions = await this._granularAccess.prefilterUserActions(docSession, actions);
}
await this._granularAccess.assertCanMaybeApplyUserActions(docSession, actions);
// Create the UserActionBundle.
const action: UserActionBundle = {
info: this._makeInfo(docSession, options),
userActions: actions,
};
const result: ApplyUAResult = await new Promise<ApplyUAResult>(
(resolve, reject) =>
this._sharing.addUserAction({action, docSession, resolve, reject}));
this.logDebug(docSession, "_applyUserActions returning %s", shortDesc(result));
if (result.isModification) {
this._fetchCache.clear(); // This could be more nuanced.
this._docManager.markAsChanged(this, 'edit');
}
return result;
}
/**
* Create a new document file without using or initializing the data engine.
*/
@ActiveDoc.keepDocOpen
private async _createDocFile(docSession: OptDocSession, options?: {
skipInitialTable?: boolean, // If set, "Table1" will not be added.
}): Promise<void> {
this.logDebug(docSession, "createDoc");
await this._docManager.storageManager.prepareToCreateDoc(this.docName);
await this.docStorage.createFile();
const sql = options?.skipInitialTable ? GRIST_DOC_SQL : GRIST_DOC_WITH_TABLE1_SQL;
await this.docStorage.exec(sql);
const timezone = docSession.browserSettings?.timezone ?? DEFAULT_TIMEZONE;
const locale = docSession.browserSettings?.locale ?? DEFAULT_LOCALE;
await this.docStorage.run('UPDATE _grist_DocInfo SET timezone = ?, documentSettings = ?',
[timezone, JSON.stringify({locale})]);
}
private _makeInfo(docSession: OptDocSession, options: ApplyUAOptions = {}) {
const client = docSession.client;
const user = docSession.mode === 'system' ? 'grist' :
(client?.getProfile()?.email || '');
return {
time: Date.now(),
user,
inst: this._sharing.instanceId || "unset-inst",
desc: options.desc,
otherId: options.otherId || 0,
linkId: options.linkId || 0,
};
}
/**
* Prepares a single attachment by adding it DocStorage and returns a UserAction to apply.
*/
private async _prepAttachment(docSession: OptDocSession, fileData: FileUploadInfo): Promise<UserAction> {
// Check that upload size is within the configured limits.
const limit = (Number(process.env.GRIST_MAX_UPLOAD_ATTACHMENT_MB) * 1024 * 1024) || Infinity;
if (fileData.size > limit) {
throw new ApiError(`Attachments must not exceed ${byteString(limit)}`, 413);
}
let dimensions: {width?: number, height?: number} = {};
// imageSize returns an object with a width, height and type property if the file is an image.
// The width and height properties are integers representing width and height in pixels.
try {
dimensions = await bluebird.fromCallback((cb: any) => imageSize(fileData.absPath, cb));
} catch (err) {
// Non-images will fail in some way, and that's OK.
dimensions.height = 0;
dimensions.width = 0;
}
const checksum = await checksumFile(fileData.absPath);
const fileIdent = checksum + fileData.ext;
const ret: boolean = await this.docStorage.findOrAttachFile(fileData.absPath, fileIdent);
this.logInfo(docSession, "addAttachment: file %s (image %sx%s) %s", fileIdent,
dimensions.width, dimensions.height, ret ? "attached" : "already exists");
return ['AddRecord', '_grist_Attachments', null, {
fileIdent,
fileName: fileData.origName,
// We used to set fileType, but it's not easily available for native types. Since it's
// also entirely unused, we just skip it until it becomes relevant.
fileSize: fileData.size,
imageHeight: dimensions.height,
imageWidth: dimensions.width,
timeUploaded: Date.now()
}];
}
/**
* If the software is newer than the document, migrate the document by fetching all tables, and
* giving them to the sandbox so that it can produce migration actions.
* TODO: We haven't figured out how to do sharing between different Grist versions that
* expect different schema versions. The returned actions at the moment aren't even shared with
* collaborators.
*/
private async _migrate(docSession: OptDocSession): Promise<void> {
const tableNames = await this.docStorage.getAllTableNames();
// Fetch only metadata tables first, and try to migrate with only those.
const tableData: {[key: string]: Buffer|null} = {};
for (const tableName of tableNames) {
if (tableName.startsWith('_grist_')) {
tableData[tableName] = await this.docStorage.fetchTable(tableName);
}
}
let docActions: DocAction[];
try {
// The last argument tells create_migrations() that only metadata is included.
docActions = await this._rawPyCall('create_migrations', tableData, true);
} catch (e) {
if (!/need all tables/.test(e.message)) {
throw e;
}
// If the migration failed because it needs all tables (i.e. involves changes to data), then
// fetch them all. TODO: This is used for some older migrations, and is relied on by tests.
// If a new migration needs this flag, more work is needed. The current approach creates
// more memory pressure than usual since full data is present in memory at once both in node
// and in Python; and it doesn't skip onDemand tables. This is liable to cause crashes.
this.logWarn(docSession, "_migrate: retrying with all tables");
for (const tableName of tableNames) {
if (!tableData[tableName] && !tableName.startsWith('_gristsys_')) {
tableData[tableName] = await this.docStorage.fetchTable(tableName);
}
}
docActions = await this._rawPyCall('create_migrations', tableData);
}
const processedTables = Object.keys(tableData);
const numSchema = countIf(processedTables, t => t.startsWith("_grist_"));
const numUser = countIf(processedTables, t => !t.startsWith("_grist_"));
this.logInfo(docSession, "_migrate: applying %d migration actions (processed %s schema, %s user tables)",
docActions.length, numSchema, numUser);
docActions.forEach((action, i) => this.logInfo(docSession, "_migrate: docAction %s: %s", i, shortDesc(action)));
await this.docStorage.execTransaction(() => this.docStorage.applyStoredActions(docActions));
}
/**
* Load the specified tables into the data engine.
*/
private async _loadTables(docSession: OptDocSession, tableNames: string[]) {
this.logDebug(docSession, "loading %s tables: %s", tableNames.length,
tableNames.join(", "));
// Pass the resulting array to `map`, which allows parallel processing of the tables. Database
// and DataEngine may still do things serially, but it allows them to be busy simultaneously.
await bluebird.map(tableNames, async (tableName: string) =>
this._pyCall('load_table', tableName, await this._fetchTableIfPresent(tableName)),
// How many tables to query for and push to the data engine in parallel.
{ concurrency: 3 });
return this;
}
/**
* Start loading the specified tables from the db, without waiting for completion.
* The loader can be directed to stream the tables on to the engine.
*/
private _startLoadingTables(docSession: OptDocSession, tableNames: string[]) {
this.logDebug(docSession, "starting to load %s tables: %s", tableNames.length,
tableNames.join(", "));
for (const tableId of tableNames) {
this._tableMetadataLoader.startFetchingTable(tableId);
}
}
// Fetches and returns the requested table, or null if it's missing. This allows documents to
// load with missing metadata tables (should only matter if migrations are also broken).
private async _fetchTableIfPresent(tableName: string): Promise<Buffer|null> {
try {
return await this.docStorage.fetchTable(tableName);
} catch (err) {
if (/no such table/.test(err.message)) { return null; }
throw err;
}
}
// It's a bit risky letting "Calculate" (and other formula-dependent calls) to disable
// inactivityTimer, since a user formulas with an infinite loop can disable it forever.
// TODO find a solution to this issue.
@ActiveDoc.keepDocOpen
private async _finishInitialization(docSession: OptDocSession, pendingTableNames: string[], startTime: number) {
try {
await this._tableMetadataLoader.wait();
await this._tableMetadataLoader.clean();
await this._loadTables(docSession, pendingTableNames);
// Calculations are not associated specifically with the user opening the document.
// TODO: be careful with which users can create formulas.
await this._applyUserActions(makeExceptionalDocSession('system'), [['Calculate']]);
await this._reportDataEngineMemory();
this._fullyLoaded = true;
const endTime = Date.now();
const loadMs = endTime - startTime;
// Adjust the inactivity timer: if the load took under 1 sec, use the regular timeout; if it
// took longer, scale it up proportionately.
const closeTimeout = Math.max(loadMs, 1000) * Deps.ACTIVEDOC_TIMEOUT;
this._inactivityTimer.setDelay(closeTimeout);
this.logDebug(docSession, `loaded in ${loadMs} ms, InactivityTimer set to ${closeTimeout} ms`);
return true;
} catch (err) {
if (!this._shuttingDown) {
this.logWarn(docSession, "_finishInitialization stopped with %s", err);
}
this._fullyLoaded = true;
return false;
}
}
private async _fetchQueryFromDB(query: ServerQuery, onDemand: boolean): Promise<TableDataAction> {
// Expand query to compute formulas (or include placeholders for them).
const expandedQuery = expandQuery(query, this.docData!, onDemand);
const marshalled = await this.docStorage.fetchQuery(expandedQuery);
const table = this.docStorage.decodeMarshalledData(marshalled, query.tableId);
// Substitute in constant values for errors / placeholders.
if (expandedQuery.constants) {
for (const colId of Object.keys(expandedQuery.constants)) {
const constant = expandedQuery.constants[colId];
table[colId] = table[colId].map(() => constant);
}
}
return toTableDataAction(query.tableId, table);
}
private async _fetchQueryFromDataEngine(query: ServerQuery): Promise<TableDataAction> {
return this._pyCall('fetch_table', query.tableId, true, query.filters);
}
private async _reportDataEngineMemory() {
const now = Date.now();
if (now >= this._lastMemoryMeasurement + MEMORY_MEASUREMENT_INTERVAL_MS) {
this._lastMemoryMeasurement = now;
if (this._dataEngine && !this._shuttingDown) {
const dataEngine = await this._getEngine();
await dataEngine.reportMemoryUsage();
}
}
}
private _log(level: string, docSession: OptDocSession, msg: string, ...args: any[]) {
log.origLog(level, `ActiveDoc ` + msg, ...args, this.getLogMeta(docSession));
}
/**
* Called before a migration. Makes sure a back-up is made.
*/
private async _beforeMigration(docSession: OptDocSession, versionType: 'storage' | 'schema',
currentVersion: number, newVersion: number) {
this._migrating++;
const label = `migrate-${versionType}-last-v${currentVersion}-before-v${newVersion}`;
this._docManager.markAsChanged(this); // Give backup current time.
const location = await this._docManager.makeBackup(this, label);
this.logInfo(docSession, "_beforeMigration: backup made with label %s at %s", label, location);
this.emit("backupMade", location);
}
/**
* Called after a migration.
*/
private async _afterMigration(docSession: OptDocSession, versionType: 'storage' | 'schema',
newVersion: number, success: boolean) {
this._migrating--;
// Mark as changed even if migration is not successful, out of caution.
if (!this._migrating) { this._docManager.markAsChanged(this); }
}
/**
* Call a method in the sandbox, without checking the _modificationLock. Calls to
* the sandbox are naturally serialized.
*/
private async _rawPyCall(funcName: string, ...varArgs: unknown[]): Promise<any> {
const dataEngine = await this._getEngine();
return dataEngine.pyCall(funcName, ...varArgs);
}
/**
* Call a method in the sandbox, while checking on the _modificationLock. If the
* lock is held, the call will wait until the lock is released, and then hold
* the lock itself while operating.
*/
private _pyCall(funcName: string, ...varArgs: unknown[]): Promise<any> {
return this._modificationLock.runExclusive(() => this._rawPyCall(funcName, ...varArgs));
}
private async _getEngine(): Promise<ISandbox> {
if (this._shuttingDown) { throw new Error('shutting down, data engine unavailable'); }
this._dataEngine = this._dataEngine || this._makeEngine();
return this._dataEngine;
}
private async _makeEngine(): Promise<ISandbox> {
// Figure out what kind of engine we need for this document.
let preferredPythonVersion: '2' | '3' = process.env.PYTHON_VERSION === '3' ? '3' : '2';
// Currently only respect engine preference on experimental deployments (staging/dev).
if (process.env.GRIST_EXPERIMENTAL_PLUGINS === '1') {
// Careful, migrations may not have run on this document and it may not have a
// documentSettings column. Failures are treated as lack of an engine preference.
const docInfo = await this.docStorage.get('SELECT documentSettings FROM _grist_DocInfo').catch(e => undefined);
const docSettingsString = docInfo?.documentSettings;
if (docSettingsString) {
const docSettings: DocumentSettings|undefined = safeJsonParse(docSettingsString, undefined);
const engine = docSettings?.engine;
if (engine) {
if (engine === 'python2') {
preferredPythonVersion = '2';
} else if (engine === 'python3') {
preferredPythonVersion = '3';
} else {
throw new Error(`engine type not recognized: ${engine}`);
}
}
}
}
return this._docManager.gristServer.create.NSandbox({
comment: this._docName,
logCalls: false,
logTimes: true,
logMeta: {docId: this._docName},
docUrl: this._options?.docUrl,
preferredPythonVersion,
});
}
}
// Helper to initialize a sandbox action bundle with no values.
function createEmptySandboxActionBundle(): SandboxActionBundle {
return {
envelopes: [],
stored: [],
direct: [],
calc: [],
undo: [],
retValues: []
};
}
// Helper that converts a Grist table id to a ref.
export function tableIdToRef(metaTables: { [p: string]: TableDataAction }, tableId: string) {
const [, , tableRefs, tableData] = metaTables._grist_Tables;
const tableRowIndex = tableData.tableId.indexOf(tableId);
if (tableRowIndex === -1) {
throw new ApiError(`Table not found "${tableId}"`, 404);
}
return tableRefs[tableRowIndex];
}