mirror of
https://github.com/gristlabs/grist-core.git
synced 2026-03-02 04:09:24 +00:00
(core) Fix issue with 'UNEXPECTED ORDER OF CALLBACKS' in Client.ts.
Summary: - Substantial refactoring of the logic when the server fails to send some messages to a client. - Add seqId numbers to server messages to ensure reliable order. - Add a needReload flag in clientConnect for a clear indication whent the browser client needs to reload the app. - Reproduce some potential failure scenarios in a test case (some of which previously could have led to incorrectly ordered messages). - Convert other Comm tests to typescript. - Tweak logging of Comm and Client to be slightly more concise (in particular, avoid logging sessionId) Note that despite the big refactoring, this only addresses a fairly rare situation, with websocket failures while server is trying to send to the client. It includes no improvements for failures while the client is sending to the server. (I looked for an existing library that would take care of these issues. A relevant article I found is https://docs.microsoft.com/en-us/azure/azure-web-pubsub/howto-develop-reliable-clients, but it doesn't include a library for both ends, and is still in review. Other libraries with similar purposes did not inspire enough confidence.) Test Plan: New test cases, which reproduce some previously problematic scenarios. Reviewers: paulfitz Reviewed By: paulfitz Differential Revision: https://phab.getgrist.com/D3470
This commit is contained in:
@@ -73,12 +73,12 @@ export class Client {
|
||||
|
||||
private _session: ScopedSession|null = null;
|
||||
|
||||
private _log = new LogMethods('Client ', (s: null) => this.getLogMeta());
|
||||
private _log = new LogMethods('Client ', (extra?: object|null) => this.getLogMeta(extra || {}));
|
||||
|
||||
// Maps docFDs to DocSession objects.
|
||||
private _docFDs: Array<DocSession|null> = [];
|
||||
|
||||
private _missedMessages: string[] = [];
|
||||
private _missedMessages = new Map<number, string>();
|
||||
private _destroyTimer: NodeJS.Timer|null = null;
|
||||
private _destroyed: boolean = false;
|
||||
private _websocket: WebSocket|null;
|
||||
@@ -88,13 +88,15 @@ export class Client {
|
||||
private _userName: string|null = null;
|
||||
private _firstLoginAt: Date|null = null;
|
||||
private _isAnonymous: boolean = false;
|
||||
private _nextSeqId: number = 0; // Next sequence-ID for messages sent to the client
|
||||
|
||||
// Identifier for the current GristWSConnection object connected to this client.
|
||||
private _counter: string|null = null;
|
||||
|
||||
constructor(
|
||||
private _comm: Comm,
|
||||
private _methods: Map<string, ClientMethod>,
|
||||
private _locale: string,
|
||||
// Identifier for the current GristWSConnection object connected to this client.
|
||||
private _counter: string|null,
|
||||
) {
|
||||
this.clientId = generateClientId();
|
||||
}
|
||||
@@ -105,13 +107,14 @@ export class Client {
|
||||
return this._locale;
|
||||
}
|
||||
|
||||
public setConnection(websocket: WebSocket, browserSettings: BrowserSettings) {
|
||||
public setConnection(websocket: WebSocket, counter: string|null, browserSettings: BrowserSettings) {
|
||||
this._websocket = websocket;
|
||||
this._counter = counter;
|
||||
this.browserSettings = browserSettings;
|
||||
|
||||
websocket.on('error', this._onError.bind(this));
|
||||
websocket.on('close', this._onClose.bind(this));
|
||||
websocket.on('message', this._onMessage.bind(this));
|
||||
websocket.on('error', (err) => this._onError(err));
|
||||
websocket.on('close', () => this._onClose());
|
||||
websocket.on('message', (msg: string) => this._onMessage(msg));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -138,25 +141,10 @@ export class Client {
|
||||
this._docFDs[fd] = null;
|
||||
}
|
||||
|
||||
// Check that client still has access to all documents. Used to determine whether
|
||||
// a Comm client can be safely reused after a reconnect. Without this check, the client
|
||||
// would be reused even if access to a document has been lost (although an error would be
|
||||
// issued later, on first use of the document).
|
||||
public async isAuthorized(): Promise<boolean> {
|
||||
for (const docFD of this._docFDs) {
|
||||
try {
|
||||
if (docFD !== null) { await docFD.authorizer.assertAccess('viewers'); }
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes all docs.
|
||||
* Closes all docs. Returns the number of documents closed.
|
||||
*/
|
||||
public closeAllDocs() {
|
||||
public closeAllDocs(): number {
|
||||
let count = 0;
|
||||
for (let fd = 0; fd < this._docFDs.length; fd++) {
|
||||
const docSession = this._docFDs[fd];
|
||||
@@ -168,7 +156,7 @@ export class Client {
|
||||
}
|
||||
this._docFDs[fd] = null;
|
||||
}
|
||||
this._log.debug(null, "closeAllDocs() closed %d doc(s)", count);
|
||||
return count;
|
||||
}
|
||||
|
||||
public interruptConnection() {
|
||||
@@ -187,7 +175,8 @@ export class Client {
|
||||
return;
|
||||
}
|
||||
|
||||
const message: string = JSON.stringify(messageObj);
|
||||
const seqId = this._nextSeqId++;
|
||||
const message: string = JSON.stringify({...messageObj, seqId});
|
||||
|
||||
// Log something useful about the message being sent.
|
||||
if ('error' in messageObj && messageObj.error) {
|
||||
@@ -199,91 +188,125 @@ export class Client {
|
||||
try {
|
||||
await this._sendToWebsocket(message);
|
||||
} catch (err) {
|
||||
// Sending failed. Presumably we should be getting onClose around now too.
|
||||
// NOTE: if this handler is run after onClose, we could have messages end up out of order.
|
||||
// Let's check to make sure. If this can happen, we need to refactor for correct ordering.
|
||||
if (!this._websocket) {
|
||||
this._log.error(null, "sendMessage: UNEXPECTED ORDER OF CALLBACKS");
|
||||
}
|
||||
this._log.warn(null, "sendMessage: queuing after send error: %s", err.toString());
|
||||
this._missedMessages.push(message);
|
||||
// Sending failed. Add the message to missedMessages.
|
||||
this._log.warn(null, "sendMessage: queuing after send error:", err.toString());
|
||||
this._missedMessages.set(seqId, message);
|
||||
|
||||
// NOTE: A successful send does NOT mean the message was received. For a better system, see
|
||||
// https://docs.microsoft.com/en-us/azure/azure-web-pubsub/howto-develop-reliable-clients
|
||||
// (keeping a copy of messages until acked). With our system, we are more likely to be
|
||||
// lacking the needed messages on reconnect, and having to reset the client.
|
||||
}
|
||||
} else if (this._missedMessages.length < clientMaxMissedMessages) {
|
||||
} else if (this._missedMessages.size < clientMaxMissedMessages) {
|
||||
// Queue up the message.
|
||||
this._missedMessages.push(message);
|
||||
this._missedMessages.set(seqId, message);
|
||||
} else {
|
||||
// Too many messages queued. Boot the client now, to make it reset when/if it reconnects.
|
||||
this._log.error(null, "sendMessage: too many messages queued; booting client");
|
||||
if (this._destroyTimer) {
|
||||
clearTimeout(this._destroyTimer);
|
||||
this._destroyTimer = null;
|
||||
}
|
||||
this._log.warn(null, "sendMessage: too many messages queued; booting client");
|
||||
this.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Called from Comm.ts to prepare this Client object to accept a new connection that requests
|
||||
* the same clientId. Returns whether this Client is available and ready for this connection.
|
||||
* Called from Comm.ts to decide whether this Client is available to accept a new connection
|
||||
* that requests the same clientId.
|
||||
*/
|
||||
public async reconnect(counter: string|null, newClient: boolean): Promise<boolean> {
|
||||
// Refuse reconnect if another websocket is currently active. It may be a new browser tab,
|
||||
// and will need its own Client object.
|
||||
if (this._websocket) { return false; }
|
||||
|
||||
// Don't reuse this Client object if it's no longer authorized to access the open documents.
|
||||
if (!await this.isAuthorized()) { return false; }
|
||||
|
||||
this._counter = counter;
|
||||
|
||||
this._log.info(null, "existing client reconnected (%d missed messages)", this._missedMessages.length);
|
||||
if (this._destroyTimer) {
|
||||
this._log.warn(null, "clearing scheduled destruction");
|
||||
clearTimeout(this._destroyTimer);
|
||||
this._destroyTimer = null;
|
||||
}
|
||||
if (newClient) {
|
||||
// If newClient is set, then we assume that the browser client lost its state (e.g.
|
||||
// reloaded the page), so we treat it as a disconnect followed by a new connection to the
|
||||
// same state. At the moment, this only means that we close all docs.
|
||||
if (this._missedMessages.length) {
|
||||
this._log.warn(null, "clearing missed messages for new client");
|
||||
}
|
||||
this._missedMessages.length = 0;
|
||||
this.closeAllDocs();
|
||||
}
|
||||
return true;
|
||||
public canAcceptConnection(): boolean {
|
||||
// Refuse reconnect if another websocket is currently active. It may be a new browser tab
|
||||
// (which may reuse clientId from a copy of sessinStorage). It will need its own Client object.
|
||||
return !this._websocket;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send the initial 'clientConnect' message after receiving a connection.
|
||||
* Complete initialization of a new connection, and send the initial 'clientConnect' message.
|
||||
* See comments at the top of app/server/lib/Comm.ts for some relevant notes.
|
||||
*/
|
||||
public async sendConnectMessage(parts: Partial<CommClientConnect>): Promise<void> {
|
||||
this._log.debug(null, `sending clientConnect with ${this._missedMessages.length} missed messages`);
|
||||
public async sendConnectMessage(
|
||||
newClient: boolean, reuseClient: boolean, lastSeqId: number|null, parts: Partial<CommClientConnect>
|
||||
): Promise<void> {
|
||||
if (this._destroyTimer) {
|
||||
clearTimeout(this._destroyTimer);
|
||||
this._destroyTimer = null;
|
||||
}
|
||||
|
||||
let missedMessages: string[]|undefined = undefined;
|
||||
let seamlessReconnect = false;
|
||||
if (!newClient && reuseClient && await this._isAuthorized()) {
|
||||
// Websocket-level reconnect: existing browser tab reconnected to an existing Client object.
|
||||
// We also check that the Client is still authorized to access all open docs. If not, we'll
|
||||
// close the docs and tell the Client to reload the app.
|
||||
missedMessages = this.getMissedMessages(lastSeqId);
|
||||
if (missedMessages) {
|
||||
// We have all the needed messages (possibly an empty array); can do a seamless reconnect.
|
||||
seamlessReconnect = true;
|
||||
}
|
||||
}
|
||||
|
||||
// We collected any missed messages we need; clear the stored map of them.
|
||||
this._missedMessages.clear();
|
||||
|
||||
let docsClosed: number|null = null;
|
||||
if (!seamlessReconnect) {
|
||||
// The browser client can't recover from missed messages and will need to reopen docs. Close
|
||||
// all docs we kept open. If it's a new Client object, this is a no-op.
|
||||
docsClosed = this.closeAllDocs();
|
||||
}
|
||||
|
||||
// An existing browser client that can't recover, or that connected to a new Client object,
|
||||
// will need to reopen docs. Tell it to reload.
|
||||
const needReload = !newClient && !seamlessReconnect;
|
||||
|
||||
this._log.debug({newClient, needReload, docsClosed, missedMessages: missedMessages?.length},
|
||||
'sending clientConnect');
|
||||
|
||||
// Don't use sendMessage here, since we don't want to queue up this message on failure.
|
||||
const clientConnectMsg: CommClientConnect = {
|
||||
...parts,
|
||||
type: 'clientConnect',
|
||||
clientId: this.clientId,
|
||||
missedMessages: this._missedMessages.slice(0),
|
||||
profile: this._profile,
|
||||
missedMessages,
|
||||
needReload,
|
||||
};
|
||||
// If reconnecting a client with missed messages, clear them now.
|
||||
this._missedMessages.length = 0;
|
||||
|
||||
await this._sendToWebsocket(JSON.stringify(clientConnectMsg));
|
||||
// A heavy-handed fix to T396, since 'clientConnect' is sometimes not seen in the browser,
|
||||
// (seemingly when the 'message' event is triggered before 'open' on the native WebSocket.)
|
||||
// See also my report at https://stackoverflow.com/a/48411315/328565
|
||||
await delay(250);
|
||||
try {
|
||||
await this._sendToWebsocket(JSON.stringify(clientConnectMsg));
|
||||
|
||||
if (this._destroyed || this._websocket?.readyState !== WebSocket.OPEN) {
|
||||
this._log.debug(null, `websocket closed right after clientConnect`);
|
||||
} else {
|
||||
await this._sendToWebsocket(JSON.stringify({...clientConnectMsg, dup: true}));
|
||||
if (needReload) {
|
||||
// If the client should reload, close the socket without waiting. This connection should
|
||||
// not be used anyway, and we want it released by the time the new connection comes in.
|
||||
this._websocket?.close();
|
||||
return;
|
||||
}
|
||||
|
||||
// A heavy-handed fix to T396, since 'clientConnect' is sometimes not seen in the browser,
|
||||
// (seemingly when the 'message' event is triggered before 'open' on the native WebSocket.)
|
||||
// See also my report at https://stackoverflow.com/a/48411315/328565
|
||||
await delay(250);
|
||||
|
||||
if (!this._destroyed && this._websocket?.readyState === WebSocket.OPEN) {
|
||||
await this._sendToWebsocket(JSON.stringify({...clientConnectMsg, dup: true}));
|
||||
}
|
||||
} catch (err) {
|
||||
// It's possible that the connection was closed while we were preparing this response.
|
||||
// We just warn, and let _onClose() take care of cleanup.
|
||||
this._log.warn(null, "failed to prepare or send clientConnect:", err.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// Get messages in order of their key in the _missedMessages map.
|
||||
public getMissedMessages(lastSeqId: number|null): string[]|undefined {
|
||||
const result: string[] = [];
|
||||
if (lastSeqId !== null) {
|
||||
for (let i = lastSeqId + 1; i < this._nextSeqId; i++) {
|
||||
const m = this._missedMessages.get(i);
|
||||
if (m === undefined) { return; }
|
||||
result.push(m);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Assigns the given ScopedSession to the client.
|
||||
public setSession(session: ScopedSession): void {
|
||||
this._session = session;
|
||||
@@ -302,11 +325,13 @@ export class Client {
|
||||
* object and clientId.
|
||||
*/
|
||||
public destroy() {
|
||||
this._log.info(null, "client gone");
|
||||
this.closeAllDocs();
|
||||
const docsClosed = this.closeAllDocs();
|
||||
this._log.info({docsClosed}, "client gone");
|
||||
if (this._destroyTimer) {
|
||||
clearTimeout(this._destroyTimer);
|
||||
this._destroyTimer = null;
|
||||
}
|
||||
this._missedMessages.clear();
|
||||
this._comm.removeClient(this);
|
||||
this._destroyed = true;
|
||||
}
|
||||
@@ -379,8 +404,7 @@ export class Client {
|
||||
throw new ApiError(this._profile ? `user not known: ${this._profile.email}` : 'user not set', 403);
|
||||
}
|
||||
|
||||
public getLogMeta() {
|
||||
const meta: {[key: string]: any} = {};
|
||||
public getLogMeta(meta: {[key: string]: any} = {}) {
|
||||
if (this._profile) { meta.email = this._profile.email; }
|
||||
// We assume the _userId has already been cached, which will be true always (for all practical
|
||||
// purposes) because it's set when the Authorizer checks this client.
|
||||
@@ -455,6 +479,21 @@ export class Client {
|
||||
undefined;
|
||||
}
|
||||
|
||||
// Check that client still has access to all documents. Used to determine whether
|
||||
// a Comm client can be safely reused after a reconnect. Without this check, the client
|
||||
// would be reused even if access to a document has been lost (although an error would be
|
||||
// issued later, on first use of the document).
|
||||
private async _isAuthorized(): Promise<boolean> {
|
||||
for (const docFD of this._docFDs) {
|
||||
try {
|
||||
if (docFD !== null) { await docFD.authorizer.assertAccess('viewers'); }
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the next unused docFD number.
|
||||
private _getNextDocFD(): number {
|
||||
let fd = 0;
|
||||
@@ -478,7 +517,6 @@ export class Client {
|
||||
* Processes the closing of a websocket.
|
||||
*/
|
||||
private _onClose() {
|
||||
this._log.info(null, "onClose");
|
||||
this._websocket?.removeAllListeners();
|
||||
|
||||
// Remove all references to the websocket.
|
||||
@@ -490,7 +528,7 @@ export class Client {
|
||||
this._log.warn(null, "clearing previously scheduled destruction");
|
||||
clearTimeout(this._destroyTimer);
|
||||
}
|
||||
this._log.warn(null, "will discard client in %s sec", Deps.clientRemovalTimeoutMs / 1000);
|
||||
this._log.info(null, "websocket closed; will discard client in %s sec", Deps.clientRemovalTimeoutMs / 1000);
|
||||
this._destroyTimer = setTimeout(() => this.destroy(), Deps.clientRemovalTimeoutMs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,22 @@
|
||||
* browser window, and should persist across brief disconnects. A Client has a 'clientId'
|
||||
* property, which uniquely identifies a client within the currently running server. Method
|
||||
* registered with Comm always receive a Client object as the first argument.
|
||||
*
|
||||
* NOTES:
|
||||
*
|
||||
* The communication setup involves primarily the modules app/server/lib/{Comm,Client}.ts, and
|
||||
* app/client/components/{Comm,GristWSConnection}.ts. In particular, these implement reconnect
|
||||
* logic, which is particularly confusing as done here because it combines two layers:
|
||||
*
|
||||
* - Websocket-level reconnects, where an existing browser tab may reconnect and attempt to
|
||||
* restore state seamlessly by recovering any missed messages.
|
||||
*
|
||||
* - Application-level reconnects, where even in case of a failed websocket-level reconnect (e.g.
|
||||
* a reloaded browser tab, or existing tab that can't recover missed messages), the tab may
|
||||
* connect to existing state. This matters for undo/redo history (to allow a user to undo after
|
||||
* reloading a browser tab), but the only thing this relies on is preserving the clientId.
|
||||
*
|
||||
* In other words, there is an opportunity for untangling and simplifying.
|
||||
*/
|
||||
|
||||
import {EventEmitter} from 'events';
|
||||
@@ -185,7 +201,6 @@ export class Comm extends EventEmitter {
|
||||
* Processes a new websocket connection, and associates the websocket and a Client object.
|
||||
*/
|
||||
private async _onWebSocketConnection(websocket: WebSocket, req: http.IncomingMessage) {
|
||||
log.info("Comm: Got WebSocket connection: %s", req.url);
|
||||
if (this._options.hosts) {
|
||||
// DocWorker ID (/dw/) and version tag (/v/) may be present in this request but are not
|
||||
// needed. addOrgInfo assumes req.url starts with /o/ if present.
|
||||
@@ -197,37 +212,37 @@ export class Comm extends EventEmitter {
|
||||
// Parse the cookie in the request to get the sessionId.
|
||||
const sessionId = this.sessions.getSessionIdFromRequest(req);
|
||||
|
||||
const params = new URL(req.url!, `http://${req.headers.host}`).searchParams;
|
||||
const params = new URL(req.url!, `ws://${req.headers.host}`).searchParams;
|
||||
const existingClientId = params.get('clientId');
|
||||
const browserSettings = safeJsonParse(params.get('browserSettings') || '', {});
|
||||
const newClient = (params.get('newClient') === '1');
|
||||
const newClient = (params.get('newClient') !== '0'); // Treat omitted as new, for the sake of tests.
|
||||
const lastSeqIdStr = params.get('lastSeqId');
|
||||
const lastSeqId = lastSeqIdStr ? parseInt(lastSeqIdStr) : null;
|
||||
const counter = params.get('counter');
|
||||
const userSelector = params.get('user') || '';
|
||||
|
||||
const scopedSession = this.getOrCreateSession(sessionId!, req as RequestWithOrg, userSelector);
|
||||
const profile = await this._getSessionProfile(scopedSession, req);
|
||||
|
||||
// Associate an ID with each websocket, reusing the supplied one if it's valid.
|
||||
let client: Client|undefined = this._clients.get(existingClientId!);
|
||||
if (!client || !await client.reconnect(counter, newClient)) {
|
||||
client = new Client(this, this._methods, localeFromRequest(req), counter);
|
||||
let reuseClient = true;
|
||||
if (!client?.canAcceptConnection()) {
|
||||
reuseClient = false;
|
||||
client = new Client(this, this._methods, localeFromRequest(req));
|
||||
this._clients.set(client.clientId, client);
|
||||
}
|
||||
// Add a Session object to the client.
|
||||
log.info(`Comm ${client}: using session ${sessionId}`);
|
||||
const scopedSession = this.getOrCreateSession(sessionId!, req as RequestWithOrg, userSelector);
|
||||
client.setSession(scopedSession);
|
||||
|
||||
// Associate the client with this websocket.
|
||||
client.setConnection(websocket, browserSettings);
|
||||
log.rawInfo('Comm: Got Websocket connection', {...client.getLogMeta(), urlPath: req.url, reuseClient});
|
||||
|
||||
const profile = await this._getSessionProfile(scopedSession, req);
|
||||
client.setSession(scopedSession); // Add a Session object to the client.
|
||||
client.setOrg((req as RequestWithOrg).org || "");
|
||||
client.setProfile(profile);
|
||||
client.setConnection(websocket, counter, browserSettings);
|
||||
|
||||
client.sendConnectMessage({
|
||||
await client.sendConnectMessage(newClient, reuseClient, lastSeqId, {
|
||||
serverVersion: this._serverVersion || version.gitcommit,
|
||||
settings: this._options.settings,
|
||||
})
|
||||
.catch(err => {
|
||||
log.error(`Comm ${client}: failed to prepare or send clientConnect:`, err);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ export function fromCallback<T>(nodeFunc: NodeCallbackFunc<T>): Promise<T> {
|
||||
* @param {Number} optCount: Number of ports to check, defaults to 200.
|
||||
* @returns Promise<Number>: Promise for an available port.
|
||||
*/
|
||||
export function getAvailablePort(firstPort: number = 8000, optCount: number = 200) {
|
||||
export function getAvailablePort(firstPort: number = 8000, optCount: number = 200): Promise<number> {
|
||||
const lastPort = firstPort + optCount - 1;
|
||||
function checkNext(port: number): Promise<number> {
|
||||
if (port > lastPort) {
|
||||
|
||||
Reference in New Issue
Block a user