gristlabs_grist-core/app/gen-server/lib/DocApiForwarder.ts

117 lines
5.1 KiB
TypeScript
Raw Permalink Normal View History

import * as express from "express";
import fetch, { RequestInit } from 'node-fetch';
import { ApiError } from 'app/common/ApiError';
import { removeTrailingSlash } from 'app/common/gutil';
import { HomeDBManager } from "app/gen-server/lib/HomeDBManager";
import { assertAccess, getOrSetDocAuth, getTransitiveHeaders, RequestWithLogin } from 'app/server/lib/Authorizer';
import { IDocWorkerMap } from "app/server/lib/DocWorkerMap";
import { expressWrap } from "app/server/lib/expressWrap";
import { GristServer } from "app/server/lib/GristServer";
import { getAssignmentId } from "app/server/lib/idUtils";
/**
 * Forwards document API requests (/api/docs/:docId/tables and the other routes registered in
 * addEndpoints) to the doc worker handling the :docId document. For most routes, makes sure the
 * user has at least view access to the document, otherwise rejects the request; the /assign route
 * and the document-less /api/docs route are forwarded without that access check. For performance
 * reasons we stream the body directly from the request, which requires that no one reads the req
 * beforehand; in particular you should register DocApiForwarder before bodyParser.
 *
 * Use:
 *    const docApiForwarder = new DocApiForwarder(getDocWorkerMap(), dbManager, gristServer);
 *    docApiForwarder.addEndpoints(app);
 *
 * Note that it expects userId, and jsonErrorHandler middleware to be set up outside
 * to apply to these routes.
 */
export class DocApiForwarder {
  /**
   * @param _docWorkerMap Used to choose the doc worker each request is forwarded to.
   * @param _dbManager Passed to getOrSetDocAuth when resolving the requested document.
   * @param _gristServer Passed to getOrSetDocAuth when resolving the requested document.
   */
  constructor(private _docWorkerMap: IDocWorkerMap, private _dbManager: HomeDBManager,
              private _gristServer: GristServer) {
  }

  /**
   * Registers on `app` every route whose requests are forwarded to a doc worker.
   */
  public addEndpoints(app: express.Application) {
    // Middleware to forward a request about an existing document that user has access to.
    // We do not check whether the document has been soft-deleted; that will be checked by
    // the worker if needed.
    const withDoc = expressWrap(this._forwardToDocWorker.bind(this, true, 'viewers'));
    // Middleware to forward a request without a pre-existing document (for imports/uploads).
    const withoutDoc = expressWrap(this._forwardToDocWorker.bind(this, false, null));
    // Middleware to forward a request about an existing document without any access check.
    const withDocWithoutAuth = expressWrap(this._forwardToDocWorker.bind(this, true, null));

    app.use('/api/docs/:docId/tables', withDoc);
    app.use('/api/docs/:docId/force-reload', withDoc);
    app.use('/api/docs/:docId/recover', withDoc);
    app.use('/api/docs/:docId/remove', withDoc);
    app.delete('/api/docs/:docId', withDoc);
    app.use('/api/docs/:docId/download', withDoc);
    app.use('/api/docs/:docId/send-to-drive', withDoc);
    app.use('/api/docs/:docId/fork', withDoc);
    app.use('/api/docs/:docId/create-fork', withDoc);
    app.use('/api/docs/:docId/apply', withDoc);
    app.use('/api/docs/:docId/attachments', withDoc);
    app.use('/api/docs/:docId/snapshots', withDoc);
    app.use('/api/docs/:docId/replace', withDoc);
    app.use('/api/docs/:docId/flush', withDoc);
    app.use('/api/docs/:docId/states', withDoc);
    app.use('/api/docs/:docId/compare', withDoc);
    app.use('/api/docs/:docId/assign', withDocWithoutAuth);
    app.use('^/api/docs$', withoutDoc);
  }

  /**
   * Forwards `req` to the doc worker assigned to the relevant document and streams the worker's
   * response back through `res`.
   *
   * @param withDocId When true, `req.params.docId` is resolved with getOrSetDocAuth and the
   *   resulting docId is used for worker assignment; when false, the fixed id 'import' is used.
   * @param role When non-null, assertAccess is called with this role (with `allowRemoved: true`)
   *   before forwarding. When null, no access check is made here.
   * @param req Incoming request. For POST/PATCH/PUT it is passed to fetch as the body stream, so
   *   it must not have been consumed already.
   * @param res Response that receives the worker's status, a fixed subset of its headers, and
   *   its body.
   * @returns A promise that settles once the response has been fully written, the client
   *   connection has closed, or one of the streams has reported an error.
   * @throws ApiError with status 404 if there is no worker map.
   */
  private async _forwardToDocWorker(
    withDocId: boolean, role: 'viewers'|null, req: express.Request, res: express.Response,
  ): Promise<void> {
    let docId: string|null = null;
    if (withDocId) {
      const docAuth = await getOrSetDocAuth(req as RequestWithLogin, this._dbManager,
                                            this._gristServer, req.params.docId);
      if (role) {
        assertAccess(role, docAuth, {allowRemoved: true});
      }
      docId = docAuth.docId;
    }

    // Guard before the worker map is first used, so that a missing map is always reported
    // as this ApiError.
    if (!this._docWorkerMap) {
      throw new ApiError('no worker map', 404);
    }

    // Use the docId for worker assignment, rather than req.params.docId, which could be a urlId.
    const assignmentId = getAssignmentId(this._docWorkerMap, docId === null ? 'import' : docId);
    const docStatus = await this._docWorkerMap.assignDocWorker(assignmentId);

    // Construct new url by keeping only origin and path prefixes of `docWorker.internalUrl`,
    // and otherwise reflecting fully the original url (remaining path, and query params).
    const docWorkerUrl = new URL(docStatus.docWorker.internalUrl);
    const url = new URL(req.originalUrl, docWorkerUrl.origin);
    url.pathname = removeTrailingSlash(docWorkerUrl.pathname) + url.pathname;

    const headers: {[key: string]: string} = {
      ...getTransitiveHeaders(req),
      'Content-Type': req.get('Content-Type') || 'application/json',
    };
    // Pass these request headers through to the worker when the client supplied them.
    for (const key of ['X-Sort', 'X-Limit']) {
      const hdr = req.get(key);
      if (hdr) { headers[key] = hdr; }
    }

    const options: RequestInit = {
      method: req.method,
      headers,
    };
    if (['POST', 'PATCH', 'PUT'].includes(req.method)) {
      // uses `req` as a stream
      options.body = req;
    }

    const docWorkerRes = await fetch(url.href, options);
    res.status(docWorkerRes.status);
    // Only this fixed set of worker response headers is copied to the client response.
    for (const key of ['content-type', 'content-disposition', 'cache-control']) {
      const value = docWorkerRes.headers.get(key);
      if (value) { res.set(key, value); }
    }

    return new Promise<void>((resolve, reject) => {
      docWorkerRes.body.on('error', reject);
      res.on('error', reject);
      res.on('finish', resolve);
      // 'finish' is never emitted if the client disconnects before the response has been fully
      // written; settle on 'close' as well so this promise cannot stay pending forever. If the
      // promise has already settled, this extra call is a no-op.
      res.on('close', resolve);
      docWorkerRes.body.pipe(res);
    });
  }
}