Improve /api/workers/:docId worker to only send public urls

This commit is contained in:
fflorent 2024-04-18 15:23:10 +02:00
parent 1530953c3e
commit 9546a1e4ba
6 changed files with 94 additions and 71 deletions

View File

@ -1156,13 +1156,22 @@ export class DocAPIImpl extends BaseAPI implements DocAPI {
}
}
interface DocWorkerInfo {
/**
* Reprensents information to build public doc worker url.
*
* Structure that may contain either **exlusively**:
* - a selfPrefix when no pool of doc worker exist.
* - a public doc worker url otherwise.
*/
export type PublicDocWorkerUrlInfo = {
selfPrefix: string;
docWorkerUrl: null;
} | {
selfPrefix: null;
docWorkerUrl: string|null;
internalDocWorkerUrl: string|null;
selfPrefix?: string;
}
function getUrlFromPrefix(homeUrl: string, prefix?: string) {
export function getUrlFromPrefix(homeUrl: string, prefix: string|null) {
if (!prefix) {
// This should never happen.
throw new Error('missing selfPrefix for docWorkerUrl');
@ -1182,30 +1191,10 @@ function getUrlFromPrefix(homeUrl: string, prefix?: string) {
* doc-worker id, and a tag for the codebase (used in consistency checks).
*
* @param {string} homeUrl
* @param {object} docWorkerInfo
* @param {string} docWorkerInfo.docWorkerUrl The public doc Worker URL
* @param {string} docWorkerInfo.internalDocWorkerUrl The internal doc Worker URL
* @param {string|undefined} docWorkerInfo.selfPrefix
* @param {string} urlType The type of doc worker url to extract from the docWorkerInfo
* @param {string} docWorkerInfo The information to build the public doc worker url
* (result of the call to /api/worker/:docId)
*/
export function getPublicDocWorkerUrl(homeUrl: string, docWorkerInfo: DocWorkerInfo) {
export function getPublicDocWorkerUrl(homeUrl: string, docWorkerInfo: PublicDocWorkerUrlInfo) {
const publicUrl = docWorkerInfo.docWorkerUrl;
return publicUrl || getUrlFromPrefix(homeUrl, docWorkerInfo.selfPrefix);
}
/**
* @see getPublicDocWorkerUrl
* Like getPublicDocWorkerUrl but returns the URL resolvable internally (behind a Reverse Proxy).
* Especially useful when and where the public url cannot be resolved.
*
* @param {string} homeUrl
* @param {object} docWorkerInfo
* @param {string} docWorkerInfo.docWorkerUrl The public doc Worker URL
* @param {string} docWorkerInfo.internalDocWorkerUrl The internal doc Worker URL
* @param {string|undefined} docWorkerInfo.selfPrefix
* @param {string} urlType The type of doc worker url to extract from the docWorkerInfo
*/
export function getInternalDocWorkerUrl(homeUrl: string, docWorkerInfo: DocWorkerInfo) {
const internalUrl = docWorkerInfo.internalDocWorkerUrl;
return internalUrl || getUrlFromPrefix(homeUrl, docWorkerInfo.selfPrefix);
return publicUrl || getUrlFromPrefix(homeUrl, docWorkerInfo?.selfPrefix);
}

View File

@ -9,17 +9,18 @@ import {ApiError} from 'app/common/ApiError';
import {getSlugIfNeeded, parseUrlId, SHARE_KEY_PREFIX} from 'app/common/gristUrls';
import {LocalPlugin} from "app/common/plugin";
import {TELEMETRY_TEMPLATE_SIGNUP_COOKIE_NAME} from 'app/common/Telemetry';
import {Document as APIDocument} from 'app/common/UserAPI';
import {Document as APIDocument, PublicDocWorkerUrlInfo} from 'app/common/UserAPI';
import {Document} from "app/gen-server/entity/Document";
import {HomeDBManager} from 'app/gen-server/lib/HomeDBManager';
import {assertAccess, getTransitiveHeaders, getUserId, isAnonymousUser,
RequestWithLogin} from 'app/server/lib/Authorizer';
import {DocStatus, IDocWorkerMap} from 'app/server/lib/DocWorkerMap';
import {customizeDocWorkerUrl, getWorker, useWorkerPool} from 'app/server/lib/DocWorkerUtils';
import {
customizeDocWorkerUrl, getDocWorkerInfoOrSelfPrefix, getWorker, useWorkerPool
} from 'app/server/lib/DocWorkerUtils';
import {expressWrap} from 'app/server/lib/expressWrap';
import {DocTemplate, GristServer} from 'app/server/lib/GristServer';
import {getCookieDomain} from 'app/server/lib/gristSessions';
import {getAssignmentId} from 'app/server/lib/idUtils';
import log from 'app/server/lib/log';
import {addOrgToPathIfNeeded, pruneAPIResult, trustOrigin} from 'app/server/lib/requestUtils';
import {ISendAppPageOptions} from 'app/server/lib/sendAppPage';
@ -48,35 +49,20 @@ export function attachAppEndpoint(options: AttachOptions): void {
app.get('/apiconsole', expressWrap(async (req, res) =>
sendAppPage(req, res, {path: 'apiconsole.html', status: 200, config: {}})));
app.get('/api/worker/:assignmentId([^/]+)/?*', expressWrap(async (req, res) => {
if (!useWorkerPool()) {
// Let the client know there is not a separate pool of workers,
// so they should continue to use the same base URL for accessing
// documents. For consistency, return a prefix to add into that
// URL, as there would be for a pool of workers. It would be nice
// to go ahead and provide the full URL, but that requires making
// more assumptions about how Grist is configured.
// Alternatives could be: have the client to send their base URL
// in the request; or use headers commonly added by reverse proxies.
const selfPrefix = "/dw/self/v/" + gristServer.getTag();
res.json({docWorkerUrl: null, internalDocWorkerUrl: null, selfPrefix});
return;
}
if (!trustOrigin(req, res)) { throw new Error('Unrecognized origin'); }
app.get('/api/worker/:docId([^/]+)/?*', expressWrap(async (req, res) => {
// FIXME: To the reviewers: I moved these two lines at the top of the express handler.
// Is it OK? Seems rather safe to me.
res.header("Access-Control-Allow-Credentials", "true");
if (!trustOrigin(req, res)) { throw new Error('Unrecognized origin'); }
if (!docWorkerMap) {
return res.status(500).json({error: 'no worker map'});
}
const assignmentId = getAssignmentId(docWorkerMap, req.params.assignmentId);
const {docStatus} = await getWorker(docWorkerMap, assignmentId, '/status');
if (!docStatus) {
return res.status(500).json({error: 'no worker'});
}
res.json({
docWorkerUrl: customizeDocWorkerUrl(docStatus.docWorker.publicUrl, req),
internalDocWorkerUrl: docStatus.docWorker.internalUrl
});
const {selfPrefix, docWorker} = await getDocWorkerInfoOrSelfPrefix(
req.params.docId, docWorkerMap, gristServer.getTag()
);
const info: PublicDocWorkerUrlInfo = selfPrefix ?
{ docWorkerUrl: null, selfPrefix } :
{ docWorkerUrl: customizeDocWorkerUrl(docWorker!.publicUrl, req), selfPrefix: null };
return res.json(info);
}));
// Handler for serving the document landing pages. Expects the following parameters:

View File

@ -1645,7 +1645,7 @@ export class DocWorkerApi {
let uploadResult;
try {
const accessId = makeAccessId(req, getAuthorizedUserId(req));
uploadResult = await fetchDoc(this._grist, sourceDocumentId, req, accessId, asTemplate);
uploadResult = await fetchDoc(this._grist, this._docWorkerMap, sourceDocumentId, req, accessId, asTemplate);
globalUploadSet.changeUploadName(uploadResult.uploadId, accessId, `${documentName}.grist`);
} catch (err) {
if ((err as ApiError).status === 403) {

View File

@ -1,11 +1,12 @@
import {ApiError} from 'app/common/ApiError';
import {parseSubdomainStrictly} from 'app/common/gristUrls';
import {removeTrailingSlash} from 'app/common/gutil';
import {DocStatus, IDocWorkerMap} from 'app/server/lib/DocWorkerMap';
import {DocStatus, DocWorkerInfo, IDocWorkerMap} from 'app/server/lib/DocWorkerMap';
import log from 'app/server/lib/log';
import {adaptServerUrl} from 'app/server/lib/requestUtils';
import * as express from 'express';
import fetch, {Response as FetchResponse, RequestInit} from 'node-fetch';
import { getAssignmentId } from './idUtils';
/**
* This method transforms a doc worker's public url as needed based on the request.
@ -152,6 +153,43 @@ export async function getWorker(
}
}
export type DocWorkerInfoOrSelfPrefix = {
docWorker: DocWorkerInfo,
selfPrefix?: never,
} | {
docWorker?: never,
selfPrefix: string
};
export async function getDocWorkerInfoOrSelfPrefix(
docId: string,
docWorkerMap?: IDocWorkerMap | null,
tag?: string
): Promise<DocWorkerInfoOrSelfPrefix> {
if (!useWorkerPool()) {
// Let the client know there is not a separate pool of workers,
// so they should continue to use the same base URL for accessing
// documents. For consistency, return a prefix to add into that
// URL, as there would be for a pool of workers. It would be nice
// to go ahead and provide the full URL, but that requires making
// more assumptions about how Grist is configured.
// Alternatives could be: have the client to send their base URL
// in the request; or use headers commonly added by reverse proxies.
const selfPrefix = "/dw/self/v/" + tag;
return { selfPrefix };
}
if (!docWorkerMap) {
throw new Error('no worker map');
}
const assignmentId = getAssignmentId(docWorkerMap, docId);
const { docStatus } = await getWorker(docWorkerMap, assignmentId, '/status');
if (!docStatus) {
throw new Error('no worker');
}
return { docWorker: docStatus.docWorker };
}
// Return true if document related endpoints are served by separate workers.
export function useWorkerPool() {
return process.env.GRIST_SINGLE_PORT !== 'true';

View File

@ -1995,7 +1995,7 @@ export class FlexServer implements GristServer {
// Add the handling for the /upload route. Most uploads are meant for a DocWorker: they are put
// in temporary files, and the DocWorker needs to be on the same machine to have access to them.
// This doesn't check for doc access permissions because the request isn't tied to a document.
addUploadRoute(this, this.app, this._trustOriginsMiddleware, ...basicMiddleware);
addUploadRoute(this, this.app, this._docWorkerMap, this._trustOriginsMiddleware, ...basicMiddleware);
this.app.get('/attachment', ...docAccessMiddleware,
expressWrap(async (req, res) => this._docWorker.getAttachment(req, res)));

View File

@ -1,7 +1,7 @@
import {ApiError} from 'app/common/ApiError';
import {InactivityTimer} from 'app/common/InactivityTimer';
import {FetchUrlOptions, FileUploadResult, UPLOAD_URL_PATH, UploadResult} from 'app/common/uploads';
import {getInternalDocWorkerUrl} from 'app/common/UserAPI';
import {getUrlFromPrefix} from 'app/common/UserAPI';
import {getAuthorizedUserId, getTransitiveHeaders, getUserId, isSingleUserMode,
RequestWithLogin} from 'app/server/lib/Authorizer';
import {expressWrap} from 'app/server/lib/expressWrap';
@ -21,6 +21,8 @@ import * as multiparty from 'multiparty';
import fetch, {Response as FetchResponse} from 'node-fetch';
import * as path from 'path';
import * as tmp from 'tmp';
import { IDocWorkerMap } from './DocWorkerMap';
import { getDocWorkerInfoOrSelfPrefix } from './DocWorkerUtils';
// After some time of inactivity, clean up the upload. We give an hour, which seems generous,
// except that if one is toying with import options, and leaves the upload in an open browser idle
@ -39,7 +41,12 @@ export interface FormResult {
/**
* Adds an upload route to the given express app, listening for POST requests at UPLOAD_URL_PATH.
*/
export function addUploadRoute(server: GristServer, expressApp: Application, ...handlers: RequestHandler[]): void {
export function addUploadRoute(
server: GristServer,
expressApp: Application,
docWorkerMap: IDocWorkerMap,
...handlers: RequestHandler[]
): void {
// When doing a cross-origin post, the browser will check for access with options prior to posting.
// We need to reassure it that the request will be accepted before it will go ahead and post.
@ -72,7 +79,7 @@ export function addUploadRoute(server: GristServer, expressApp: Application, ...
if (!docId) { throw new Error('doc must be specified'); }
const accessId = makeAccessId(req, getAuthorizedUserId(req));
try {
const uploadResult: UploadResult = await fetchDoc(server, docId, req, accessId,
const uploadResult: UploadResult = await fetchDoc(server, docWorkerMap, docId, req, accessId,
req.query.template === '1');
if (name) {
globalUploadSet.changeUploadName(uploadResult.uploadId, accessId, name);
@ -404,8 +411,14 @@ async function _fetchURL(url: string, accessId: string|null, options?: FetchUrlO
* Fetches a Grist doc potentially managed by a different doc worker. Passes on credentials
* supplied in the current request.
*/
export async function fetchDoc(server: GristServer, docId: string, req: Request, accessId: string|null,
template: boolean): Promise<UploadResult> {
export async function fetchDoc(
server: GristServer,
docWorkerMap: IDocWorkerMap,
docId: string,
req: Request,
accessId: string|null,
template: boolean
): Promise<UploadResult> {
// Prepare headers that preserve credentials of current user.
const headers = getTransitiveHeaders(req);
@ -417,11 +430,8 @@ export async function fetchDoc(server: GristServer, docId: string, req: Request,
// Find the doc worker responsible for the document we wish to copy.
// The backend needs to be well configured for this to work.
const homeUrl = server.getHomeInternalUrl();
const fetchUrl = new URL(`/api/worker/${docId}`, homeUrl);
const response: FetchResponse = await Deps.fetch(fetchUrl.href, {headers});
await _checkForError(response);
const docWorkerUrl = getInternalDocWorkerUrl(server.getOwnUrl(), await response.json());
const { selfPrefix, docWorker } = await getDocWorkerInfoOrSelfPrefix(docId, docWorkerMap, server.getTag());
const docWorkerUrl = docWorker ? docWorker.internalUrl : getUrlFromPrefix(server.getHomeInternalUrl(), selfPrefix);
// Download the document, in full or as a template.
const url = new URL(`api/docs/${docId}/download?template=${Number(template)}`,
docWorkerUrl.replace(/\/*$/, '/'));