From 9546a1e4bacc72d0e11b43d7a950cc0160971e38 Mon Sep 17 00:00:00 2001 From: fflorent Date: Thu, 18 Apr 2024 15:23:10 +0200 Subject: [PATCH] Improve /api/workers/:docId worker to only send public urls --- app/common/UserAPI.ts | 45 ++++++++++++------------------- app/server/lib/AppEndpoint.ts | 46 +++++++++++--------------------- app/server/lib/DocApi.ts | 2 +- app/server/lib/DocWorkerUtils.ts | 40 ++++++++++++++++++++++++++- app/server/lib/FlexServer.ts | 2 +- app/server/lib/uploads.ts | 30 ++++++++++++++------- 6 files changed, 94 insertions(+), 71 deletions(-) diff --git a/app/common/UserAPI.ts b/app/common/UserAPI.ts index 9160da88..e9f3fa23 100644 --- a/app/common/UserAPI.ts +++ b/app/common/UserAPI.ts @@ -1156,13 +1156,22 @@ export class DocAPIImpl extends BaseAPI implements DocAPI { } } -interface DocWorkerInfo { +/** + * Reprensents information to build public doc worker url. + * + * Structure that may contain either **exlusively**: + * - a selfPrefix when no pool of doc worker exist. + * - a public doc worker url otherwise. + */ +export type PublicDocWorkerUrlInfo = { + selfPrefix: string; + docWorkerUrl: null; +} | { + selfPrefix: null; docWorkerUrl: string|null; - internalDocWorkerUrl: string|null; - selfPrefix?: string; } -function getUrlFromPrefix(homeUrl: string, prefix?: string) { +export function getUrlFromPrefix(homeUrl: string, prefix: string|null) { if (!prefix) { // This should never happen. throw new Error('missing selfPrefix for docWorkerUrl'); @@ -1182,30 +1191,10 @@ function getUrlFromPrefix(homeUrl: string, prefix?: string) { * doc-worker id, and a tag for the codebase (used in consistency checks). * * @param {string} homeUrl - * @param {object} docWorkerInfo - * @param {string} docWorkerInfo.docWorkerUrl The public doc Worker URL - * @param {string} docWorkerInfo.internalDocWorkerUrl The internal doc Worker URL - * @param {string|undefined} docWorkerInfo.selfPrefix - * @param {string} urlType The type of doc worker url to extract from the docWorkerInfo + * @param {string} docWorkerInfo The information to build the public doc worker url + * (result of the call to /api/worker/:docId) */ -export function getPublicDocWorkerUrl(homeUrl: string, docWorkerInfo: DocWorkerInfo) { +export function getPublicDocWorkerUrl(homeUrl: string, docWorkerInfo: PublicDocWorkerUrlInfo) { const publicUrl = docWorkerInfo.docWorkerUrl; - return publicUrl || getUrlFromPrefix(homeUrl, docWorkerInfo.selfPrefix); -} - -/** - * @see getPublicDocWorkerUrl - * Like getPublicDocWorkerUrl but returns the URL resolvable internally (behind a Reverse Proxy). - * Especially useful when and where the public url cannot be resolved. - * - * @param {string} homeUrl - * @param {object} docWorkerInfo - * @param {string} docWorkerInfo.docWorkerUrl The public doc Worker URL - * @param {string} docWorkerInfo.internalDocWorkerUrl The internal doc Worker URL - * @param {string|undefined} docWorkerInfo.selfPrefix - * @param {string} urlType The type of doc worker url to extract from the docWorkerInfo - */ -export function getInternalDocWorkerUrl(homeUrl: string, docWorkerInfo: DocWorkerInfo) { - const internalUrl = docWorkerInfo.internalDocWorkerUrl; - return internalUrl || getUrlFromPrefix(homeUrl, docWorkerInfo.selfPrefix); + return publicUrl || getUrlFromPrefix(homeUrl, docWorkerInfo?.selfPrefix); } diff --git a/app/server/lib/AppEndpoint.ts b/app/server/lib/AppEndpoint.ts index e3e912a1..33f38a4e 100644 --- a/app/server/lib/AppEndpoint.ts +++ b/app/server/lib/AppEndpoint.ts @@ -9,17 +9,18 @@ import {ApiError} from 'app/common/ApiError'; import {getSlugIfNeeded, parseUrlId, SHARE_KEY_PREFIX} from 'app/common/gristUrls'; import {LocalPlugin} from "app/common/plugin"; import {TELEMETRY_TEMPLATE_SIGNUP_COOKIE_NAME} from 'app/common/Telemetry'; -import {Document as APIDocument} from 'app/common/UserAPI'; +import {Document as APIDocument, PublicDocWorkerUrlInfo} from 'app/common/UserAPI'; import {Document} from "app/gen-server/entity/Document"; import {HomeDBManager} from 'app/gen-server/lib/HomeDBManager'; import {assertAccess, getTransitiveHeaders, getUserId, isAnonymousUser, RequestWithLogin} from 'app/server/lib/Authorizer'; import {DocStatus, IDocWorkerMap} from 'app/server/lib/DocWorkerMap'; -import {customizeDocWorkerUrl, getWorker, useWorkerPool} from 'app/server/lib/DocWorkerUtils'; +import { + customizeDocWorkerUrl, getDocWorkerInfoOrSelfPrefix, getWorker, useWorkerPool +} from 'app/server/lib/DocWorkerUtils'; import {expressWrap} from 'app/server/lib/expressWrap'; import {DocTemplate, GristServer} from 'app/server/lib/GristServer'; import {getCookieDomain} from 'app/server/lib/gristSessions'; -import {getAssignmentId} from 'app/server/lib/idUtils'; import log from 'app/server/lib/log'; import {addOrgToPathIfNeeded, pruneAPIResult, trustOrigin} from 'app/server/lib/requestUtils'; import {ISendAppPageOptions} from 'app/server/lib/sendAppPage'; @@ -48,35 +49,20 @@ export function attachAppEndpoint(options: AttachOptions): void { app.get('/apiconsole', expressWrap(async (req, res) => sendAppPage(req, res, {path: 'apiconsole.html', status: 200, config: {}}))); - app.get('/api/worker/:assignmentId([^/]+)/?*', expressWrap(async (req, res) => { - if (!useWorkerPool()) { - // Let the client know there is not a separate pool of workers, - // so they should continue to use the same base URL for accessing - // documents. For consistency, return a prefix to add into that - // URL, as there would be for a pool of workers. It would be nice - // to go ahead and provide the full URL, but that requires making - // more assumptions about how Grist is configured. - // Alternatives could be: have the client to send their base URL - // in the request; or use headers commonly added by reverse proxies. - const selfPrefix = "/dw/self/v/" + gristServer.getTag(); - res.json({docWorkerUrl: null, internalDocWorkerUrl: null, selfPrefix}); - return; - } - if (!trustOrigin(req, res)) { throw new Error('Unrecognized origin'); } + app.get('/api/worker/:docId([^/]+)/?*', expressWrap(async (req, res) => { + // FIXME: To the reviewers: I moved these two lines at the top of the express handler. + // Is it OK? Seems rather safe to me. res.header("Access-Control-Allow-Credentials", "true"); + if (!trustOrigin(req, res)) { throw new Error('Unrecognized origin'); } - if (!docWorkerMap) { - return res.status(500).json({error: 'no worker map'}); - } - const assignmentId = getAssignmentId(docWorkerMap, req.params.assignmentId); - const {docStatus} = await getWorker(docWorkerMap, assignmentId, '/status'); - if (!docStatus) { - return res.status(500).json({error: 'no worker'}); - } - res.json({ - docWorkerUrl: customizeDocWorkerUrl(docStatus.docWorker.publicUrl, req), - internalDocWorkerUrl: docStatus.docWorker.internalUrl - }); + const {selfPrefix, docWorker} = await getDocWorkerInfoOrSelfPrefix( + req.params.docId, docWorkerMap, gristServer.getTag() + ); + const info: PublicDocWorkerUrlInfo = selfPrefix ? + { docWorkerUrl: null, selfPrefix } : + { docWorkerUrl: customizeDocWorkerUrl(docWorker!.publicUrl, req), selfPrefix: null }; + + return res.json(info); })); // Handler for serving the document landing pages. Expects the following parameters: diff --git a/app/server/lib/DocApi.ts b/app/server/lib/DocApi.ts index d0946696..8d33aada 100644 --- a/app/server/lib/DocApi.ts +++ b/app/server/lib/DocApi.ts @@ -1645,7 +1645,7 @@ export class DocWorkerApi { let uploadResult; try { const accessId = makeAccessId(req, getAuthorizedUserId(req)); - uploadResult = await fetchDoc(this._grist, sourceDocumentId, req, accessId, asTemplate); + uploadResult = await fetchDoc(this._grist, this._docWorkerMap, sourceDocumentId, req, accessId, asTemplate); globalUploadSet.changeUploadName(uploadResult.uploadId, accessId, `${documentName}.grist`); } catch (err) { if ((err as ApiError).status === 403) { diff --git a/app/server/lib/DocWorkerUtils.ts b/app/server/lib/DocWorkerUtils.ts index 1396b876..25be2234 100644 --- a/app/server/lib/DocWorkerUtils.ts +++ b/app/server/lib/DocWorkerUtils.ts @@ -1,11 +1,12 @@ import {ApiError} from 'app/common/ApiError'; import {parseSubdomainStrictly} from 'app/common/gristUrls'; import {removeTrailingSlash} from 'app/common/gutil'; -import {DocStatus, IDocWorkerMap} from 'app/server/lib/DocWorkerMap'; +import {DocStatus, DocWorkerInfo, IDocWorkerMap} from 'app/server/lib/DocWorkerMap'; import log from 'app/server/lib/log'; import {adaptServerUrl} from 'app/server/lib/requestUtils'; import * as express from 'express'; import fetch, {Response as FetchResponse, RequestInit} from 'node-fetch'; +import { getAssignmentId } from './idUtils'; /** * This method transforms a doc worker's public url as needed based on the request. @@ -152,6 +153,43 @@ export async function getWorker( } } +export type DocWorkerInfoOrSelfPrefix = { + docWorker: DocWorkerInfo, + selfPrefix?: never, +} | { + docWorker?: never, + selfPrefix: string +}; + +export async function getDocWorkerInfoOrSelfPrefix( + docId: string, + docWorkerMap?: IDocWorkerMap | null, + tag?: string +): Promise { + if (!useWorkerPool()) { + // Let the client know there is not a separate pool of workers, + // so they should continue to use the same base URL for accessing + // documents. For consistency, return a prefix to add into that + // URL, as there would be for a pool of workers. It would be nice + // to go ahead and provide the full URL, but that requires making + // more assumptions about how Grist is configured. + // Alternatives could be: have the client to send their base URL + // in the request; or use headers commonly added by reverse proxies. + const selfPrefix = "/dw/self/v/" + tag; + return { selfPrefix }; + } + + if (!docWorkerMap) { + throw new Error('no worker map'); + } + const assignmentId = getAssignmentId(docWorkerMap, docId); + const { docStatus } = await getWorker(docWorkerMap, assignmentId, '/status'); + if (!docStatus) { + throw new Error('no worker'); + } + return { docWorker: docStatus.docWorker }; +} + // Return true if document related endpoints are served by separate workers. export function useWorkerPool() { return process.env.GRIST_SINGLE_PORT !== 'true'; diff --git a/app/server/lib/FlexServer.ts b/app/server/lib/FlexServer.ts index aa70817c..6a7af17b 100644 --- a/app/server/lib/FlexServer.ts +++ b/app/server/lib/FlexServer.ts @@ -1995,7 +1995,7 @@ export class FlexServer implements GristServer { // Add the handling for the /upload route. Most uploads are meant for a DocWorker: they are put // in temporary files, and the DocWorker needs to be on the same machine to have access to them. // This doesn't check for doc access permissions because the request isn't tied to a document. - addUploadRoute(this, this.app, this._trustOriginsMiddleware, ...basicMiddleware); + addUploadRoute(this, this.app, this._docWorkerMap, this._trustOriginsMiddleware, ...basicMiddleware); this.app.get('/attachment', ...docAccessMiddleware, expressWrap(async (req, res) => this._docWorker.getAttachment(req, res))); diff --git a/app/server/lib/uploads.ts b/app/server/lib/uploads.ts index d0990c72..c2a2eaa1 100644 --- a/app/server/lib/uploads.ts +++ b/app/server/lib/uploads.ts @@ -1,7 +1,7 @@ import {ApiError} from 'app/common/ApiError'; import {InactivityTimer} from 'app/common/InactivityTimer'; import {FetchUrlOptions, FileUploadResult, UPLOAD_URL_PATH, UploadResult} from 'app/common/uploads'; -import {getInternalDocWorkerUrl} from 'app/common/UserAPI'; +import {getUrlFromPrefix} from 'app/common/UserAPI'; import {getAuthorizedUserId, getTransitiveHeaders, getUserId, isSingleUserMode, RequestWithLogin} from 'app/server/lib/Authorizer'; import {expressWrap} from 'app/server/lib/expressWrap'; @@ -21,6 +21,8 @@ import * as multiparty from 'multiparty'; import fetch, {Response as FetchResponse} from 'node-fetch'; import * as path from 'path'; import * as tmp from 'tmp'; +import { IDocWorkerMap } from './DocWorkerMap'; +import { getDocWorkerInfoOrSelfPrefix } from './DocWorkerUtils'; // After some time of inactivity, clean up the upload. We give an hour, which seems generous, // except that if one is toying with import options, and leaves the upload in an open browser idle @@ -39,7 +41,12 @@ export interface FormResult { /** * Adds an upload route to the given express app, listening for POST requests at UPLOAD_URL_PATH. */ -export function addUploadRoute(server: GristServer, expressApp: Application, ...handlers: RequestHandler[]): void { +export function addUploadRoute( + server: GristServer, + expressApp: Application, + docWorkerMap: IDocWorkerMap, + ...handlers: RequestHandler[] +): void { // When doing a cross-origin post, the browser will check for access with options prior to posting. // We need to reassure it that the request will be accepted before it will go ahead and post. @@ -72,7 +79,7 @@ export function addUploadRoute(server: GristServer, expressApp: Application, ... if (!docId) { throw new Error('doc must be specified'); } const accessId = makeAccessId(req, getAuthorizedUserId(req)); try { - const uploadResult: UploadResult = await fetchDoc(server, docId, req, accessId, + const uploadResult: UploadResult = await fetchDoc(server, docWorkerMap, docId, req, accessId, req.query.template === '1'); if (name) { globalUploadSet.changeUploadName(uploadResult.uploadId, accessId, name); @@ -404,8 +411,14 @@ async function _fetchURL(url: string, accessId: string|null, options?: FetchUrlO * Fetches a Grist doc potentially managed by a different doc worker. Passes on credentials * supplied in the current request. */ -export async function fetchDoc(server: GristServer, docId: string, req: Request, accessId: string|null, - template: boolean): Promise { +export async function fetchDoc( + server: GristServer, + docWorkerMap: IDocWorkerMap, + docId: string, + req: Request, + accessId: string|null, + template: boolean +): Promise { // Prepare headers that preserve credentials of current user. const headers = getTransitiveHeaders(req); @@ -417,11 +430,8 @@ export async function fetchDoc(server: GristServer, docId: string, req: Request, // Find the doc worker responsible for the document we wish to copy. // The backend needs to be well configured for this to work. - const homeUrl = server.getHomeInternalUrl(); - const fetchUrl = new URL(`/api/worker/${docId}`, homeUrl); - const response: FetchResponse = await Deps.fetch(fetchUrl.href, {headers}); - await _checkForError(response); - const docWorkerUrl = getInternalDocWorkerUrl(server.getOwnUrl(), await response.json()); + const { selfPrefix, docWorker } = await getDocWorkerInfoOrSelfPrefix(docId, docWorkerMap, server.getTag()); + const docWorkerUrl = docWorker ? docWorker.internalUrl : getUrlFromPrefix(server.getHomeInternalUrl(), selfPrefix); // Download the document, in full or as a template. const url = new URL(`api/docs/${docId}/download?template=${Number(template)}`, docWorkerUrl.replace(/\/*$/, '/'));