import {ApiError} from 'app/common/ApiError'; import {InactivityTimer} from 'app/common/InactivityTimer'; import {FetchUrlOptions, FileUploadResult, UPLOAD_URL_PATH, UploadResult} from 'app/common/uploads'; import {getDocWorkerUrl} from 'app/common/UserAPI'; import {getAuthorizedUserId, getTransitiveHeaders, getUserId, isSingleUserMode, RequestWithLogin} from 'app/server/lib/Authorizer'; import {expressWrap} from 'app/server/lib/expressWrap'; import {downloadFromGDrive, isDriveUrl} from 'app/server/lib/GoogleImport'; import {GristServer, RequestWithGrist} from 'app/server/lib/GristServer'; import {guessExt} from 'app/server/lib/guessExt'; import log from 'app/server/lib/log'; import {optStringParam} from 'app/server/lib/requestUtils'; import {isPathWithin} from 'app/server/lib/serverUtils'; import * as shutdown from 'app/server/lib/shutdown'; import {fromCallback} from 'bluebird'; import * as contentDisposition from 'content-disposition'; import {Application, Request, RequestHandler, Response} from 'express'; import * as fse from 'fs-extra'; import pick = require('lodash/pick'); import * as multiparty from 'multiparty'; import fetch, {Response as FetchResponse} from 'node-fetch'; import * as path from 'path'; import * as tmp from 'tmp'; // After some time of inactivity, clean up the upload. We give an hour, which seems generous, // except that if one is toying with import options, and leaves the upload in an open browser idle // for an hour, it will get cleaned up. TODO Address that; perhaps just with some UI messages. const INACTIVITY_CLEANUP_MS = 60 * 60 * 1000; // an hour, very generously. // A hook for dependency injection. export const Deps = {fetch, INACTIVITY_CLEANUP_MS}; // An optional UploadResult, with parameters. export interface FormResult { upload?: UploadResult; parameters?: {[key: string]: string}; } /** * Adds an upload route to the given express app, listening for POST requests at UPLOAD_URL_PATH. */ export function addUploadRoute(server: GristServer, expressApp: Application, ...handlers: RequestHandler[]): void { // When doing a cross-origin post, the browser will check for access with options prior to posting. // We need to reassure it that the request will be accepted before it will go ahead and post. expressApp.options([`/${UPLOAD_URL_PATH}`, '/copy'], ...handlers, async (req, res) => { // Origin is checked by middleware - if we get this far, we are ok. res.status(200).send(); }); expressApp.post(`/${UPLOAD_URL_PATH}`, ...handlers, expressWrap(async (req: Request, res: Response) => { try { const uploadResult: UploadResult = await handleUpload(req, res); res.status(200).send(JSON.stringify(uploadResult)); } catch (err) { req.resume(); if (err.message && /Request aborted/.test(err.message)) { log.warn("File upload request aborted", err); } else { log.error("Error uploading file", err); } // Respond with a JSON error like jsonErrorHandler does for API calls, // to make it easier for the caller to parse it. res.status(err.status || 500).json({error: err.message || 'internal error'}); } })); // Like upload, but copy data from a document already known to us. expressApp.post(`/copy`, ...handlers, expressWrap(async (req: Request, res: Response) => { const docId = optStringParam(req.query.doc); const name = optStringParam(req.query.name); if (!docId) { throw new Error('doc must be specified'); } const accessId = makeAccessId(req, getAuthorizedUserId(req)); try { const uploadResult: UploadResult = await fetchDoc(server, docId, req, accessId, req.query.template === '1'); if (name) { globalUploadSet.changeUploadName(uploadResult.uploadId, accessId, name); } res.status(200).send(JSON.stringify(uploadResult)); } catch(err) { if ((err as ApiError).status === 403) { res.status(403).json({error:'Insufficient access to document to copy it entirely'}); return; } throw err; } })); } /** * Create a FileUploadInfo for the given file. */ export async function getFileUploadInfo(filePath: string): Promise<FileUploadInfo> { return { absPath: filePath, origName: path.basename(filePath), size: (await fse.stat(filePath)).size, ext: path.extname(filePath).toLowerCase(), }; } /** * Implementation of the express /upload route. */ export async function handleUpload(req: Request, res: Response): Promise<UploadResult> { const {upload} = await handleOptionalUpload(req, res); if (!upload) { throw new ApiError('missing payload', 400); } return upload; } /** * Process form data that may contain an upload, returning that upload (if present) * and any parameters. */ export async function handleOptionalUpload(req: Request, res: Response): Promise<FormResult> { const {tmpDir, cleanupCallback} = await createTmpDir({}); const mreq = req as RequestWithLogin; const meta = { org: mreq.org, email: mreq.user && mreq.user.loginEmail, userId: mreq.userId, altSessionId: mreq.altSessionId, }; log.rawDebug(`Prepared to receive upload into tmp dir ${tmpDir}`, meta); // Note that we don't limit upload sizes here, since this endpoint doesn't know what kind of // upload it is, and some uploads are unlimited (e.g. uploading .grist files). Limits are // checked in the client, and should be enforced on the server where an upload is processed. const form = new multiparty.Form({uploadDir: tmpDir}); const [formFields, formFiles] = await fromCallback((cb: any) => form.parse(req, cb), {multiArgs: true}); // 'upload' is the name of the form field containing file data. let upload: UploadResult|undefined; if (formFiles.upload) { const uploadedFiles: FileUploadInfo[] = []; for (const file of formFiles.upload) { const mimeType = file.headers['content-type']; log.rawDebug(`Received file ${file.originalFilename} (${file.size} bytes)`, meta); uploadedFiles.push({ absPath: file.path, origName: file.originalFilename, size: file.size, ext: await guessExt(file.path, file.originalFilename, mimeType), }); } const accessId = makeAccessId(req, getUserId(req)); const uploadId = globalUploadSet.registerUpload(uploadedFiles, tmpDir, cleanupCallback, accessId); const files: FileUploadResult[] = uploadedFiles.map(f => pick(f, ['origName', 'size', 'ext'])); log.rawDebug(`Created uploadId ${uploadId} in tmp dir ${tmpDir}`, meta); upload = {uploadId, files}; } const parameters: {[key: string]: string} = {}; for (const key of Object.keys(formFields)) { parameters[key] = formFields[key][0]; } return {upload, parameters}; } /** * Represents a single uploaded file on the server side. Only the FileUploadResult part is exposed * to the browser for information purposes. */ export interface FileUploadInfo extends FileUploadResult { absPath: string; // Absolute path to the file on disk. } /** * Represents a complete upload on the server side. It may be a temporary directory containing a * list of files (not subdirectories), or a collection of non-temporary files. The * cleanupCallback() is responsible for removing the temporary directory. It should be a no-op for * non-temporary files. */ export interface UploadInfo { uploadId: number; // ID of the upload files: FileUploadInfo[]; // List of all files included in the upload. tmpDir: string|null; // Temporary directory to remove, containing this upload. // If present, all files must be direct children of this directory. cleanupCallback: CleanupCB; // Callback to clean up this upload, including removing tmpDir. cleanupTimer: InactivityTimer; accessId: string|null; // Optional identifier for access control purposes. } type CleanupCB = () => void|Promise<void>; export class UploadSet { private _uploads: Map<number, UploadInfo> = new Map(); private _nextId: number = 0; /** * Register a new upload. */ public registerUpload(files: FileUploadInfo[], tmpDir: string|null, cleanupCallback: CleanupCB, accessId: string|null): number { const uploadId = this._nextId++; const cleanupTimer = new InactivityTimer(() => this.cleanup(uploadId), Deps.INACTIVITY_CLEANUP_MS); this._uploads.set(uploadId, {uploadId, files, tmpDir, cleanupCallback, cleanupTimer, accessId}); cleanupTimer.ping(); return uploadId; } /** * Returns full info for the given uploadId, if authorized. */ public getUploadInfo(uploadId: number, accessId: string|null): UploadInfo { const info = this._getUploadInfoWithoutAuthorization(uploadId); if (info.accessId !== accessId) { throw new ApiError('access denied', 403); } return info; } /** * Clean up a particular upload. */ public async cleanup(uploadId: number): Promise<void> { log.debug("UploadSet: cleaning up uploadId %s", uploadId); const info = this._getUploadInfoWithoutAuthorization(uploadId); info.cleanupTimer.disable(); this._uploads.delete(uploadId); await info.cleanupCallback(); } /** * Clean up all uploads in this UploadSet. It may be used again after this call (it's called * multiple times in tests). */ public async cleanupAll(): Promise<void> { log.info("UploadSet: cleaning up all %d uploads in set", this._uploads.size); const uploads = Array.from(this._uploads.values()); this._uploads.clear(); this._nextId = 0; for (const info of uploads) { try { info.cleanupTimer.disable(); await info.cleanupCallback(); } catch (err) { log.warn(`Error cleaning upload ${info.uploadId}: ${err}`); } } } /** * Changes the name of an uploaded file. It is an error to use if the upload set has more than one * file and it will throw. */ public changeUploadName(uploadId: number, accessId: string|null, name: string) { const info = this.getUploadInfo(uploadId, accessId); if (info.files.length > 1) { throw new Error("UploadSet.changeUploadName cannot operate on multiple files"); } info.files[0].origName = name; } /** * Returns full info for the given uploadId, without checking authorization. */ private _getUploadInfoWithoutAuthorization(uploadId: number): UploadInfo { const info = this._uploads.get(uploadId); if (!info) { throw new ApiError(`Unknown upload ${uploadId}`, 404); } // If the upload is being used, reschedule the inactivity timeout. info.cleanupTimer.ping(); return info; } } // Maintains uploads created on this host. export const globalUploadSet: UploadSet = new UploadSet(); // Registers a handler to clean up on exit. We do this intentionally: even though module `tmp` has // its own logic to clean up, that logic isn't triggered when the server is killed with a signal. shutdown.addCleanupHandler(null, () => globalUploadSet.cleanupAll()); /** * Moves this upload to a new directory. A new temporary subdirectory is created there first. If * the upload contained temporary files, those are moved; if non-temporary files, those are * copied. Aside from new file locations, the rest of the upload info stays unchanged. * * In any case, the previous cleanupCallback is run, and a new one created for the new tmpDir. * * This is used specifically for placing uploads into a location accessible by sandboxed code. */ export async function moveUpload(uploadInfo: UploadInfo, newDir: string): Promise<void> { if (uploadInfo.tmpDir && isPathWithin(newDir, uploadInfo.tmpDir)) { // Upload is already within newDir. return; } log.debug("UploadSet: moving uploadId %s to %s", uploadInfo.uploadId, newDir); const {tmpDir, cleanupCallback} = await createTmpDir({dir: newDir}); const move: boolean = Boolean(uploadInfo.tmpDir); const files: FileUploadInfo[] = []; for (const f of uploadInfo.files) { const absPath = path.join(tmpDir, path.basename(f.absPath)); await (move ? fse.move(f.absPath, absPath) : fse.copy(f.absPath, absPath)); files.push({...f, absPath}); } try { await uploadInfo.cleanupCallback(); } catch (err) { // This is unexpected, but if the move succeeded, let's warn but not fail on cleanup error. log.warn(`Error cleaning upload ${uploadInfo.uploadId} after move: ${err}`); } Object.assign(uploadInfo, {files, tmpDir, cleanupCallback}); } interface TmpDirResult { tmpDir: string; cleanupCallback: CleanupCB; } /** * Helper to create a temporary directory. It's a simple wrapper around tmp.dir, but replaces the * cleanup callback with an asynchronous version. */ export async function createTmpDir(options: tmp.Options): Promise<TmpDirResult> { const fullOptions = {prefix: 'grist-upload-', unsafeCleanup: true, ...options}; const [tmpDir, tmpCleanup]: [string, CleanupCB] = await fromCallback( (cb: any) => tmp.dir(fullOptions, cb), {multiArgs: true}); async function cleanupCallback() { // Using fs-extra is better because it's asynchronous. await fse.remove(tmpDir); try { // Still call the original callback, so that `tmp` module doesn't keep remembering about // this directory and doesn't try to delete it again on exit. await tmpCleanup(); } catch (err) { // OK if it fails because the dir is already removed. } } return {tmpDir, cleanupCallback}; } /** * Register a new upload with resource fetched from a public url. Returns corresponding UploadInfo. */ export async function fetchURL(url: string, accessId: string|null, options?: FetchUrlOptions): Promise<UploadResult> { return _fetchURL(url, accessId, { fileName: path.basename(url), ...options}); } /** * Register a new upload with resource fetched from a url, optionally including credentials in request. * Returns corresponding UploadInfo. */ async function _fetchURL(url: string, accessId: string|null, options?: FetchUrlOptions): Promise<UploadResult> { try { const code = options?.googleAuthorizationCode; let fileName = options?.fileName ?? ''; const headers = options?.headers; let response: FetchResponse; if (isDriveUrl(url)) { response = await downloadFromGDrive(url, code); fileName = ''; // Read the file name from headers. } else { response = await Deps.fetch(url, { redirect: 'follow', follow: 10, headers }); } await _checkForError(response); if (fileName === '') { const disposition = response.headers.get('content-disposition') || ''; fileName = contentDisposition.parse(disposition).parameters.filename || 'document.grist'; } const mimeType = response.headers.get('content-type'); const {tmpDir, cleanupCallback} = await createTmpDir({}); // Any name will do for the single file in tmpDir, but note that fileName may not be valid. const destPath = path.join(tmpDir, 'upload-content'); await new Promise((resolve, reject) => { const dest = fse.createWriteStream(destPath, {autoClose: true}); response.body.on('error', reject); dest.on('error', reject); dest.on('finish', resolve); response.body.pipe(dest); }); const uploadedFile: FileUploadInfo = { absPath: path.resolve(destPath), origName: fileName, size: (await fse.stat(destPath)).size, ext: await guessExt(destPath, fileName, mimeType), }; log.debug(`done fetching url: ${url} to ${destPath}`); const uploadId = globalUploadSet.registerUpload([uploadedFile], tmpDir, cleanupCallback, accessId); return {uploadId, files: [pick(uploadedFile, ['origName', 'size', 'ext'])]}; } catch(err) { if (err?.code === "EPROTO" || // https vs http error err?.code === "ECONNREFUSED" || // server does not listen err?.code === "ENOTFOUND") { // could not resolve domain throw new ApiError(`Can't connect to the server. The URL seems to be invalid. Error code ${err.code}`, 400); } throw err; } } /** * Fetches a Grist doc potentially managed by a different doc worker. Passes on credentials * supplied in the current request. */ async function fetchDoc(server: GristServer, docId: string, req: Request, accessId: string|null, template: boolean): Promise<UploadResult> { // Prepare headers that preserve credentials of current user. const headers = getTransitiveHeaders(req); // Find the doc worker responsible for the document we wish to copy. // The backend needs to be well configured for this to work. const homeUrl = server.getHomeUrl(req); const fetchUrl = new URL(`/api/worker/${docId}`, homeUrl); const response: FetchResponse = await Deps.fetch(fetchUrl.href, {headers}); await _checkForError(response); const docWorkerUrl = getDocWorkerUrl(server.getOwnUrl(), await response.json()); // Download the document, in full or as a template. const url = new URL(`api/docs/${docId}/download?template=${Number(template)}`, docWorkerUrl.replace(/\/*$/, '/')); return _fetchURL(url.href, accessId, {headers}); } // Re-issue failures as exceptions. async function _checkForError(response: FetchResponse) { if (response.status === 403) { throw new ApiError("Access to this resource was denied.", response.status); } if (response.ok) { const contentType = response.headers.get("content-type"); if (contentType?.startsWith("text/html")) { // Probably we hit some login page if (response.url.startsWith("https://accounts.google.com")) { throw new ApiError("Importing directly from a Google Drive URL is not supported yet. " + 'Use the "Import from Google Drive" menu option instead.', 403); } else { throw new ApiError("Could not import the requested file, check if you have all required permissions.", 403); } } return; } const body = await response.json().catch(() => ({})); if (response.status === 404) { throw new ApiError("File can't be found at the requested URL.", 404); } else if (response.status >= 500 && response.status < 600) { throw new ApiError(`Remote server returned an error (${body.error || response.statusText})`, response.status, body.details); } else { throw new ApiError(body.error || response.statusText, response.status, body.details); } } /** * Create an access identifier, combining the userId supplied with the host of the * doc worker. Returns null if userId is null or in standalone mode. * Adding host information makes workers sharing a process more useful models of * full-blown isolated workers. */ export function makeAccessId(worker: string|Request|GristServer, userId: number|null): string|null { if (isSingleUserMode()) { return null; } if (userId === null) { return null; } let host: string; if (typeof worker === 'string') { host = worker; } else if ('getHost' in worker) { host = worker.getHost(); } else { const gristServer = (worker as RequestWithGrist).gristServer; if (!gristServer) { throw new Error('Problem accessing server with upload'); } host = gristServer.getHost(); } return `${userId}:${host}`; }