/* Helper file to separate ActiveDoc import functions and convert them to TypeScript. */

import * as path from 'path';
import * as _ from 'underscore';

import {DataSourceTransformed, ImportResult, ImportTableResult, TransformRuleMap} from 'app/common/ActiveDocAPI';
import {ApplyUAResult} from 'app/common/ActiveDocAPI';
import {ApiError} from 'app/common/ApiError';
import * as gutil from 'app/common/gutil';
import {ParseFileResult, ParseOptions} from 'app/plugin/FileParserAPI';
import {GristTable} from 'app/plugin/GristTable';
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
import {DocSession, OptDocSession} from 'app/server/lib/DocSession';
import * as log from 'app/server/lib/log';
import {globalUploadSet, moveUpload, UploadInfo} from 'app/server/lib/uploads';

/*
 * AddTableRetValue contains return value of user actions 'AddTable'
 */
interface AddTableRetValue {
  table_id: string;
  id: number;
  columns: string[];
  views: object[];
}

/**
 * Describes one reference column found in the parsed tables, so that its type can be restored
 * once the final ids of the imported tables are known.
 */
interface ReferenceDescription {
  // the table index
  tableIndex: number;
  // the column index
  colIndex: number;
  // the id of the table which is referenced
  refTableId: string;
}

export class ActiveDocImport {
  constructor(private _activeDoc: ActiveDoc) {}

  /**
   * Imports files, removes previously created temporary hidden tables and creates the new ones.
   *
   * The bundle of user actions started here is NOT stopped by this method: finishImportFiles()
   * and cancelImportFiles() are the methods that call stopBundleUserActions().
   *
   * @param {DocSession} docSession: Session on whose behalf the actions are applied.
   * @param {DataSourceTransformed} dataSource: Supplies the `uploadId` and `transforms` to use.
   * @param {ParseOptions} parseOptions: Parse options to pass to the parser.
   * @param {Array<String>} prevTableIds: Ids of hidden tables to remove before importing.
   * @returns {Promise<ImportResult>} The parse options and the list of hidden tables created.
   */
  public async importFiles(docSession: DocSession, dataSource: DataSourceTransformed,
                           parseOptions: ParseOptions, prevTableIds: string[]): Promise<ImportResult> {
    this._activeDoc.startBundleUserActions(docSession);
    await this._removeHiddenTables(docSession, prevTableIds);
    const userId = docSession.authorizer.getUserId();
    const accessId = this._activeDoc.makeAccessId(userId);
    const uploadInfo: UploadInfo = globalUploadSet.getUploadInfo(dataSource.uploadId, accessId);
    return this._importFiles(docSession, uploadInfo, dataSource.transforms, parseOptions, true);
  }

  /**
   * Finishes import files, removes temporary hidden tables, temporary uploaded files and creates
   * the new tables.
   *
   * The bundle of user actions is always stopped, whether or not the import succeeds. The upload
   * is only cleaned up when the import succeeds.
   *
   * @param {DocSession} docSession: Session on whose behalf the actions are applied.
   * @param {DataSourceTransformed} dataSource: Supplies the `uploadId` and `transforms` to use.
   * @param {ParseOptions} parseOptions: Parse options to pass to the parser.
   * @param {Array<String>} prevTableIds: Ids of hidden tables to remove before importing.
   * @returns {Promise<ImportResult>} The parse options and the list of tables created.
   */
  public async finishImportFiles(docSession: DocSession, dataSource: DataSourceTransformed,
                                 parseOptions: ParseOptions, prevTableIds: string[]): Promise<ImportResult> {
    this._activeDoc.startBundleUserActions(docSession);
    try {
      await this._removeHiddenTables(docSession, prevTableIds);
      const userId = docSession.authorizer.getUserId();
      const accessId = this._activeDoc.makeAccessId(userId);
      const uploadInfo: UploadInfo = globalUploadSet.getUploadInfo(dataSource.uploadId, accessId);
      const importResult = await this._importFiles(docSession, uploadInfo, dataSource.transforms,
                                                   parseOptions, false);
      await globalUploadSet.cleanup(dataSource.uploadId);
      return importResult;
    } finally {
      this._activeDoc.stopBundleUserActions(docSession);
    }
  }

  /**
   * Cancels import files, removes temporary hidden tables and temporary uploaded files.
   *
   * @param {DocSession} docSession: Session on whose behalf the actions are applied.
   * @param {DataSourceTransformed} dataSource: Supplies the `uploadId` of the upload to clean up.
   * @param {Array<String>} prevTableIds: Array of tableIds as received from previous `importFiles`
   *    call when re-importing with changed `parseOptions`.
   * @returns {Promise<void>} Promise that's resolved when all actions are applied successfully.
   */
  public async cancelImportFiles(docSession: DocSession, dataSource: DataSourceTransformed,
                                 prevTableIds: string[]): Promise<void> {
    try {
      await this._removeHiddenTables(docSession, prevTableIds);
    } finally {
      // Stop the bundle even if removing the hidden tables failed, as finishImportFiles() does,
      // so that a failed cancel does not leave the bundle open.
      this._activeDoc.stopBundleUserActions(docSession);
    }
    await globalUploadSet.cleanup(dataSource.uploadId);
  }

  /**
   * Import the given upload as new tables in one step. This does not give the user a chance to
   * modify parse options or transforms. The caller is responsible for cleaning up the upload.
   *
   * @param {OptDocSession} docSession: Session on whose behalf the actions are applied.
   * @param {UploadInfo} uploadInfo: The upload whose files should be imported.
   * @returns {Promise<ImportResult>} The parse options and the list of tables created.
   */
  public async oneStepImport(docSession: OptDocSession, uploadInfo: UploadInfo): Promise<ImportResult> {
    this._activeDoc.startBundleUserActions(docSession);
    try {
      // The `await` is required: returning the bare promise would run the `finally` block (and
      // stop the bundle) before the import's user actions have been applied.
      return await this._importFiles(docSession, uploadInfo, [], {}, false);
    } finally {
      this._activeDoc.stopBundleUserActions(docSession);
    }
  }

  /**
   * Imports all files as new tables, using the given transform rules and parse options.
   * The isHidden flag indicates whether to create temporary hidden tables, or final ones.
   *
   * Throws an ApiError with status 413 if the total size of the uploaded files exceeds the limit
   * configured via the GRIST_MAX_UPLOAD_IMPORT_MB environment variable (no limit if it is unset,
   * zero, or not a number).
   */
  private async _importFiles(docSession: OptDocSession, upload: UploadInfo, transforms: TransformRuleMap[],
                             parseOptions: ParseOptions, isHidden: boolean): Promise<ImportResult> {
    // Check that upload size is within the configured limits.
    const limit = (Number(process.env.GRIST_MAX_UPLOAD_IMPORT_MB) * 1024 * 1024) || Infinity;
    const totalSize = upload.files.reduce((acc, f) => acc + f.size, 0);
    if (totalSize > limit) {
      throw new ApiError(`Imported files must not exceed ${gutil.byteString(limit)}`, 413);
    }

    // The upload must be within the plugin-accessible directory. Once moved, subsequent calls to
    // moveUpload() will return without having to do anything.
    await moveUpload(upload, this._activeDoc.docPluginManager.tmpDir());

    const importResult: ImportResult = {options: parseOptions, tables: []};
    for (const [index, file] of upload.files.entries()) {
      // If we have a better guess for the file's extension, replace it in origName, to ensure
      // that DocPluginManager has access to it to guess the best parser type.
      let origName: string = file.origName;
      if (file.ext) {
        origName = path.basename(origName, path.extname(origName)) + file.ext;
      }
      const res = await this._importFileAsNewTable(docSession, index, file.absPath, origName,
                                                   parseOptions, isHidden, transforms[index] || {});
      if (index === 0) {
        // Returned parse options from the first file should be used for all files in one upload.
        importResult.options = parseOptions = res.options;
      }
      importResult.tables.push(...res.tables);
    }
    return importResult;
  }

  /**
   * Imports the data stored at tmpPath.
   *
   * Currently it starts a python parser (that relies on the messytables library) as a child process
   * outside the sandbox, and supports xls(x), csv, txt, and perhaps some other formats. It may
   * result in the import of multiple tables, in case of e.g. Excel formats.
   *
   * @param {OptDocSession} docSession: Session on whose behalf the actions are applied.
   * @param {Number} uploadFileIndex: Index of the file within the upload that is being imported.
   * @param {String} tmpPath: The path of the file to import.
   * @param {String} originalFilename: Suggested name of the import file. It is sometimes used as a
   *    suggested table name, e.g. for csv imports.
   * @param {ParseOptions} options: Parse options to pass to the import plugin.
   * @param {Boolean} isHidden: Flag to indicate whether table is temporary and hidden or regular.
   * @param {TransformRuleMap} transformRuleMap: Containing transform rules for each table in file such as
   *    `destTableId`, `destCols`, `sourceCols`.
   * @returns {Promise<ImportResult>} with `options` property containing parseOptions as returned
   *    by the plugin, and `tables`, which is a list of objects with information about
   *    tables, such as `hiddenTableId`, `uploadFileIndex`, `origTableName`, `transformSectionRef`,
   *    `destTableId`.
   */
  private async _importFileAsNewTable(docSession: OptDocSession, uploadFileIndex: number, tmpPath: string,
                                      originalFilename: string, options: ParseOptions, isHidden: boolean,
                                      transformRuleMap: TransformRuleMap|undefined): Promise<ImportResult> {
    log.info("ActiveDoc._importFileAsNewTable(%s, %s)", tmpPath, originalFilename);
    const optionsAndData: ParseFileResult =
      await this._activeDoc.docPluginManager.parseFile(tmpPath, originalFilename, options);
    options = optionsAndData.parseOptions;

    const parsedTables = optionsAndData.tables;
    // Reference columns are turned into 'Int' columns (in place) for the import; their types are
    // restored by _fixReferences() once the ids of all imported tables are known.
    const references = this._encodeReferenceAsInt(parsedTables);

    const tables: ImportTableResult[] = [];
    const fixedColumnIdsByTable: { [tableId: string]: string[]; } = {};

    // These do not depend on the table being imported, so compute them once.
    const ext = path.extname(originalFilename);
    const basename = path.basename(originalFilename, ext).trim();
    const hiddenTableName = 'GristHidden_import';

    for (const table of parsedTables) {
      const origTableName = table.table_name ? table.table_name : '';
      const transformRule = transformRuleMap && transformRuleMap.hasOwnProperty(origTableName) ?
        transformRuleMap[origTableName] : null;
      const result: ApplyUAResult = await this._activeDoc.applyUserActions(docSession,
        [["AddTable", hiddenTableName, table.column_metadata]]);
      const retValue: AddTableRetValue = result.retValues[0];
      const hiddenTableId = retValue.table_id;    // The sanitized version of the table name.
      const hiddenTableColIds = retValue.columns;      // The sanitized names of the columns.

      // The table_data received from importFile is an array of columns of data, rather than a
      // dictionary, so that it doesn't depend on column names. We instead construct the
      // dictionary once we receive the sanitized column names from AddTable.
      const dataLength = table.table_data[0] ? table.table_data[0].length : 0;
      log.info("Importing table %s, %s rows, from %s", hiddenTableId, dataLength, table.table_name);

      const rowIdColumn = _.range(1, dataLength + 1);
      const columnValues = _.object(hiddenTableColIds, table.table_data);
      const destTableId = transformRule ? transformRule.destTableId : null;
      // A rule is only usable if every source column it names exists in the newly added table.
      const ruleCanBeApplied = (transformRule != null) &&
        _.difference(transformRule.sourceCols, hiddenTableColIds).length === 0;
      await this._activeDoc.applyUserActions(docSession,
        [["ReplaceTableData", hiddenTableId, rowIdColumn, columnValues]]);

      // data parsed and put into hiddenTableId
      // For preview_table (isHidden) do GenImporterView to make views and formulas and cols
      // For final import, call TransformAndFinishImport, which imports file using a transform rule (or blank)

      let createdTableId: string;
      let transformSectionRef: number = -1; // TODO: we only have this if we genImporterView, is it necessary?

      if (isHidden) {
        // Generate formula columns, view sections, etc
        const results: ApplyUAResult = await this._activeDoc.applyUserActions(docSession,
          [['GenImporterView', hiddenTableId, destTableId, ruleCanBeApplied ? transformRule : null]]);

        transformSectionRef = results.retValues[0];
        createdTableId = hiddenTableId;
      } else {
        // Do final import
        const intoNewTable: boolean = !destTableId;
        const destTable = destTableId || table.table_name || basename;
        const tableId = await this._activeDoc.applyUserActions(docSession,
          [['TransformAndFinishImport',
            hiddenTableId, destTable, intoNewTable,
            ruleCanBeApplied ? transformRule : null]]);

        createdTableId = tableId.retValues[0]; // this is garbage for now I think?
      }

      fixedColumnIdsByTable[createdTableId] = hiddenTableColIds;

      tables.push({
        hiddenTableId: createdTableId, // TODO: rename thing?
        uploadFileIndex,
        origTableName,
        transformSectionRef, // TODO: this shouldnt always be needed, and we only get it if genimporttransform
        destTableId
      });
    }

    await this._fixReferences(docSession, parsedTables, tables, fixedColumnIdsByTable, references, isHidden);

    return ({options, tables});
  }

  /**
   * This function removes temporary hidden tables which were created during the import process.
   * No user action is applied when `hiddenTableIds` is empty.
   *
   * @param {DocSession} docSession: Session on whose behalf the actions are applied.
   * @param {Array<String>} hiddenTableIds: Array of hidden table ids
   * @returns {Promise<void>} Promise that's resolved when all actions are applied successfully.
   */
  private async _removeHiddenTables(docSession: DocSession, hiddenTableIds: string[]): Promise<void> {
    if (hiddenTableIds.length !== 0) {
      await this._activeDoc.applyUserActions(docSession, hiddenTableIds.map(t => ['RemoveTable', t]));
    }
  }

  /**
   * This method changes every column of references into a column of integers in `parsedTables`
   * (the column metadata is modified in place). It returns a list of descriptors of all columns
   * of references that were changed.
   */
  private _encodeReferenceAsInt(parsedTables: GristTable[]): ReferenceDescription[] {
    const references: ReferenceDescription[] = [];
    for (const [tableIndex, parsedTable] of parsedTables.entries()) {
      for (const [colIndex, col] of parsedTable.column_metadata.entries()) {
        // Only a truthy result is treated as a reference; presumably removePrefix() returns a
        // falsy value when the type does not start with "Ref:" -- verify against gutil.
        const refTableId = gutil.removePrefix(col.type, "Ref:");
        if (refTableId) {
          references.push({refTableId, colIndex, tableIndex});
          col.type = 'Int';
        }
      }
    }
    return references;
  }

  /**
   * This function fixes references that are broken by the change of table id: for every column
   * described in `references`, it issues a 'ModifyColumn' action that sets the column type to a
   * reference to the imported table whose original name matches the referenced table id.
   *
   * When `isHidden` is true, the same change is also applied to the column whose id is the
   * original column id prefixed with 'gristHelper_Import_'.
   *
   * NOTE(review): a reference to a table name that is not among `tables` would make the lookup
   * in `tablesByOrigName` fail with a TypeError -- confirm whether the parser can produce that.
   */
  private async _fixReferences(docSession: OptDocSession,
                               parsedTables: GristTable[],
                               tables: ImportTableResult[],
                               fixedColumnIds: { [tableId: string]: string[]; },
                               references: ReferenceDescription[],
                               isHidden: boolean): Promise<void> {

    // collect all new table ids
    const tablesByOrigName = _.indexBy(tables, 'origTableName');

    // gather all of the user actions
    let userActions: any[] = references.map( ref => {
      const fixedTableId = tables[ref.tableIndex].hiddenTableId;
      return [
        'ModifyColumn',
        fixedTableId,
        fixedColumnIds[fixedTableId][ref.colIndex],
        { type: `Ref:${tablesByOrigName[ref.refTableId].hiddenTableId}` }
      ];
    });

    if (isHidden) {
      userActions = userActions.concat(userActions.map(([, tableId, columnId, colInfo]) => [
        'ModifyColumn', tableId, 'gristHelper_Import_' + columnId, colInfo ]));
    }

    // apply user actions
    if (userActions.length) {
      await this._activeDoc.applyUserActions(docSession, userActions);
    }
  }
}