From 87f2fd15fbd71d1fe1c6cdced0950cb6bdb55bf4 Mon Sep 17 00:00:00 2001 From: Paul Fitzpatrick Date: Fri, 18 Sep 2020 14:43:01 -0400 Subject: [PATCH] (core) add more detail to /compare endpoint Summary: * Extends `/api/docs/docId1/compare/docId2` endpoint with a `detail=1` option to include details of what changed in the document content. * Adds an `/api/docs/docId/compare?left=HASH&right=HASH` endpoint for comparing two versions of a single document. This is needed to implement the extension to `/api/docs/docId1/compare/docId2`. * Adds a `HashUtil` class to allow hash aliases like `HEAD` and `HEAD~`. Everything is a bit crude: * Changes are expressed as ActionSummary objects, which aren't fully fleshed out. * Extra data about formula columns is inserted in an inflexible way. This is extracted and cleaned up from https://phab.getgrist.com/D2600. Test Plan: added tests Reviewers: dsagal Reviewed By: dsagal Differential Revision: https://phab.getgrist.com/D2614 --- app/common/ActionSummary.ts | 8 +++ app/common/UserAPI.ts | 35 +++++++++- app/server/lib/ActiveDoc.ts | 9 +++ app/server/lib/DocApi.ts | 130 ++++++++++++++++++++++++++++++++++-- app/server/lib/HashUtil.ts | 46 +++++++++++++ 5 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 app/server/lib/HashUtil.ts diff --git a/app/common/ActionSummary.ts b/app/common/ActionSummary.ts index 8f4f36d4..831ee596 100644 --- a/app/common/ActionSummary.ts +++ b/app/common/ActionSummary.ts @@ -1,5 +1,6 @@ import {CellDelta, TabularDiff, TabularDiffs} from 'app/common/TabularDiff'; import toPairs = require('lodash/toPairs'); +import { RowRecord } from 'app/plugin/GristData'; /** * An ActionSummary represents the overall effect of changes that took place @@ -90,6 +91,13 @@ export interface TableDelta { /** Partial record of cell-level changes - large bulk changes not included. */ columnDeltas: {[colId: string]: ColumnDelta}; columnRenames: LabelDelta[]; /** a list of column renames/additions/removals */ + + /* + * A snapshot of row content for added and updated rows, including formula columns. + * Not included in regular summaries, but used in comparisons. Hopefully this + * can evaporate in future. + */ + finalRowContent?: {[rowId: number]: RowRecord}; } /** diff --git a/app/common/UserAPI.ts b/app/common/UserAPI.ts index fd105c80..54fd2749 100644 --- a/app/common/UserAPI.ts +++ b/app/common/UserAPI.ts @@ -1,3 +1,4 @@ +import {ActionSummary} from 'app/common/ActionSummary'; import {ApplyUAResult} from 'app/common/ActiveDocAPI'; import {BaseAPI, IOptions} from 'app/common/BaseAPI'; import {BillingAPI, BillingAPIImpl} from 'app/common/BillingAPI'; @@ -218,6 +219,18 @@ export interface DocStateComparison { // both: both documents have changes (possible divergence) // unrelated: no common history found summary: 'same' | 'left' | 'right' | 'both' | 'unrelated'; + // optionally, details of what changed may be included. + details?: DocStateComparisonDetails; +} + +/** + * Detailed comparison between document versions. For now, this + * is provided as a pair of ActionSummary objects, relative to + * the most recent common ancestor. + */ +export interface DocStateComparisonDetails { + leftChanges: ActionSummary; + rightChanges: ActionSummary; } export {UserProfile} from 'app/common/LoginSessionAPI'; @@ -289,7 +302,13 @@ export interface DocAPI { replace(source: DocReplacementOptions): Promise; getSnapshots(): Promise; forceReload(): Promise; - compareState(remoteDocId: string): Promise; + // Compare two documents, optionally including details of the changes. + compareDoc(remoteDocId: string, options?: { detail: boolean }): Promise; + // Compare two versions within a document, including details of the changes. + // Versions are identified by action hashes, or aliases understood by HashUtil. + // Currently, leftHash is expected to be an ancestor of rightHash. If rightHash + // is HEAD, the result will contain a copy of any rows added or updated. + compareVersion(leftHash: string, rightHash: string): Promise; } // Operations that are supported by a doc worker. @@ -676,7 +695,17 @@ export class DocAPIImpl extends BaseAPI implements DocAPI { }); } - public async compareState(remoteDocId: string): Promise { - return this.requestJson(`${this._url}/compare/${remoteDocId}`); + public async compareDoc(remoteDocId: string, options: { + detail?: boolean + } = {}): Promise { + const q = options.detail ? '?detail=true' : ''; + return this.requestJson(`${this._url}/compare/${remoteDocId}${q}`); + } + + public async compareVersion(leftHash: string, rightHash: string): Promise { + const url = new URL(`${this._url}/compare`); + url.searchParams.append('left', leftHash); + url.searchParams.append('right', rightHash); + return this.requestJson(url.href); } } diff --git a/app/server/lib/ActiveDoc.ts b/app/server/lib/ActiveDoc.ts index 66c7b7d2..c2c4ea47 100644 --- a/app/server/lib/ActiveDoc.ts +++ b/app/server/lib/ActiveDoc.ts @@ -184,6 +184,15 @@ export class ActiveDoc extends EventEmitter { return this._actionHistory.getRecentStates(maxStates); } + /** + * Access specific actions identified by actionNum. + * TODO: for memory reasons on large docs, would be best not to hold many actions + * in memory at a time, so should e.g. fetch them one at a time. + */ + public getActions(actionNums: number[]): Promise> { + return this._actionHistory.getActions(actionNums); + } + /** * Get the most recent actions from the history. Results are ordered by * earliest actions first, later actions later. If `summarize` is set, diff --git a/app/server/lib/DocApi.ts b/app/server/lib/DocApi.ts index 01cfcc01..f3e46dc2 100644 --- a/app/server/lib/DocApi.ts +++ b/app/server/lib/DocApi.ts @@ -1,12 +1,12 @@ -import { Application, NextFunction, Request, RequestHandler, Response } from "express"; - +import { ActionSummary, createEmptyActionSummary } from "app/common/ActionSummary"; import { ApiError } from 'app/common/ApiError'; import { BrowserSettings } from "app/common/BrowserSettings"; -import { fromTableDataAction, TableColValues } from 'app/common/DocActions'; -import { arrayRepeat } from "app/common/gutil"; +import { fromTableDataAction, RowRecord, TableColValues } from 'app/common/DocActions'; +import { arrayRepeat, isAffirmative } from "app/common/gutil"; import { SortFunc } from 'app/common/SortFunc'; import { DocReplacementOptions, DocState, DocStateComparison, DocStates, NEW_DOCUMENT_CODE} from 'app/common/UserAPI'; import { HomeDBManager, makeDocAuthResult } from 'app/gen-server/lib/HomeDBManager'; +import { concatenateSummaries, summarizeAction } from "app/server/lib/ActionSummary"; import { ActiveDoc } from "app/server/lib/ActiveDoc"; import { assertAccess, getOrSetDocAuth, getTransitiveHeaders, getUserId, isAnonymousUser, RequestWithLogin } from 'app/server/lib/Authorizer'; @@ -15,12 +15,15 @@ import { docSessionFromRequest, makeExceptionalDocSession, OptDocSession } from import { DocWorker } from "app/server/lib/DocWorker"; import { expressWrap } from 'app/server/lib/expressWrap'; import { GristServer } from 'app/server/lib/GristServer'; +import { HashUtil } from 'app/server/lib/HashUtil'; import { makeForkIds } from "app/server/lib/idUtils"; +import * as log from 'app/server/lib/log'; import { getDocId, getDocScope, integerParam, isParameterOn, optStringParam, - sendOkReply, sendReply } from 'app/server/lib/requestUtils'; + sendOkReply, sendReply, stringParam } from 'app/server/lib/requestUtils'; import { SandboxError } from "app/server/lib/sandboxUtil"; import { handleOptionalUpload, handleUpload } from "app/server/lib/uploads"; import * as contentDisposition from 'content-disposition'; +import { Application, NextFunction, Request, RequestHandler, Response } from "express"; import fetch from 'node-fetch'; import * as path from 'path'; @@ -281,6 +284,7 @@ export class DocWorkerApi { })); this._app.get('/api/docs/:docId/compare/:docId2', canView, withDoc(async (activeDoc, req, res) => { + const showDetails = isAffirmative(req.query.detail); const docSession = docSessionFromRequest(req); const {states} = await this._getStates(docSession, activeDoc); const ref = await fetch(this._grist.getHomeUrl(req, `/api/docs/${req.params.docId2}/states`), { @@ -305,9 +309,37 @@ export class DocWorkerApi { const comparison: DocStateComparison = { left, right, parent, summary }; + if (showDetails && parent) { + // Calculate changes from the parent to the current version of this document. + const leftChanges = (await this._getChanges(docSession, activeDoc, states, parent.h, + 'HEAD')).details!.rightChanges; + + // Calculate changes from the (common) parent to the current version of the other document. + const url = `/api/docs/${req.params.docId2}/compare?left=${parent.h}`; + const rightChangesReq = await fetch(this._grist.getHomeUrl(req, url), { + headers: { + ...getTransitiveHeaders(req), + 'Content-Type': 'application/json', + } + }); + const rightChanges = (await rightChangesReq.json()).details!.rightChanges; + + // Add the left and right changes as details to the result. + comparison.details = { leftChanges, rightChanges }; + } res.json(comparison); })); + // Give details about what changed between two versions of a document. + this._app.get('/api/docs/:docId/compare', canView, withDoc(async (activeDoc, req, res) => { + // This could be a relatively slow operation if actions are large. + const left = stringParam(req.query.left || 'HEAD'); + const right = stringParam(req.query.right || 'HEAD'); + const docSession = docSessionFromRequest(req); + const {states} = await this._getStates(docSession, activeDoc); + res.json(await this._getChanges(docSession, activeDoc, states, left, right)); + })); + // Do an import targeted at a specific workspace. Although the URL fits ApiServer, this // endpoint is handled only by DocWorker, so is handled here. (Note: this does not handle // actual file uploads, so no worries here about large request bodies.) @@ -437,6 +469,94 @@ export class DocWorkerApi { }; } + /** + * + * Calculate changes between two document versions identified by leftHash and rightHash. + * If rightHash is the latest version of the document, the ActionSummary for it will + * contain a copy of updated and added rows. + * + * Currently will fail if leftHash is not an ancestor of rightHash (this restriction could + * be lifted, but is adequate for now). + * + */ + private async _getChanges(docSession: OptDocSession, activeDoc: ActiveDoc, states: DocState[], + leftHash: string, rightHash: string): Promise { + const finder = new HashUtil(states); + const leftOffset = finder.hashToOffset(leftHash); + const rightOffset = finder.hashToOffset(rightHash); + if (rightOffset > leftOffset) { + throw new Error('Comparisons currently require left to be an ancestor of right'); + } + const actionNums: number[] = states.slice(rightOffset, leftOffset).map(state => state.n); + const actions = (await activeDoc.getActions(actionNums)).reverse(); + let totalAction = createEmptyActionSummary(); + for (const action of actions) { + if (!action) { continue; } + const summary = summarizeAction(action); + totalAction = concatenateSummaries([totalAction, summary]); + } + const result: DocStateComparison = { + left: states[leftOffset], + right: states[rightOffset], + parent: states[leftOffset], + summary: (leftOffset === rightOffset) ? 'same' : 'right', + details: { + leftChanges: {tableRenames: [], tableDeltas: {}}, + rightChanges: totalAction + } + }; + // Currently, as a bit of a hack, the full final state of updated/added rows + // is included, including formula columns, by looking at the current state + // of the document. + if (rightOffset === 0) { + await this._addRowsToActionSummary(docSession, activeDoc, totalAction); + } else { + // In the future final row content may not be needed, if formula cells end + // up included in ActionSummary. + log.debug('cannot add rows when not comparing to current state of doc'); + } + return result; + } + + /** + * Adds the content of updated and added rows to an ActionSummary. + * For visualizing differences, currently there's no other way to get formula + * information. This only makes sense for an ActionSummary between a previous + * version of the document and the current version, since it accesses the row + * content from the current version of the document. + */ + private async _addRowsToActionSummary(docSession: OptDocSession, activeDoc: ActiveDoc, + summary: ActionSummary) { + for (const tableId of Object.keys(summary.tableDeltas)) { + const tableDelta = summary.tableDeltas[tableId]; + const rowIds = new Set([...tableDelta.addRows, ...tableDelta.updateRows]); + try { + // Inefficient code that reads the entire table in order to pull out the few + // rows we need. + const [, , ids, columns] = await handleSandboxError(tableId, [], activeDoc.fetchQuery( + docSession, {tableId, filters: {}}, true)); + const rows: {[key: number]: RowRecord} = {}; + for (const rowId of rowIds) { + const rec: RowRecord = {id: rowId}; + const idx = ids.indexOf(rowId); + if (idx >= 0) { + for (const colId of Object.keys(columns)) { + rec[colId] = columns[colId][idx]; + } + rows[rowId] = rec; + } + } + tableDelta.finalRowContent = rows; + } catch (e) { + // ActionSummary has some rough spots - if there's some junk in it we just ignore + // that for now. + // TODO: add ids to doc actions and their undos so they can be aligned, so ActionSummary + // doesn't need to use heuristics. + log.error('_addRowsToChanges skipped a table'); + } + } + } + private async _removeDoc(req: Request, res: Response, permanent: boolean) { const scope = getDocScope(req); const docId = getDocId(req); diff --git a/app/server/lib/HashUtil.ts b/app/server/lib/HashUtil.ts new file mode 100644 index 00000000..43ab4ccb --- /dev/null +++ b/app/server/lib/HashUtil.ts @@ -0,0 +1,46 @@ +import { DocState } from 'app/common/UserAPI'; + +/** + * + * Helper class to support a small subset of git-style references for state hashes: + * HEAD = the most recent state + * [HASH]^1 = the parent of [HASH] + * [HASH]~1 = the parent of [HASH] + * [HASH]~2 = the grandparent of [HASH] + * [HASH]^1^1 = the grandparent of [HASH] + * [HASH]~3 = the great grandparent of [HASH] + * For git, where commits have multiple parents, "~" refers to the first parent, + * and "^1" also refers to the first parent. For grist, there are only first parents + * (unless/until we start tracking history across merges). + * + */ +export class HashUtil { + + /** + * To construct, provide a list of states, most recent first. + */ + constructor(private _state: DocState[]) {} + + /** + * Find the named hash in the list of states, allowing for aliases. + * Returns an index into the list of states provided in constructor. + */ + public hashToOffset(hash: string): number { + const parts = hash.split(/([~^][0-9]*)/); + hash = parts.shift() || ''; + let offset = hash === 'HEAD' ? 0 : this._state.findIndex(state => state.h === hash); + if (offset < 0) { throw new Error('Cannot read hash'); } + for (const part of parts) { + if (part === '^' || part === '^1') { + offset++; + } else if (part.startsWith('~')) { + offset += parseInt(part.slice(1) || '1', 10); + } else if (part === '') { + // pass + } else { + throw new Error('cannot parse hash'); + } + } + return offset; + } +}