(core) add more detail to /compare endpoint

Summary:
 * Extends `/api/docs/docId1/compare/docId2` endpoint with a `detail=1` option to include details of what changed in the document content.
 * Adds an `/api/docs/docId/compare?left=HASH&right=HASH` endpoint for comparing two versions of a single document. This is needed to implement the extension to `/api/docs/docId1/compare/docId2`.
 * Adds a `HashUtil` class to allow hash aliases like `HEAD` and `HEAD~`.

Everything is a bit crude:
 * Changes are expressed as ActionSummary objects, which aren't fully fleshed out.
 * Extra data about formula columns is inserted in an inflexible way.

This is extracted and cleaned up from https://phab.getgrist.com/D2600.

Test Plan: added tests

Reviewers: dsagal

Reviewed By: dsagal

Differential Revision: https://phab.getgrist.com/D2614
This commit is contained in:
Paul Fitzpatrick 2020-09-18 14:43:01 -04:00
parent b44e4a94ab
commit 87f2fd15fb
5 changed files with 220 additions and 8 deletions

View File

@ -1,5 +1,6 @@
import {CellDelta, TabularDiff, TabularDiffs} from 'app/common/TabularDiff'; import {CellDelta, TabularDiff, TabularDiffs} from 'app/common/TabularDiff';
import toPairs = require('lodash/toPairs'); import toPairs = require('lodash/toPairs');
import { RowRecord } from 'app/plugin/GristData';
/** /**
* An ActionSummary represents the overall effect of changes that took place * An ActionSummary represents the overall effect of changes that took place
@ -90,6 +91,13 @@ export interface TableDelta {
/** Partial record of cell-level changes - large bulk changes not included. */ /** Partial record of cell-level changes - large bulk changes not included. */
columnDeltas: {[colId: string]: ColumnDelta}; columnDeltas: {[colId: string]: ColumnDelta};
columnRenames: LabelDelta[]; /** a list of column renames/additions/removals */ columnRenames: LabelDelta[]; /** a list of column renames/additions/removals */
/*
* A snapshot of row content for added and updated rows, including formula columns.
* Not included in regular summaries, but used in comparisons. Hopefully this
* can evaporate in future.
*/
finalRowContent?: {[rowId: number]: RowRecord};
} }
/** /**

View File

@ -1,3 +1,4 @@
import {ActionSummary} from 'app/common/ActionSummary';
import {ApplyUAResult} from 'app/common/ActiveDocAPI'; import {ApplyUAResult} from 'app/common/ActiveDocAPI';
import {BaseAPI, IOptions} from 'app/common/BaseAPI'; import {BaseAPI, IOptions} from 'app/common/BaseAPI';
import {BillingAPI, BillingAPIImpl} from 'app/common/BillingAPI'; import {BillingAPI, BillingAPIImpl} from 'app/common/BillingAPI';
@ -218,6 +219,18 @@ export interface DocStateComparison {
// both: both documents have changes (possible divergence) // both: both documents have changes (possible divergence)
// unrelated: no common history found // unrelated: no common history found
summary: 'same' | 'left' | 'right' | 'both' | 'unrelated'; summary: 'same' | 'left' | 'right' | 'both' | 'unrelated';
// optionally, details of what changed may be included.
details?: DocStateComparisonDetails;
}
/**
* Detailed comparison between document versions. For now, this
* is provided as a pair of ActionSummary objects, relative to
* the most recent common ancestor.
*/
export interface DocStateComparisonDetails {
leftChanges: ActionSummary;
rightChanges: ActionSummary;
} }
export {UserProfile} from 'app/common/LoginSessionAPI'; export {UserProfile} from 'app/common/LoginSessionAPI';
@ -289,7 +302,13 @@ export interface DocAPI {
replace(source: DocReplacementOptions): Promise<void>; replace(source: DocReplacementOptions): Promise<void>;
getSnapshots(): Promise<DocSnapshots>; getSnapshots(): Promise<DocSnapshots>;
forceReload(): Promise<void>; forceReload(): Promise<void>;
compareState(remoteDocId: string): Promise<DocStateComparison>; // Compare two documents, optionally including details of the changes.
compareDoc(remoteDocId: string, options?: { detail: boolean }): Promise<DocStateComparison>;
// Compare two versions within a document, including details of the changes.
// Versions are identified by action hashes, or aliases understood by HashUtil.
// Currently, leftHash is expected to be an ancestor of rightHash. If rightHash
// is HEAD, the result will contain a copy of any rows added or updated.
compareVersion(leftHash: string, rightHash: string): Promise<DocStateComparison>;
} }
// Operations that are supported by a doc worker. // Operations that are supported by a doc worker.
@ -676,7 +695,17 @@ export class DocAPIImpl extends BaseAPI implements DocAPI {
}); });
} }
public async compareState(remoteDocId: string): Promise<DocStateComparison> { public async compareDoc(remoteDocId: string, options: {
return this.requestJson(`${this._url}/compare/${remoteDocId}`); detail?: boolean
} = {}): Promise<DocStateComparison> {
const q = options.detail ? '?detail=true' : '';
return this.requestJson(`${this._url}/compare/${remoteDocId}${q}`);
}
public async compareVersion(leftHash: string, rightHash: string): Promise<DocStateComparison> {
const url = new URL(`${this._url}/compare`);
url.searchParams.append('left', leftHash);
url.searchParams.append('right', rightHash);
return this.requestJson(url.href);
} }
} }

View File

@ -184,6 +184,15 @@ export class ActiveDoc extends EventEmitter {
return this._actionHistory.getRecentStates(maxStates); return this._actionHistory.getRecentStates(maxStates);
} }
/**
* Access specific actions identified by actionNum.
* TODO: for memory reasons on large docs, would be best not to hold many actions
* in memory at a time, so should e.g. fetch them one at a time.
*/
public getActions(actionNums: number[]): Promise<Array<LocalActionBundle|undefined>> {
return this._actionHistory.getActions(actionNums);
}
/** /**
* Get the most recent actions from the history. Results are ordered by * Get the most recent actions from the history. Results are ordered by
* earliest actions first, later actions later. If `summarize` is set, * earliest actions first, later actions later. If `summarize` is set,

View File

@ -1,12 +1,12 @@
import { Application, NextFunction, Request, RequestHandler, Response } from "express"; import { ActionSummary, createEmptyActionSummary } from "app/common/ActionSummary";
import { ApiError } from 'app/common/ApiError'; import { ApiError } from 'app/common/ApiError';
import { BrowserSettings } from "app/common/BrowserSettings"; import { BrowserSettings } from "app/common/BrowserSettings";
import { fromTableDataAction, TableColValues } from 'app/common/DocActions'; import { fromTableDataAction, RowRecord, TableColValues } from 'app/common/DocActions';
import { arrayRepeat } from "app/common/gutil"; import { arrayRepeat, isAffirmative } from "app/common/gutil";
import { SortFunc } from 'app/common/SortFunc'; import { SortFunc } from 'app/common/SortFunc';
import { DocReplacementOptions, DocState, DocStateComparison, DocStates, NEW_DOCUMENT_CODE} from 'app/common/UserAPI'; import { DocReplacementOptions, DocState, DocStateComparison, DocStates, NEW_DOCUMENT_CODE} from 'app/common/UserAPI';
import { HomeDBManager, makeDocAuthResult } from 'app/gen-server/lib/HomeDBManager'; import { HomeDBManager, makeDocAuthResult } from 'app/gen-server/lib/HomeDBManager';
import { concatenateSummaries, summarizeAction } from "app/server/lib/ActionSummary";
import { ActiveDoc } from "app/server/lib/ActiveDoc"; import { ActiveDoc } from "app/server/lib/ActiveDoc";
import { assertAccess, getOrSetDocAuth, getTransitiveHeaders, getUserId, isAnonymousUser, import { assertAccess, getOrSetDocAuth, getTransitiveHeaders, getUserId, isAnonymousUser,
RequestWithLogin } from 'app/server/lib/Authorizer'; RequestWithLogin } from 'app/server/lib/Authorizer';
@ -15,12 +15,15 @@ import { docSessionFromRequest, makeExceptionalDocSession, OptDocSession } from
import { DocWorker } from "app/server/lib/DocWorker"; import { DocWorker } from "app/server/lib/DocWorker";
import { expressWrap } from 'app/server/lib/expressWrap'; import { expressWrap } from 'app/server/lib/expressWrap';
import { GristServer } from 'app/server/lib/GristServer'; import { GristServer } from 'app/server/lib/GristServer';
import { HashUtil } from 'app/server/lib/HashUtil';
import { makeForkIds } from "app/server/lib/idUtils"; import { makeForkIds } from "app/server/lib/idUtils";
import * as log from 'app/server/lib/log';
import { getDocId, getDocScope, integerParam, isParameterOn, optStringParam, import { getDocId, getDocScope, integerParam, isParameterOn, optStringParam,
sendOkReply, sendReply } from 'app/server/lib/requestUtils'; sendOkReply, sendReply, stringParam } from 'app/server/lib/requestUtils';
import { SandboxError } from "app/server/lib/sandboxUtil"; import { SandboxError } from "app/server/lib/sandboxUtil";
import { handleOptionalUpload, handleUpload } from "app/server/lib/uploads"; import { handleOptionalUpload, handleUpload } from "app/server/lib/uploads";
import * as contentDisposition from 'content-disposition'; import * as contentDisposition from 'content-disposition';
import { Application, NextFunction, Request, RequestHandler, Response } from "express";
import fetch from 'node-fetch'; import fetch from 'node-fetch';
import * as path from 'path'; import * as path from 'path';
@ -281,6 +284,7 @@ export class DocWorkerApi {
})); }));
this._app.get('/api/docs/:docId/compare/:docId2', canView, withDoc(async (activeDoc, req, res) => { this._app.get('/api/docs/:docId/compare/:docId2', canView, withDoc(async (activeDoc, req, res) => {
const showDetails = isAffirmative(req.query.detail);
const docSession = docSessionFromRequest(req); const docSession = docSessionFromRequest(req);
const {states} = await this._getStates(docSession, activeDoc); const {states} = await this._getStates(docSession, activeDoc);
const ref = await fetch(this._grist.getHomeUrl(req, `/api/docs/${req.params.docId2}/states`), { const ref = await fetch(this._grist.getHomeUrl(req, `/api/docs/${req.params.docId2}/states`), {
@ -305,9 +309,37 @@ export class DocWorkerApi {
const comparison: DocStateComparison = { const comparison: DocStateComparison = {
left, right, parent, summary left, right, parent, summary
}; };
if (showDetails && parent) {
// Calculate changes from the parent to the current version of this document.
const leftChanges = (await this._getChanges(docSession, activeDoc, states, parent.h,
'HEAD')).details!.rightChanges;
// Calculate changes from the (common) parent to the current version of the other document.
const url = `/api/docs/${req.params.docId2}/compare?left=${parent.h}`;
const rightChangesReq = await fetch(this._grist.getHomeUrl(req, url), {
headers: {
...getTransitiveHeaders(req),
'Content-Type': 'application/json',
}
});
const rightChanges = (await rightChangesReq.json()).details!.rightChanges;
// Add the left and right changes as details to the result.
comparison.details = { leftChanges, rightChanges };
}
res.json(comparison); res.json(comparison);
})); }));
// Give details about what changed between two versions of a document.
this._app.get('/api/docs/:docId/compare', canView, withDoc(async (activeDoc, req, res) => {
// This could be a relatively slow operation if actions are large.
const left = stringParam(req.query.left || 'HEAD');
const right = stringParam(req.query.right || 'HEAD');
const docSession = docSessionFromRequest(req);
const {states} = await this._getStates(docSession, activeDoc);
res.json(await this._getChanges(docSession, activeDoc, states, left, right));
}));
// Do an import targeted at a specific workspace. Although the URL fits ApiServer, this // Do an import targeted at a specific workspace. Although the URL fits ApiServer, this
// endpoint is handled only by DocWorker, so is handled here. (Note: this does not handle // endpoint is handled only by DocWorker, so is handled here. (Note: this does not handle
// actual file uploads, so no worries here about large request bodies.) // actual file uploads, so no worries here about large request bodies.)
@ -437,6 +469,94 @@ export class DocWorkerApi {
}; };
} }
/**
*
* Calculate changes between two document versions identified by leftHash and rightHash.
* If rightHash is the latest version of the document, the ActionSummary for it will
* contain a copy of updated and added rows.
*
* Currently will fail if leftHash is not an ancestor of rightHash (this restriction could
* be lifted, but is adequate for now).
*
*/
private async _getChanges(docSession: OptDocSession, activeDoc: ActiveDoc, states: DocState[],
leftHash: string, rightHash: string): Promise<DocStateComparison> {
const finder = new HashUtil(states);
const leftOffset = finder.hashToOffset(leftHash);
const rightOffset = finder.hashToOffset(rightHash);
if (rightOffset > leftOffset) {
throw new Error('Comparisons currently require left to be an ancestor of right');
}
const actionNums: number[] = states.slice(rightOffset, leftOffset).map(state => state.n);
const actions = (await activeDoc.getActions(actionNums)).reverse();
let totalAction = createEmptyActionSummary();
for (const action of actions) {
if (!action) { continue; }
const summary = summarizeAction(action);
totalAction = concatenateSummaries([totalAction, summary]);
}
const result: DocStateComparison = {
left: states[leftOffset],
right: states[rightOffset],
parent: states[leftOffset],
summary: (leftOffset === rightOffset) ? 'same' : 'right',
details: {
leftChanges: {tableRenames: [], tableDeltas: {}},
rightChanges: totalAction
}
};
// Currently, as a bit of a hack, the full final state of updated/added rows
// is included, including formula columns, by looking at the current state
// of the document.
if (rightOffset === 0) {
await this._addRowsToActionSummary(docSession, activeDoc, totalAction);
} else {
// In the future final row content may not be needed, if formula cells end
// up included in ActionSummary.
log.debug('cannot add rows when not comparing to current state of doc');
}
return result;
}
/**
* Adds the content of updated and added rows to an ActionSummary.
* For visualizing differences, currently there's no other way to get formula
* information. This only makes sense for an ActionSummary between a previous
* version of the document and the current version, since it accesses the row
* content from the current version of the document.
*/
private async _addRowsToActionSummary(docSession: OptDocSession, activeDoc: ActiveDoc,
summary: ActionSummary) {
for (const tableId of Object.keys(summary.tableDeltas)) {
const tableDelta = summary.tableDeltas[tableId];
const rowIds = new Set([...tableDelta.addRows, ...tableDelta.updateRows]);
try {
// Inefficient code that reads the entire table in order to pull out the few
// rows we need.
const [, , ids, columns] = await handleSandboxError(tableId, [], activeDoc.fetchQuery(
docSession, {tableId, filters: {}}, true));
const rows: {[key: number]: RowRecord} = {};
for (const rowId of rowIds) {
const rec: RowRecord = {id: rowId};
const idx = ids.indexOf(rowId);
if (idx >= 0) {
for (const colId of Object.keys(columns)) {
rec[colId] = columns[colId][idx];
}
rows[rowId] = rec;
}
}
tableDelta.finalRowContent = rows;
} catch (e) {
// ActionSummary has some rough spots - if there's some junk in it we just ignore
// that for now.
// TODO: add ids to doc actions and their undos so they can be aligned, so ActionSummary
// doesn't need to use heuristics.
log.error('_addRowsToChanges skipped a table');
}
}
}
private async _removeDoc(req: Request, res: Response, permanent: boolean) { private async _removeDoc(req: Request, res: Response, permanent: boolean) {
const scope = getDocScope(req); const scope = getDocScope(req);
const docId = getDocId(req); const docId = getDocId(req);

View File

@ -0,0 +1,46 @@
import { DocState } from 'app/common/UserAPI';
/**
*
* Helper class to support a small subset of git-style references for state hashes:
* HEAD = the most recent state
* [HASH]^1 = the parent of [HASH]
* [HASH]~1 = the parent of [HASH]
* [HASH]~2 = the grandparent of [HASH]
* [HASH]^1^1 = the grandparent of [HASH]
* [HASH]~3 = the great grandparent of [HASH]
* For git, where commits have multiple parents, "~" refers to the first parent,
* and "^1" also refers to the first parent. For grist, there are only first parents
* (unless/until we start tracking history across merges).
*
*/
export class HashUtil {
/**
* To construct, provide a list of states, most recent first.
*/
constructor(private _state: DocState[]) {}
/**
* Find the named hash in the list of states, allowing for aliases.
* Returns an index into the list of states provided in constructor.
*/
public hashToOffset(hash: string): number {
const parts = hash.split(/([~^][0-9]*)/);
hash = parts.shift() || '';
let offset = hash === 'HEAD' ? 0 : this._state.findIndex(state => state.h === hash);
if (offset < 0) { throw new Error('Cannot read hash'); }
for (const part of parts) {
if (part === '^' || part === '^1') {
offset++;
} else if (part.startsWith('~')) {
offset += parseInt(part.slice(1) || '1', 10);
} else if (part === '') {
// pass
} else {
throw new Error('cannot parse hash');
}
}
return offset;
}
}