(core) New API to collect timing information from formula evaluation.

Summary:
- /timing/start endpoint to start collecting information
- /timing/stop endpoint to stop collecting
- /timing to retrive data gatherd so far

Timings are collected for all columns (including hidden/helpers/system)

Test Plan: Added new

Reviewers: paulfitz

Reviewed By: paulfitz

Differential Revision: https://phab.getgrist.com/D4230
This commit is contained in:
Jarosław Sadziński 2024-04-18 14:13:16 +02:00
parent c187ca3093
commit bd07e9c026
12 changed files with 530 additions and 46 deletions

View File

@ -48,6 +48,8 @@ export class DocComm extends Disposable implements ActiveDocAPI {
public getUsersForViewAs = this._wrapMethod("getUsersForViewAs");
public getAccessToken = this._wrapMethod("getAccessToken");
public getShare = this._wrapMethod("getShare");
public startTiming = this._wrapMethod("startTiming");
public stopTiming = this._wrapMethod("stopTiming");
public changeUrlIdEmitter = this.autoDispose(new Emitter());

View File

@ -288,6 +288,48 @@ export interface RemoteShareInfo {
key: string;
}
/**
* Metrics gathered during formula calculations.
*/
export interface TimingInfo {
/**
* Total time spend evaluating a formula.
*/
total: number;
/**
* Number of times the formula was evaluated (for all rows).
*/
count: number;
average: number;
max: number;
}
/**
* Metrics attached to a particular column in a table. Contains also marks if they were gathered.
* Currently we only mark the `OrderError` exception (so when formula calculation was restarted due to
* order dependency).
*/
export interface FormulaTimingInfo extends TimingInfo {
tableId: string;
colId: string;
marks?: Array<TimingInfo & {name: string}>;
}
/*
* Status of timing info collection. Contains intermediate results if engine is not busy at the moment.
*/
export interface TimingStatus {
/**
* If true, timing info is being collected.
*/
status: boolean;
/**
* Will be undefined if we can't get the timing info (e.g. if the document is locked by other call).
* Otherwise, contains the intermediate results gathered so far.
*/
timing?: FormulaTimingInfo[];
}
export interface ActiveDocAPI {
/**
* Closes a document, and unsubscribes from its userAction events.
@ -449,5 +491,18 @@ export interface ActiveDocAPI {
*/
getUsersForViewAs(): Promise<PermissionDataWithExtraUsers>;
getShare(linkId: string): Promise<RemoteShareInfo>;
/**
* Get a share info associated with the document.
*/
getShare(linkId: string): Promise<RemoteShareInfo|null>;
/**
* Starts collecting timing information from formula evaluations.
*/
startTiming(): Promise<void>;
/**
* Stops collecting timing information and returns the collected data.
*/
stopTiming(): Promise<TimingInfo[]>;
}

View File

@ -507,6 +507,16 @@ export interface DocAPI {
flushWebhook(webhookId: string): Promise<void>;
getAssistance(params: AssistanceRequest): Promise<AssistanceResponse>;
/**
* Check if the document is currently in timing mode.
*/
timing(): Promise<{status: boolean}>;
/**
* Starts recording timing information for the document. Throws exception if timing is already
* in progress or you don't have permission to start timing.
*/
startTiming(): Promise<void>;
stopTiming(): Promise<void>;
}
// Operations that are supported by a doc worker.
@ -1121,6 +1131,18 @@ export class DocAPIImpl extends BaseAPI implements DocAPI {
});
}
public async timing(): Promise<{status: boolean}> {
return this.requestJson(`${this._url}/timing`);
}
public async startTiming(): Promise<void> {
await this.request(`${this._url}/timing/start`, {method: 'POST'});
}
public async stopTiming(): Promise<void> {
await this.request(`${this._url}/timing/stop`, {method: 'POST'});
}
private _getRecords(tableId: string, endpoint: 'data' | 'records', options?: GetRowsParams): Promise<any> {
const url = new URL(`${this._url}/tables/${tableId}/${endpoint}`);
if (options?.filters) {

View File

@ -70,6 +70,9 @@ export class DocApiForwarder {
app.use('/api/docs/:docId/webhooks', withDoc);
app.use('/api/docs/:docId/assistant', withDoc);
app.use('/api/docs/:docId/sql', withDoc);
app.use('/api/docs/:docId/timing', withDoc);
app.use('/api/docs/:docId/timing/start', withDoc);
app.use('/api/docs/:docId/timing/stop', withDoc);
app.use('/api/docs/:docId/forms/:vsId', withDoc);
app.use('^/api/docs$', withoutDoc);
}

View File

@ -22,6 +22,7 @@ import {
ApplyUAResult,
DataSourceTransformed,
ForkResult,
FormulaTimingInfo,
ImportOptions,
ImportResult,
ISuggestionWithValue,
@ -220,6 +221,7 @@ export class ActiveDoc extends EventEmitter {
public docData: DocData|null = null;
// Used by DocApi to only allow one webhook-related endpoint to run at a time.
public readonly triggersLock: Mutex = new Mutex();
public isTimingOn = false;
protected _actionHistory: ActionHistory;
protected _docManager: DocManager;
@ -1366,6 +1368,7 @@ export class ActiveDoc extends EventEmitter {
*/
public async reloadDoc(docSession?: DocSession) {
this._log.debug(docSession || null, 'ActiveDoc.reloadDoc starting shutdown');
this._docManager.restoreTimingOn(this.docName, this.isTimingOn);
return this.shutdown();
}
@ -1870,6 +1873,40 @@ export class ActiveDoc extends EventEmitter {
return await this._getHomeDbManagerOrFail().getShareByLinkId(this.docName, linkId);
}
public async startTiming(): Promise<void> {
// Set the flag to indicate that timing is on.
this.isTimingOn = true;
try {
// Call the data engine to start timing.
await this._doStartTiming();
} catch (e) {
this.isTimingOn = false;
throw e;
}
// Mark self as in timing mode, in case we get reloaded.
this._docManager.restoreTimingOn(this.docName, true);
}
public async stopTiming(): Promise<FormulaTimingInfo[]> {
// First call the data engine to stop timing, and gather results.
const timingResults = await this._pyCall('stop_timing');
// Toggle the flag and clear the reminder.
this.isTimingOn = false;
this._docManager.restoreTimingOn(this.docName, false);
return timingResults;
}
public async getTimings(): Promise<FormulaTimingInfo[]|void> {
if (this._modificationLock.isLocked()) {
return;
}
return await this._pyCall('get_timings');
}
/**
* Loads an open document from DocStorage. Returns a list of the tables it contains.
*/
@ -2377,6 +2414,10 @@ export class ActiveDoc extends EventEmitter {
});
await this._pyCall('initialize', this._options?.docUrl);
if (this.isTimingOn) {
await this._doStartTiming();
}
// Calculations are not associated specifically with the user opening the document.
// TODO: be careful with which users can create formulas.
await this._applyUserActions(makeExceptionalDocSession('system'), [['Calculate']]);
@ -2686,7 +2727,9 @@ export class ActiveDoc extends EventEmitter {
}
private async _getEngine(): Promise<ISandbox> {
if (this._shuttingDown) { throw new Error('shutting down, data engine unavailable'); }
if (this._shuttingDown) {
throw new Error('shutting down, data engine unavailable');
}
if (this._dataEngine) { return this._dataEngine; }
this._dataEngine = this._isSnapshot ? this._makeNullEngine() : this._makeEngine();
@ -2830,6 +2873,10 @@ export class ActiveDoc extends EventEmitter {
return dbManager;
}
private _doStartTiming() {
return this._pyCall('start_timing');
}
}
// Helper to initialize a sandbox action bundle with no values.

View File

@ -1556,6 +1556,40 @@ export class DocWorkerApi {
});
})
);
// GET /api/docs/:docId/timings
// Checks if timing is on for the document.
this._app.get('/api/docs/:docId/timing', isOwner, withDoc(async (activeDoc, req, res) => {
if (!activeDoc.isTimingOn) {
res.json({status: 'disabled'});
} else {
const timing = await activeDoc.getTimings();
const status = timing ? 'active' : 'pending';
res.json({status, timing});
}
}));
// POST /api/docs/:docId/timings/start
// Start a timing for the document.
this._app.post('/api/docs/:docId/timing/start', isOwner, withDoc(async (activeDoc, req, res) => {
if (activeDoc.isTimingOn) {
res.status(400).json({error:`Timing already started for ${activeDoc.docName}`});
return;
}
// isTimingOn flag is switched synchronously.
await activeDoc.startTiming();
res.sendStatus(200);
}));
// POST /api/docs/:docId/timings/stop
// Stop a timing for the document.
this._app.post('/api/docs/:docId/timing/stop', isOwner, withDoc(async (activeDoc, req, res) => {
if (!activeDoc.isTimingOn) {
res.status(400).json({error:`Timing not started for ${activeDoc.docName}`});
return;
}
res.json(await activeDoc.stopTiming());
}));
}
/**

View File

@ -43,7 +43,10 @@ export const DEFAULT_CACHE_TTL = 10000;
// How long to remember that a document has been explicitly set in a
// recovery mode.
export const RECOVERY_CACHE_TTL = 30000;
export const RECOVERY_CACHE_TTL = 30000; // 30 seconds
// How long to remember the timing mode of a document.
export const TIMING_ON_CACHE_TTL = 30000; // 30 seconds
/**
* DocManager keeps track of "active" Grist documents, i.e. those loaded
@ -56,6 +59,9 @@ export class DocManager extends EventEmitter {
// Remember recovery mode of documents.
private _inRecovery = new MapWithTTL<string, boolean>(RECOVERY_CACHE_TTL);
// Remember timing mode of documents, when document is recreated it is put in the same mode.
private _inTimingOn = new MapWithTTL<string, boolean>(TIMING_ON_CACHE_TTL);
constructor(
public readonly storageManager: IDocStorageManager,
public readonly pluginManager: PluginManager|null,
@ -69,6 +75,13 @@ export class DocManager extends EventEmitter {
this._inRecovery.set(docId, recovery);
}
/**
* Will restore timing on a document when it is reloaded.
*/
public restoreTimingOn(docId: string, timingOn: boolean) {
this._inTimingOn.set(docId, timingOn);
}
// attach a home database to the DocManager. During some tests, it
// is awkward to have this set up at the point of construction.
public testSetHomeDbManager(dbManager: HomeDBManager) {
@ -437,6 +450,10 @@ export class DocManager extends EventEmitter {
log.error('DocManager had problem shutting down storage: %s', err.message);
}
// Clear any timeouts we might have.
this._inRecovery.clear();
this._inTimingOn.clear();
// Clear the setInterval that the pidusage module sets up internally.
pidusage.clear();
}
@ -601,7 +618,10 @@ export class DocManager extends EventEmitter {
const doc = await this._getDoc(docSession, docName);
// Get URL for document for use with SELF_HYPERLINK().
const docUrls = doc && await this._getDocUrls(doc);
return new ActiveDoc(this, docName, {...docUrls, safeMode, doc});
const activeDoc = new ActiveDoc(this, docName, {...docUrls, safeMode, doc});
// Restore the timing mode of the document.
activeDoc.isTimingOn = this._inTimingOn.get(docName) || false;
return activeDoc;
}
/**

View File

@ -132,6 +132,8 @@ export class DocWorker {
getUsersForViewAs: activeDocMethod.bind(null, 'viewers', 'getUsersForViewAs'),
getAccessToken: activeDocMethod.bind(null, 'viewers', 'getAccessToken'),
getShare: activeDocMethod.bind(null, 'owners', 'getShare'),
startTiming: activeDocMethod.bind(null, 'owners', 'startTiming'),
stopTiming: activeDocMethod.bind(null, 'owners', 'stopTiming'),
});
}

View File

@ -36,6 +36,7 @@ import sandbox
import schema
from schema import RecalcWhen
import table as table_module
from timing import DummyTiming
from user import User # pylint:disable=wrong-import-order
import useractions
import column
@ -263,6 +264,8 @@ class Engine(object):
# make multiple different requests without needing to keep all the responses in memory.
self._cached_request_keys = set()
self._timing = DummyTiming()
@property
def autocomplete_context(self):
# See the comment on _autocomplete_context in __init__ above.
@ -969,6 +972,7 @@ class Engine(object):
assert not cycle
record = AttributeRecorder(record, "rec", record_attributes)
value = None
with self._timing.measure(col.node):
try:
if cycle:
raise depend.CircularRefError("Circular Reference")
@ -1004,6 +1008,7 @@ class Engine(object):
# Now we can raise the order error, if there was one. Cell evaluation will be reordered
# in response.
if order_error:
self._timing.mark("order_error")
self._cell_required_error = None
raise order_error # pylint: disable=raising-bad-type

View File

@ -5,6 +5,8 @@ and starts the grist sandbox. See engine.py for the API documentation.
import os
import random
import sys
from timing import DummyTiming, Timing
sys.path.append('thirdparty')
# pylint: disable=wrong-import-position
@ -158,6 +160,20 @@ def run(sandbox):
def evaluate_formula(table_id, col_id, row_id):
return formula_prompt.evaluate_formula(eng, table_id, col_id, row_id)
@export
def start_timing():
eng._timing = Timing()
@export
def stop_timing():
stats = eng._timing.get()
eng._timing = DummyTiming()
return stats
@export
def get_timings():
return eng._timing.get()
export(parse_acl_formula)
export(eng.load_empty)
export(eng.load_done)

115
sandbox/grist/timing.py Normal file
View File

@ -0,0 +1,115 @@
import contextlib
import time
import six
class Timing(object):
def __init__(self):
self._items = {}
self._marks_stack = []
@contextlib.contextmanager
def measure(self, key):
start = time.time()
stack_start_len = len(self._marks_stack)
try:
yield
finally:
end = time.time()
self._record_time(key, end - start)
# Handle the marks added while in this invocation.
n = len(self._marks_stack) - stack_start_len
if n > 0:
next_mark = ("end", end)
while n > 0:
mark = self._marks_stack.pop()
self._record_time("{}@{}={}:{}".format(key, n, mark[0], next_mark[0]),
next_mark[1] - mark[1])
next_mark = mark
n -= 1
self._record_time("{}@{}={}:{}".format(key, n, "start", next_mark[0]), next_mark[1] - start)
def mark(self, mark_name):
self._marks_stack.append((mark_name, time.time()))
def get(self, clear = True):
# Copy it and clear immediately if requested.
timing_log = self._items.copy()
if clear:
self.clear()
# Stats will contain a json like structure with table_id, col_id, sum, count, average, max
# and optionally a array of marks (in similar format)
stats = []
for key, t in sorted(timing_log.items(), key=lambda x: str(x[0])):
# Key can be either a node (tuple with table_id and col_id) or a string with a mark.
# The list is sorted so, we always first get the stats for the node and then the marks.
# We will add marks to the last node.
if isinstance(key, tuple):
stats.append({"tableId": key[0], "colId": key[1], "sum": t.sum, "count": t.count,
"average": t.average, "max": t.max})
else:
# Create a marks array for the last node or append to the existing one.
if stats:
prev = stats[-1].get("marks", [])
stats[-1]["marks"] = prev + [{
"name": key, "sum": t.sum,
"count": t.count, "average": t.average,
"max": t.max
}]
return stats
def dump(self):
out = []
for key, t in sorted(self._items.items(), key=lambda x: str(x[0])):
out.append("%6d, %10f, %10f, %10f, %s" % (t.count, t.average, t.max, t.sum, key))
print("Timing\n" + "\n".join(out))
self.clear()
def _record_time(self, key, time_sec):
t = self._items.get(key)
if not t:
t = self._items[key] = TimingStats()
t.add(time_sec)
def clear(self):
self._items.clear()
# An implementation that adds minimal overhead.
class DummyTiming(object):
# pylint: disable=no-self-use,unused-argument,no-member
def measure(self, key):
if six.PY2:
return contextlib.nested()
return contextlib.nullcontext()
def mark(self, mark_name):
pass
def dump(self):
pass
def get(self, clear = True):
return []
def clear(self):
pass
class TimingStats(object):
def __init__(self):
self.count = 0
self.sum = 0
self.max = 0
@property
def average(self):
return self.sum / self.count if self.count > 0 else 0
def add(self, value):
self.count += 1
self.sum += value
if value > self.max:
self.max = value

View File

@ -9,11 +9,12 @@ import {openClient} from 'test/server/gristClient';
import * as testUtils from 'test/server/testUtils';
const chimpy = configForUser('Chimpy');
const kiwi = configForUser('Kiwi');
describe('DocApi2', function() {
this.timeout(40000);
let server: TestServer;
let serverUrl: string;
let homeUrl: string;
let owner: UserAPI;
let wsId: number;
testUtils.setTmpLogLevel('error');
@ -30,7 +31,7 @@ describe('DocApi2', function() {
}
server = new TestServer(this);
serverUrl = await server.start(['home', 'docs']);
homeUrl = await server.start(['home', 'docs']);
const api = await server.createHomeApi('chimpy', 'docs', true);
await api.newOrg({name: 'testy', domain: 'testy'});
owner = await server.createHomeApi('chimpy', 'testy', true);
@ -63,7 +64,7 @@ describe('DocApi2', function() {
assert.equal(await fse.pathExists(forkPath2), true);
// Delete the trunk via API.
const deleteDocResponse = await axios.delete(`${serverUrl}/api/docs/${docId}`, chimpy);
const deleteDocResponse = await axios.delete(`${homeUrl}/api/docs/${docId}`, chimpy);
assert.equal(deleteDocResponse.status, 200);
// Check that files for the trunk and forks were deleted.
@ -72,4 +73,166 @@ describe('DocApi2', function() {
assert.equal(await fse.pathExists(forkPath2), false);
});
});
describe('/docs/{did}/timing', async () => {
let docId: string;
before(async function() {
docId = await owner.newDoc({name: 'doc2'}, wsId);
});
after(async function() {
await owner.deleteDoc(docId);
});
// There are two endpoints here /timing/start and /timing/stop.
// Here we just test that it is operational, available only for owners
// and that it returns sane results. Exact tests are done in python.
// Smoke test.
it('POST /docs/{did}/timing smoke tests', async function() {
// We are disabled.
let resp = await axios.get(`${homeUrl}/api/docs/${docId}/timing`, chimpy);
assert.equal(resp.status, 200);
assert.deepEqual(resp.data, {status: 'disabled'});
// Start it.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/start`, {}, chimpy);
assert.equal(resp.status, 200);
// Stop it.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/stop`, {}, chimpy);
assert.equal(resp.status, 200);
assert.deepEqual(resp.data, []);
});
it('POST /docs/{did}/timing/start', async function() {
// Start timing as non owner, should fail.
let resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/start`, {}, kiwi);
assert.equal(resp.status, 403);
// Query status as non owner, should fail.
resp = await axios.get(`${homeUrl}/api/docs/${docId}/timing`, kiwi);
assert.equal(resp.status, 403);
// Check as owner.
resp = await axios.get(`${homeUrl}/api/docs/${docId}/timing`, chimpy);
assert.equal(resp.status, 200);
assert.deepEqual(resp.data, {status: 'disabled'});
// Start timing as owner.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/start`, {}, chimpy);
assert.equal(resp.status, 200);
// Check we are started.
resp = await axios.get(`${homeUrl}/api/docs/${docId}/timing`, chimpy);
assert.equal(resp.status, 200);
assert.deepEqual(resp.data, {status: 'active', timing: []});
// Starting timing again works as expected, returns 400 as this is already.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/start`, {}, chimpy);
assert.equal(resp.status, 400);
// As non owner
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/stop`, {}, kiwi);
assert.equal(resp.status, 403);
});
it('POST /docs/{did}/timing/stop', async function() {
// Timings are turned on, so we can stop them.
// First as non owner, we should fail.
let resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/stop`, {}, kiwi);
assert.equal(resp.status, 403);
// Next as owner.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/stop`, {}, chimpy);
assert.equal(resp.status, 200);
// Now do it once again, we should got 400, as we are not timing.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/stop`, {}, chimpy);
assert.equal(resp.status, 400);
});
it('GET /docs/{did}/timing', async function() {
// Now we can check the results. Start timing and check that we got [] in response.
let resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/start`, {}, chimpy);
assert.equal(resp.status, 200);
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/stop`, {}, chimpy);
assert.equal(resp.status, 200);
assert.deepEqual(resp.data, []);
// Now create a table with a formula column and make sure we see it in the results.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/apply`, [
['AddTable', 'Timings', [
{id: 'A', formula: '$id' }
]],
], chimpy);
assert.equal(resp.status, 200);
// Now start it again,
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/start`, {}, chimpy);
assert.equal(resp.status, 200);
// Make sure we see that it is active and we have some intermediate results
resp = await axios.get(`${homeUrl}/api/docs/${docId}/timing`, chimpy);
assert.equal(resp.status, 200);
assert.deepEqual(resp.data, {status: 'active', timing: []});
// And trigger some formula calculations.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/apply`, [
['BulkAddRecord', 'Timings', [null, null], {}],
], chimpy);
assert.equal(resp.status, 200, JSON.stringify(resp.data));
// Make sure we can't stop it as non owner.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/stop`, {}, kiwi);
assert.equal(resp.status, 403);
// Now stop it as owner and make sure the result is sane.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/stop`, {}, chimpy);
assert.equal(resp.status, 200, JSON.stringify(resp.data));
const data = resp.data as Array<{
tableId: string;
colId: string;
sum: number;
count: number;
average: number;
max: number;
markers?: Array<{
name: string;
sum: number;
count: number;
average: number;
max: number;
}>
}>;
assert.isAbove(data.length, 0);
assert.equal(data[0].tableId, 'Timings');
assert.isTrue(typeof data[0].sum === 'number');
assert.isTrue(typeof data[0].count === 'number');
assert.isTrue(typeof data[0].average === 'number');
assert.isTrue(typeof data[0].max === 'number');
});
it('POST /docs/{did}/timing/start remembers state after reload', async function() {
// Make sure we are off.
let resp = await axios.get(`${homeUrl}/api/docs/${docId}/timing`, chimpy);
assert.equal(resp.status, 200);
assert.deepEqual(resp.data, {status: 'disabled'});
// Now start it.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/timing/start`, {}, chimpy);
assert.equal(resp.status, 200);
// Now reload document.
resp = await axios.post(`${homeUrl}/api/docs/${docId}/force-reload`, {}, chimpy);
assert.equal(resp.status, 200);
// And check that we are still on.
resp = await axios.get(`${homeUrl}/api/docs/${docId}/timing`, chimpy);
assert.equal(resp.status, 200, JSON.stringify(resp.data));
assert.deepEqual(resp.data, {status: 'active', timing: []});
});
});
});