From e900f39da303628d2a0641f39691812a25c9429f Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Thu, 21 Oct 2021 15:03:37 +0200 Subject: [PATCH] (core) Log statistics about table sizes Summary: Record numbers of rows, columns, cells, and bytes of marshalled data for most calls to table_data_from_db Export new function get_table_stats in the sandbox, which gives the raw numbers and totals. Get and log these stats in ActiveDoc right after loading tables, before Calculate, so they are logged even in case of errors. Tweak logging about number of tables, especially number of on-demand tables, to not only show in debug logging. Test Plan: Updated doc regression tests, that shows what the data looks like nicely. Reviewers: dsagal, paulfitz Reviewed By: dsagal Differential Revision: https://phab.getgrist.com/D3081 --- app/server/lib/ActiveDoc.ts | 28 +++++++++++++++++++--------- sandbox/grist/engine.py | 33 ++++++++++++++++++++++++++++++++- sandbox/grist/main.py | 15 ++++++++++++--- 3 files changed, 63 insertions(+), 13 deletions(-) diff --git a/app/server/lib/ActiveDoc.ts b/app/server/lib/ActiveDoc.ts index 447f9d9f..146956bb 100644 --- a/app/server/lib/ActiveDoc.ts +++ b/app/server/lib/ActiveDoc.ts @@ -432,12 +432,12 @@ export class ActiveDoc extends EventEmitter { }, }); } - const tableNames = await this._loadOpenDoc(docSession); + const [tableNames, onDemandNames] = await this._loadOpenDoc(docSession); const desiredTableNames = tableNames.filter(name => name.startsWith('_grist_')); this._startLoadingTables(docSession, desiredTableNames); const pendingTableNames = tableNames.filter(name => !name.startsWith('_grist_')); await this._initDoc(docSession); - this._initializationPromise = this._finishInitialization(docSession, pendingTableNames, startTime); + this._initializationPromise = this._finishInitialization(docSession, pendingTableNames, onDemandNames, startTime); } catch (err) { await this.shutdown(); throw err; @@ -1305,7 +1305,7 @@ export class ActiveDoc extends EventEmitter { /** * Loads an open document from DocStorage. Returns a list of the tables it contains. */ - protected async _loadOpenDoc(docSession: OptDocSession): Promise { + protected async _loadOpenDoc(docSession: OptDocSession): Promise { // Check the schema version of document and sandbox, and migrate if the sandbox is newer. const schemaVersion = SCHEMA_VERSION; @@ -1350,12 +1350,12 @@ export class ActiveDoc extends EventEmitter { const onDemandMap = zipObject(tablesParsed.tableId as string[], tablesParsed.onDemand); const onDemandNames = remove(tableNames, (t) => onDemandMap[t]); - this.logDebug(docSession, "found %s tables: %s", tableNames.length, - tableNames.join(", ")); - this.logDebug(docSession, "skipping %s on-demand tables: %s", onDemandNames.length, - onDemandNames.join(", ")); + this.logInfo(docSession, "Loading %s normal tables, skipping %s on-demand tables", + tableNames.length, onDemandNames.length); + this.logDebug(docSession, "Normal tables: %s", tableNames.join(", ")); + this.logDebug(docSession, "On-demand tables: %s", onDemandNames.join(", ")); - return tableNames; + return [tableNames, onDemandNames]; } /** @@ -1567,11 +1567,21 @@ export class ActiveDoc extends EventEmitter { // inactivityTimer, since a user formulas with an infinite loop can disable it forever. // TODO find a solution to this issue. @ActiveDoc.keepDocOpen - private async _finishInitialization(docSession: OptDocSession, pendingTableNames: string[], startTime: number) { + private async _finishInitialization( + docSession: OptDocSession, pendingTableNames: string[], onDemandNames: string[], startTime: number + ) { try { await this._tableMetadataLoader.wait(); await this._tableMetadataLoader.clean(); await this._loadTables(docSession, pendingTableNames); + + const tableStats = await this._pyCall('get_table_stats'); + log.rawInfo("Loading complete, table statistics retrieved...", { + ...this.getLogMeta(docSession), + ...tableStats, + num_on_demand_tables: onDemandNames.length, + }); + if (this._options?.docUrl) { await this._pyCall('set_doc_url', this._options.docUrl); } diff --git a/sandbox/grist/engine.py b/sandbox/grist/engine.py index 63f41079..e3e9498b 100644 --- a/sandbox/grist/engine.py +++ b/sandbox/grist/engine.py @@ -10,7 +10,7 @@ import rlcompleter import sys import time import traceback -from collections import namedtuple, OrderedDict, Hashable +from collections import namedtuple, OrderedDict, Hashable, defaultdict import six from six.moves import zip @@ -234,6 +234,37 @@ class Engine(object): # Initial empty context for autocompletions; we update it when we generate the usercode module. self._autocomplete_context = AutocompleteContext({}) + self._table_stats = {"meta": [], "user": []} + + def record_table_stats(self, table_data, table_data_repr): + table_id = table_data.table_id + category = "meta" if table_id.startswith("_grist") else "user" + result = dict( + rows=len(table_data.row_ids), + columns=len(table_data.columns), + bytes=len(table_data_repr or ""), + table_id=table_id, + ) + result["cells"] = result["rows"] * result["columns"] + self._table_stats[category].append(result) + + def get_table_stats(self): + result = defaultdict(int, num_user_tables=len(self._table_stats["user"])) + + for table in self._table_stats["meta"]: + for field in ["rows", "bytes"]: + key = "%s_%s" % (table["table_id"], field) + result[key] = table[field] + + for table in self._table_stats["user"]: + for field in table: + if field == "table_id": + continue + key = "user_%s" % field + result[key] += table[field] + + return dict(result) + def load_empty(self): """ Initialize an empty document, e.g. a newly-created one. diff --git a/sandbox/grist/main.py b/sandbox/grist/main.py index e1bba677..b5331413 100644 --- a/sandbox/grist/main.py +++ b/sandbox/grist/main.py @@ -56,6 +56,11 @@ def run(sandbox): sandbox.register(method.__name__, wrapper) + def load_and_record_table_data(table_name, table_data_repr): + result = table_data_from_db(table_name, table_data_repr) + eng.record_table_stats(result, table_data_repr) + return result + @export def apply_user_actions(action_reprs, user=None): action_group = eng.apply_user_actions([useractions.from_repr(u) for u in action_reprs], user) @@ -84,12 +89,16 @@ def run(sandbox): @export def load_meta_tables(meta_tables, meta_columns): - return eng.load_meta_tables(table_data_from_db("_grist_Tables", meta_tables), - table_data_from_db("_grist_Tables_column", meta_columns)) + return eng.load_meta_tables(load_and_record_table_data("_grist_Tables", meta_tables), + load_and_record_table_data("_grist_Tables_column", meta_columns)) @export def load_table(table_name, table_data): - return eng.load_table(table_data_from_db(table_name, table_data)) + return eng.load_table(load_and_record_table_data(table_name, table_data)) + + @export + def get_table_stats(): + return eng.get_table_stats() @export def create_migrations(all_tables, metadata_only=False):