(core) Log statistics about table sizes

Summary:
Record numbers of rows, columns, cells, and bytes of marshalled data for most calls to table_data_from_db

Export new function get_table_stats in the sandbox, which gives the raw numbers and totals.

Get and log these stats in ActiveDoc right after loading tables, before Calculate, so they are logged even in case of errors.

Tweak logging about number of tables, especially number of on-demand tables, to not only show in debug logging.

Test Plan: Updated doc regression tests, that shows what the data looks like nicely.

Reviewers: dsagal, paulfitz

Reviewed By: dsagal

Differential Revision: https://phab.getgrist.com/D3081
This commit is contained in:
Alex Hall 2021-10-21 15:03:37 +02:00
parent f2e11a5329
commit e900f39da3
3 changed files with 63 additions and 13 deletions

View File

@ -432,12 +432,12 @@ export class ActiveDoc extends EventEmitter {
},
});
}
const tableNames = await this._loadOpenDoc(docSession);
const [tableNames, onDemandNames] = await this._loadOpenDoc(docSession);
const desiredTableNames = tableNames.filter(name => name.startsWith('_grist_'));
this._startLoadingTables(docSession, desiredTableNames);
const pendingTableNames = tableNames.filter(name => !name.startsWith('_grist_'));
await this._initDoc(docSession);
this._initializationPromise = this._finishInitialization(docSession, pendingTableNames, startTime);
this._initializationPromise = this._finishInitialization(docSession, pendingTableNames, onDemandNames, startTime);
} catch (err) {
await this.shutdown();
throw err;
@ -1305,7 +1305,7 @@ export class ActiveDoc extends EventEmitter {
/**
* Loads an open document from DocStorage. Returns a list of the tables it contains.
*/
protected async _loadOpenDoc(docSession: OptDocSession): Promise<string[]> {
protected async _loadOpenDoc(docSession: OptDocSession): Promise<string[][]> {
// Check the schema version of document and sandbox, and migrate if the sandbox is newer.
const schemaVersion = SCHEMA_VERSION;
@ -1350,12 +1350,12 @@ export class ActiveDoc extends EventEmitter {
const onDemandMap = zipObject(tablesParsed.tableId as string[], tablesParsed.onDemand);
const onDemandNames = remove(tableNames, (t) => onDemandMap[t]);
this.logDebug(docSession, "found %s tables: %s", tableNames.length,
tableNames.join(", "));
this.logDebug(docSession, "skipping %s on-demand tables: %s", onDemandNames.length,
onDemandNames.join(", "));
this.logInfo(docSession, "Loading %s normal tables, skipping %s on-demand tables",
tableNames.length, onDemandNames.length);
this.logDebug(docSession, "Normal tables: %s", tableNames.join(", "));
this.logDebug(docSession, "On-demand tables: %s", onDemandNames.join(", "));
return tableNames;
return [tableNames, onDemandNames];
}
/**
@ -1567,11 +1567,21 @@ export class ActiveDoc extends EventEmitter {
// inactivityTimer, since a user formulas with an infinite loop can disable it forever.
// TODO find a solution to this issue.
@ActiveDoc.keepDocOpen
private async _finishInitialization(docSession: OptDocSession, pendingTableNames: string[], startTime: number) {
private async _finishInitialization(
docSession: OptDocSession, pendingTableNames: string[], onDemandNames: string[], startTime: number
) {
try {
await this._tableMetadataLoader.wait();
await this._tableMetadataLoader.clean();
await this._loadTables(docSession, pendingTableNames);
const tableStats = await this._pyCall('get_table_stats');
log.rawInfo("Loading complete, table statistics retrieved...", {
...this.getLogMeta(docSession),
...tableStats,
num_on_demand_tables: onDemandNames.length,
});
if (this._options?.docUrl) {
await this._pyCall('set_doc_url', this._options.docUrl);
}

View File

@ -10,7 +10,7 @@ import rlcompleter
import sys
import time
import traceback
from collections import namedtuple, OrderedDict, Hashable
from collections import namedtuple, OrderedDict, Hashable, defaultdict
import six
from six.moves import zip
@ -234,6 +234,37 @@ class Engine(object):
# Initial empty context for autocompletions; we update it when we generate the usercode module.
self._autocomplete_context = AutocompleteContext({})
self._table_stats = {"meta": [], "user": []}
def record_table_stats(self, table_data, table_data_repr):
table_id = table_data.table_id
category = "meta" if table_id.startswith("_grist") else "user"
result = dict(
rows=len(table_data.row_ids),
columns=len(table_data.columns),
bytes=len(table_data_repr or ""),
table_id=table_id,
)
result["cells"] = result["rows"] * result["columns"]
self._table_stats[category].append(result)
def get_table_stats(self):
result = defaultdict(int, num_user_tables=len(self._table_stats["user"]))
for table in self._table_stats["meta"]:
for field in ["rows", "bytes"]:
key = "%s_%s" % (table["table_id"], field)
result[key] = table[field]
for table in self._table_stats["user"]:
for field in table:
if field == "table_id":
continue
key = "user_%s" % field
result[key] += table[field]
return dict(result)
def load_empty(self):
"""
Initialize an empty document, e.g. a newly-created one.

View File

@ -56,6 +56,11 @@ def run(sandbox):
sandbox.register(method.__name__, wrapper)
def load_and_record_table_data(table_name, table_data_repr):
result = table_data_from_db(table_name, table_data_repr)
eng.record_table_stats(result, table_data_repr)
return result
@export
def apply_user_actions(action_reprs, user=None):
action_group = eng.apply_user_actions([useractions.from_repr(u) for u in action_reprs], user)
@ -84,12 +89,16 @@ def run(sandbox):
@export
def load_meta_tables(meta_tables, meta_columns):
return eng.load_meta_tables(table_data_from_db("_grist_Tables", meta_tables),
table_data_from_db("_grist_Tables_column", meta_columns))
return eng.load_meta_tables(load_and_record_table_data("_grist_Tables", meta_tables),
load_and_record_table_data("_grist_Tables_column", meta_columns))
@export
def load_table(table_name, table_data):
return eng.load_table(table_data_from_db(table_name, table_data))
return eng.load_table(load_and_record_table_data(table_name, table_data))
@export
def get_table_stats():
return eng.get_table_stats()
@export
def create_migrations(all_tables, metadata_only=False):