mirror of https://github.com/gristlabs/grist-core.git
(core) Perform migrations of Grist schema using only metadata tables when possible.
Summary:
Loading all user data to run a migration is risky (it creates more memory pressure than usual), and almost never needed (only one migration requires it). This diff attempts to run migrations using only the metadata (_grist_*) tables, and retries if the sandbox reports that all data is needed. The intent is for new migrations to avoid needing all data.

Test Plan: Added a somewhat contrived unittest.

Reviewers: paulfitz

Reviewed By: paulfitz

Differential Revision: https://phab.getgrist.com/D2659
commit 5b2de988b5
parent 5a9fe0ea27
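
In rough terms, the new flow tries a metadata-only migration first and falls back to a full load only when the sandbox asks for it. The sketch below is illustrative only (run_migration, fetch_table, and table_names are hypothetical stand-ins); the real logic is the ActiveDoc._migrate change in the diff that follows.

# Illustrative sketch of the retry protocol; not part of this commit.
def run_migration(table_names, fetch_table, create_migrations):
  # First pass: load only the metadata (_grist_*) tables.
  tables = {name: fetch_table(name) for name in table_names
            if name.startswith('_grist_')}
  try:
    return create_migrations(tables, metadata_only=True)
  except Exception as e:
    if 'need all tables' not in str(e):
      raise
    # Some older migration touches user data: load everything except the
    # _gristsys_ bookkeeping tables and retry without the flag.
    for name in table_names:
      if name not in tables and not name.startswith('_gristsys_'):
        tables[name] = fetch_table(name)
    return create_migrations(tables)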
@@ -27,7 +27,7 @@ import {toTableDataAction} from 'app/common/DocActions';
 import {DocData} from 'app/common/DocData';
 import {DocSnapshots} from 'app/common/DocSnapshot';
 import {EncActionBundleFromHub} from 'app/common/EncActionBundle';
-import {byteString} from 'app/common/gutil';
+import {byteString, countIf} from 'app/common/gutil';
 import {InactivityTimer} from 'app/common/InactivityTimer';
 import * as marshal from 'app/common/marshal';
 import {Peer} from 'app/common/sharing';
@@ -1125,13 +1125,44 @@ export class ActiveDoc extends EventEmitter {
    * collaborators.
    */
   private async _migrate(docSession: OptDocSession): Promise<void> {
-    // TODO fetchAllTables() creates more memory pressure than usual since full data is present in
-    // memory at once both in node and in Python. This is liable to cause crashes. This doesn't
-    // even skip onDemand tables. We should try to migrate using metadata only. Data migrations
-    // would need to be done table-by-table, and differently still for on-demand tables.
-    const allTables = await this.docStorage.fetchAllTables();
-    const docActions: DocAction[] = await this._dataEngine.pyCall('create_migrations', allTables);
-    this.logInfo(docSession, "_migrate: applying %d migration actions", docActions.length);
+    const tableNames = await this.docStorage.getAllTableNames();
+
+    // Fetch only metadata tables first, and try to migrate with only those.
+    const tableData: {[key: string]: Buffer|null} = {};
+    for (const tableName of tableNames) {
+      if (tableName.startsWith('_grist_')) {
+        tableData[tableName] = await this.docStorage.fetchTable(tableName);
+      }
+    }
+
+    let docActions: DocAction[];
+    try {
+      // The last argument tells create_migrations() that only metadata is included.
+      docActions = await this._dataEngine.pyCall('create_migrations', tableData, true);
+    } catch (e) {
+      if (!/need all tables/.test(e.message)) {
+        throw e;
+      }
+      // If the migration failed because it needs all tables (i.e. involves changes to data), then
+      // fetch them all. TODO: This is used for some older migrations, and is relied on by tests.
+      // If a new migration needs this flag, more work is needed. The current approach creates
+      // more memory pressure than usual since full data is present in memory at once both in node
+      // and in Python; and it doesn't skip onDemand tables. This is liable to cause crashes.
+      this.logWarn(docSession, "_migrate: retrying with all tables");
+      for (const tableName of tableNames) {
+        if (!tableData[tableName] && !tableName.startsWith('_gristsys_')) {
+          tableData[tableName] = await this.docStorage.fetchTable(tableName);
+        }
+      }
+      docActions = await this._dataEngine.pyCall('create_migrations', tableData);
+    }
+
+    const processedTables = Object.keys(tableData);
+    const numSchema = countIf(processedTables, t => t.startsWith("_grist_"));
+    const numUser = countIf(processedTables, t => !t.startsWith("_grist_"));
+    this.logInfo(docSession, "_migrate: applying %d migration actions (processed %s schema, %s user tables)",
+      docActions.length, numSchema, numUser);
+
     docActions.forEach((action, i) => this.logInfo(docSession, "_migrate: docAction %s: %s", i, shortDesc(action)));
     await this.docStorage.execTransaction(() => this.docStorage.applyStoredActions(docActions));
   }
@@ -714,22 +714,11 @@ export class DocStorage implements ISQLiteDB {
   }

   /**
-   * Fetches all tables from the database, and returns a dictionary mapping table names to encoded
-   * TableData objects (marshalled dicts mapping column ids to arrays of values).
+   * Fetches and returns the names of all tables in the database (including _gristsys_ tables).
    */
-  public fetchAllTables(): Promise<{[key: string]: Buffer|null}> {
-    const tables: {[key: string]: Buffer|null} = {};
-    return bluebird.Promise.each(
-      this.all("SELECT name FROM sqlite_master WHERE type='table'"),
-      (row: ResultRow) => {
-        if (!row.name.startsWith('_gristsys_')) {
-          return this.fetchTable(row.name)
-            .then(data => {
-              tables[row.name] = data;
-            });
-        }
-      })
-      .return(tables);
+  public async getAllTableNames(): Promise<string[]> {
+    const rows = await this.all("SELECT name FROM sqlite_master WHERE type='table'");
+    return rows.map(row => row.name);
   }

   /**
@@ -97,9 +97,9 @@ def main():
     return eng.load_table(table_data_from_db(table_name, table_data))

   @export
-  def create_migrations(all_tables):
+  def create_migrations(all_tables, metadata_only=False):
     doc_actions = migrations.create_migrations(
-      {t: table_data_from_db(t, data) for t, data in all_tables.iteritems()})
+      {t: table_data_from_db(t, data) for t, data in all_tables.iteritems()}, metadata_only)
     return map(actions.get_action_repr, doc_actions)

   @export
@@ -28,12 +28,18 @@ all_migrations = {}
 def noop_migration(_all_tables):
   return []

+# Each migration function includes a .need_all_tables attribute. See migration() decorator.
+noop_migration.need_all_tables = False
+

-def create_migrations(all_tables):
+def create_migrations(all_tables, metadata_only=False):
   """
   Creates and returns a list of DocActions needed to bring this document to
-  schema.SCHEMA_VERSION. It requires as input all of the documents tables.
-  all_tables: all tables as a dictionary mapping table name to TableData.
+  schema.SCHEMA_VERSION.
+    all_tables: all tables or just the metadata tables (those named with _grist_ prefix) as a
+      dictionary mapping table name to TableData.
+    metadata_only: should be set if only metadata tables are passed in. If ALL tables are
+      required to process migrations, this method will raise a "need all tables..." exception.
   """
   try:
     doc_version = all_tables['_grist_DocInfo'].columns["schemaVersion"][0]
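
A short usage sketch of the contract this docstring describes (illustrative only; meta_tables and all_tables are assumed to be dicts of TableData keyed by table name):

# Illustrative only: the metadata_only contract.
try:
  doc_actions = create_migrations(meta_tables, metadata_only=True)
except Exception as e:
  # Raised only when some migration in the upgrade range needs user data.
  assert "need all tables" in str(e)
  doc_actions = create_migrations(all_tables)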
@@ -57,6 +63,7 @@ def create_migrations(all_tables):
   new_schema = {a.table_id: a for a in schema.schema_create_actions()}
   for table_id, data in sorted(all_tables.iteritems()):
     # User tables should already be in tdset; the rest must be metadata tables.
+    # (If metadata_only is true, there is simply nothing to skip here.)
     if table_id not in tdset.all_tables:
       new_col_info = {}
       if table_id in new_schema:
@@ -71,6 +78,9 @@ def create_migrations(all_tables):

   migration_actions = []
   for version in xrange(doc_version + 1, schema.SCHEMA_VERSION + 1):
+    migration_func = all_migrations.get(version, noop_migration)
+    if migration_func.need_all_tables and metadata_only:
+      raise Exception("need all tables for migration to %s" % version)
     migration_actions.extend(all_migrations.get(version, noop_migration)(tdset))

   # Note that if we are downgrading versions (i.e. doc_version is higher), then the following is
@@ -86,12 +96,19 @@ def get_last_migration_version():
   """
   return max(all_migrations.iterkeys())

-def migration(schema_version):
+def migration(schema_version, need_all_tables=False):
   """
   Decorator for migrations that associates the decorated migration function with the given
-  schema_version. This decorate function will be run to migrate forward to schema_version.
+  schema_version. This decorated function will be run to migrate forward to schema_version.
+
+  Migrations are first attempted with only metadata tables, but if any required migration function
+  is marked with need_all_tables=True, then the migration will be retried with all tables.
+
+  NOTE: new migrations should NOT set need_all_tables=True; it would require more work to process
+  very large documents safely (including those containing on-demand tables).
   """
   def add_migration(migration_func):
+    migration_func.need_all_tables = need_all_tables
     all_migrations[schema_version] = migration_func
     return migration_func
   return add_migration
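
For example, a hypothetical future migration would be registered as follows, leaving need_all_tables at its default so the metadata-only pass can handle it. This is a sketch, not part of the commit: migration18 and the column it adds are made up, and it assumes the module's existing add_column helper (actions.AddColumn plus schema.make_column).

# Hypothetical example; not part of this commit.
@migration(schema_version=18)
def migration18(tdset):
  # Touches only _grist_* metadata, so no user data has to be loaded.
  return tdset.apply_doc_actions([
    add_column('_grist_Views_section', 'exampleFlag', 'Bool'),
  ])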
@@ -653,7 +670,9 @@ def migration16(tdset):

   return tdset.apply_doc_actions(doc_actions)

-@migration(schema_version=17)
+# This is actually the only migration that requires all tables because it modifies user data
+# (specifically, any columns of the deprecated "Image" type).
+@migration(schema_version=17, need_all_tables=True)
 def migration17(tdset):
   """
   There is no longer an "Image" type for columns, as "Attachments" now serves as a