mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
(core) Fuller guessing of type and options when adding first data to blank columns
Summary: Adds `common/ValueGuesser.ts` with logic for guessing column type and widget options (only for dates/datetimes) from an array of strings, and converting the strings to the guessed type in a lossless manner, so that converting back to Text gives the original values. Changes `_ensure_column_accepts_data` in Python to call an exported JS method using the new logic where possible. Test Plan: Added `test/common/ValueGuesser.ts` to unit test the core guessing logic and a DocApi end-to-end test for what happens to new columns. Reviewers: georgegevoian Reviewed By: georgegevoian Differential Revision: https://phab.getgrist.com/D3290
This commit is contained in:
parent
ae6c857ac5
commit
599545fb11
@ -105,7 +105,7 @@ export async function prepTransformColInfo(docModel: DocModel, origCol: ColumnRe
|
||||
const colValues = tableData.getColValues(sourceCol.colId()) || [];
|
||||
dateFormat = guessDateFormat(colValues.map(String)) || "YYYY-MM-DD";
|
||||
}
|
||||
widgetOptions = dateTimeWidgetOptions(dateFormat);
|
||||
widgetOptions = dateTimeWidgetOptions(dateFormat, true);
|
||||
break;
|
||||
}
|
||||
case 'Choice': {
|
||||
|
@ -3,13 +3,17 @@
|
||||
* subscribes to actions which change it, and forwards those actions to individual tables.
|
||||
* It also provides the interface to apply actions to data.
|
||||
*/
|
||||
import {DocumentSettings} from 'app/common/DocumentSettings';
|
||||
import {safeJsonParse} from 'app/common/gutil';
|
||||
import {schema, SchemaTypes} from 'app/common/schema';
|
||||
import fromPairs = require('lodash/fromPairs');
|
||||
import groupBy = require('lodash/groupBy');
|
||||
import {ActionDispatcher} from './ActionDispatcher';
|
||||
import {BulkColValues, ColInfo, ColInfoWithId, ColValues, DocAction,
|
||||
RowRecord, TableDataAction} from './DocActions';
|
||||
import {ColTypeMap, MetaTableData, TableData} from './TableData';
|
||||
import {
|
||||
BulkColValues, ColInfo, ColInfoWithId, ColValues, DocAction,
|
||||
RowRecord, TableDataAction
|
||||
} from './DocActions';
|
||||
import {ColTypeMap, MetaRowRecord, MetaTableData, TableData} from './TableData';
|
||||
|
||||
type FetchTableFunc = (tableId: string) => Promise<TableDataAction>;
|
||||
|
||||
@ -108,6 +112,15 @@ export class DocData extends ActionDispatcher {
|
||||
}
|
||||
}
|
||||
|
||||
public docInfo(): MetaRowRecord<'_grist_DocInfo'> {
|
||||
const docInfoTable = this.getMetaTable('_grist_DocInfo');
|
||||
return docInfoTable.getRecord(1)!;
|
||||
}
|
||||
|
||||
public docSettings(): DocumentSettings {
|
||||
return safeJsonParse(this.docInfo().documentSettings, {});
|
||||
}
|
||||
|
||||
// ---- The following methods implement ActionDispatcher interface ----
|
||||
|
||||
protected onAddTable(action: DocAction, tableId: string, columns: ColInfoWithId[]): void {
|
||||
|
155
app/common/ValueGuesser.ts
Normal file
155
app/common/ValueGuesser.ts
Normal file
@ -0,0 +1,155 @@
|
||||
import {DocData} from 'app/common/DocData';
|
||||
import {DocumentSettings} from 'app/common/DocumentSettings';
|
||||
import {dateTimeWidgetOptions, guessDateFormat} from 'app/common/parseDate';
|
||||
import {createFormatter} from 'app/common/ValueFormatter';
|
||||
import * as moment from 'moment-timezone';
|
||||
|
||||
interface GuessedColInfo {
|
||||
type: string;
|
||||
widgetOptions?: object;
|
||||
}
|
||||
|
||||
interface GuessResult {
|
||||
values?: any[];
|
||||
colInfo: GuessedColInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Class for guessing if an array of values should be interpreted as a specific column type.
|
||||
* T is the type of values that strings should be parsed to and is stored in the column.
|
||||
*/
|
||||
abstract class ValueGuesser<T> {
|
||||
/**
|
||||
* Guessed column type and maybe widget options.
|
||||
*/
|
||||
public abstract colInfo(): GuessedColInfo;
|
||||
|
||||
/**
|
||||
* Parse a single string to a typed value in such a way that formatting the value returns the original string.
|
||||
* If the string cannot be parsed, return the original string.
|
||||
*/
|
||||
public abstract parse(value: string): T | string;
|
||||
|
||||
/**
|
||||
* Attempt to parse at least 90% the string values losslessly according to the guessed colInfo.
|
||||
* Return null if this cannot be done.
|
||||
*/
|
||||
public guess(values: Array<string | null>, docSettings: DocumentSettings): GuessResult | null {
|
||||
const colInfo = this.colInfo();
|
||||
const {type, widgetOptions} = colInfo;
|
||||
const formatter = createFormatter(type, widgetOptions || {}, docSettings);
|
||||
const result: any[] = [];
|
||||
const maxUnparsed = values.length * 0.1; // max number of non-parsed strings to allow before giving up
|
||||
let unparsed = 0;
|
||||
|
||||
for (const value of values) {
|
||||
if (!value) {
|
||||
if (this.allowBlank()) {
|
||||
result.push(null);
|
||||
continue;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
const parsed = this.parse(value);
|
||||
// Give up if too many strings failed to parse or if the parsed value changes when converted back to text
|
||||
if (typeof parsed === "string" && ++unparsed > maxUnparsed || formatter.formatAny(parsed) !== value) {
|
||||
return null;
|
||||
}
|
||||
result.push(parsed);
|
||||
}
|
||||
return {values: result, colInfo};
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether this type of column can store nulls directly.
|
||||
*/
|
||||
protected allowBlank(): boolean {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
class BoolGuesser extends ValueGuesser<boolean> {
|
||||
public colInfo(): GuessedColInfo {
|
||||
return {type: 'Bool'};
|
||||
}
|
||||
|
||||
public parse(value: string): boolean | string {
|
||||
if (value === "true") {
|
||||
return true;
|
||||
} else if (value === "false") {
|
||||
return false;
|
||||
} else {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the only type that can't store nulls, it converts them to false.
|
||||
*/
|
||||
protected allowBlank(): boolean {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
class NumericGuesser extends ValueGuesser<number> {
|
||||
public colInfo(): GuessedColInfo {
|
||||
// TODO parse and guess options for formatted numbers, e.g. currency amounts
|
||||
return {type: 'Numeric'};
|
||||
}
|
||||
|
||||
public parse(value: string): number | string {
|
||||
const parsed = Number(value);
|
||||
return !isNaN(parsed) ? parsed : value;
|
||||
}
|
||||
}
|
||||
|
||||
class DateGuesser extends ValueGuesser<number> {
|
||||
// _format should be a full moment format string
|
||||
// _tz should be the document's default timezone
|
||||
constructor(private _format: string, private _tz: string) {
|
||||
super();
|
||||
}
|
||||
|
||||
public colInfo(): GuessedColInfo {
|
||||
const widgetOptions = dateTimeWidgetOptions(this._format, false);
|
||||
let type;
|
||||
if (widgetOptions.timeFormat) {
|
||||
type = 'DateTime:' + this._tz;
|
||||
} else {
|
||||
type = 'Date';
|
||||
this._tz = "UTC";
|
||||
}
|
||||
return {widgetOptions, type};
|
||||
}
|
||||
|
||||
// Note that this parsing is much stricter than parseDate to prevent loss of information.
|
||||
// Dates which can be parsed by parseDate based on the guessed widget options may not be parsed here.
|
||||
public parse(value: string): number | string {
|
||||
const m = moment.tz(value, this._format, true, this._tz);
|
||||
return m.isValid() ? m.valueOf() / 1000 : value;
|
||||
}
|
||||
}
|
||||
|
||||
export function guessColInfoWithDocData(values: Array<string | null>, docData: DocData) {
|
||||
return guessColInfo(values, docData.docSettings(), docData.docInfo().timezone);
|
||||
}
|
||||
|
||||
export function guessColInfo(
|
||||
values: Array<string | null>, docSettings: DocumentSettings, timezone: string
|
||||
): GuessResult {
|
||||
// Use short-circuiting of || to only do as much work as needed,
|
||||
// in particular not guessing date formats before trying other types.
|
||||
return (
|
||||
new BoolGuesser()
|
||||
.guess(values, docSettings) ||
|
||||
new NumericGuesser()
|
||||
.guess(values, docSettings) ||
|
||||
new DateGuesser(guessDateFormat(values, timezone) || "YYYY-MM-DD", timezone)
|
||||
.guess(values, docSettings) ||
|
||||
// Don't return the same values back if there's no conversion to be done,
|
||||
// as they have to be serialized and transferred over a pipe to Python.
|
||||
{colInfo: {type: 'Text'}}
|
||||
);
|
||||
}
|
@ -293,8 +293,6 @@ export function createParserOrFormatterArgumentsRaw(
|
||||
visibleColRef: number,
|
||||
): [string, object, DocumentSettings] {
|
||||
const columnsTable = docData.getMetaTable('_grist_Tables_column');
|
||||
const docInfoTable = docData.getMetaTable('_grist_DocInfo');
|
||||
|
||||
const widgetOpts = safeJsonParse(widgetOptions, {});
|
||||
|
||||
if (isFullReferencingType(type)) {
|
||||
@ -305,10 +303,7 @@ export function createParserOrFormatterArgumentsRaw(
|
||||
widgetOpts.tableData = docData.getTable(getReferencedTableId(type)!);
|
||||
}
|
||||
|
||||
const docInfo = docInfoTable.getRecord(1);
|
||||
const docSettings = safeJsonParse(docInfo!.documentSettings, {}) as DocumentSettings;
|
||||
|
||||
return [type, widgetOpts, docSettings];
|
||||
return [type, widgetOpts, docData.docSettings()];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,6 +1,6 @@
|
||||
import escapeRegExp = require('lodash/escapeRegExp');
|
||||
import memoize = require('lodash/memoize');
|
||||
import {getDistinctValues} from 'app/common/gutil';
|
||||
import {getDistinctValues, isObject} from 'app/common/gutil';
|
||||
// Simply importing 'moment-guess' inconsistently imports bundle.js or bundle.esm.js depending on environment
|
||||
import * as guessFormat from '@gristlabs/moment-guess/dist/bundle.js';
|
||||
import * as moment from 'moment-timezone';
|
||||
@ -325,8 +325,9 @@ function standardizeTime(timeString: string): { remaining: string, time: string
|
||||
return {remaining: timeString.slice(0, match.index).trim(), time: `${hh}:${mm}:${ss}`};
|
||||
}
|
||||
|
||||
export function guessDateFormat(values: string[], timezone: string = 'UTC'): string | null {
|
||||
const sample = getDistinctValues(values, 100);
|
||||
export function guessDateFormat(values: Array<string | null>, timezone: string = 'UTC'): string | null {
|
||||
const dateStrings: string[] = values.filter(isObject);
|
||||
const sample = getDistinctValues(dateStrings, 100);
|
||||
const formats: Record<string, number> = {};
|
||||
for (const dateString of sample) {
|
||||
let guessed: string | string[];
|
||||
@ -348,7 +349,7 @@ export function guessDateFormat(values: string[], timezone: string = 'UTC'): str
|
||||
}
|
||||
|
||||
for (const format of formatKeys) {
|
||||
for (const dateString of values) {
|
||||
for (const dateString of dateStrings) {
|
||||
const m = moment.tz(dateString, format, true, timezone);
|
||||
if (m.isValid()) {
|
||||
formats[format] += 1;
|
||||
@ -380,10 +381,15 @@ export const timeFormatOptions = [
|
||||
'HH:mm:ss z',
|
||||
];
|
||||
|
||||
export function dateTimeWidgetOptions(fullFormat: string) {
|
||||
/**
|
||||
* Construct widget options for a Date or DateTime column based on a single moment string
|
||||
* which may or may not contain both date and time parts.
|
||||
* If defaultTimeFormat is true, fallback to a non-empty default time format when none is found in fullFormat.
|
||||
*/
|
||||
export function dateTimeWidgetOptions(fullFormat: string, defaultTimeFormat: boolean) {
|
||||
const index = fullFormat.match(/[hHkaAmsSzZT]|$/)!.index!;
|
||||
const dateFormat = fullFormat.substr(0, index).trim();
|
||||
const timeFormat = fullFormat.substr(index).trim() || timeFormatOptions[0];
|
||||
const timeFormat = fullFormat.substr(index).trim() || (defaultTimeFormat ? timeFormatOptions[0] : "");
|
||||
return {
|
||||
dateFormat,
|
||||
timeFormat,
|
||||
|
@ -42,6 +42,7 @@ import {MetaRowRecord} from 'app/common/TableData';
|
||||
import {FetchUrlOptions, UploadResult} from 'app/common/uploads';
|
||||
import {DocReplacementOptions, DocState, DocStateComparison} from 'app/common/UserAPI';
|
||||
import {convertFromColumn} from 'app/common/ValueConverter';
|
||||
import {guessColInfoWithDocData} from 'app/common/ValueGuesser';
|
||||
import {parseUserAction} from 'app/common/ValueParser';
|
||||
import {ParseOptions} from 'app/plugin/FileParserAPI';
|
||||
import {GristDocAPI} from 'app/plugin/GristAPI';
|
||||
@ -1724,6 +1725,8 @@ export class ActiveDoc extends EventEmitter {
|
||||
preferredPythonVersion,
|
||||
sandboxOptions: {
|
||||
exports: {
|
||||
guessColInfo: (values: Array<string | null>) =>
|
||||
guessColInfoWithDocData(values, this.docData!),
|
||||
convertFromColumn: (...args: Parameters<ReturnType<typeof convertFromColumn>>) =>
|
||||
convertFromColumn(this.docData!)(...args)
|
||||
}
|
||||
|
@ -205,7 +205,7 @@ class TestColumnActions(test_engine.EngineTestCase):
|
||||
Column(14, "manualSort", "ManualSortPos", False, "", 0),
|
||||
Column(15, "A", "Text", False, "", 0),
|
||||
Column(16, "B", "Text", False, "", 0),
|
||||
Column(17, "C", "Text", False, "", 0),
|
||||
Column(17, "C", "Any", True, "", 0),
|
||||
]),
|
||||
Table(3, "GristSummary_7_Address", 0, 1, columns=[
|
||||
Column(18, "state", "Text", False, "", summarySourceCol=12),
|
||||
@ -244,9 +244,9 @@ class TestColumnActions(test_engine.EngineTestCase):
|
||||
self.assertTableData('Address', data=self.address_table_data)
|
||||
self.assertTableData('Table1', data=[
|
||||
["id", "A", "B", "C", "manualSort"],
|
||||
[ 1, "a", "d", "", 1.0],
|
||||
[ 2, "b", "e", "", 2.0],
|
||||
[ 3, "c", "f", "", 3.0],
|
||||
[ 1, "a", "d", None, 1.0],
|
||||
[ 2, "b", "e", None, 2.0],
|
||||
[ 3, "c", "f", None, 3.0],
|
||||
])
|
||||
self.assertTableData("GristSummary_7_Address", cols="subset", data=[
|
||||
[ "id", "state", "count", "amount" ],
|
||||
@ -294,7 +294,7 @@ class TestColumnActions(test_engine.EngineTestCase):
|
||||
Table(2, "Table1", 2, 0, columns=[
|
||||
Column(14, "manualSort", "ManualSortPos", False, "", 0),
|
||||
Column(15, "A", "Text", False, "", 0),
|
||||
Column(17, "C", "Text", False, "", 0),
|
||||
Column(17, "C", "Any", True, "", 0),
|
||||
]),
|
||||
Table(3, "GristSummary_7_Address", 0, 1, columns=[
|
||||
Column(18, "state", "Text", False, "", summarySourceCol=12),
|
||||
@ -352,7 +352,7 @@ class TestColumnActions(test_engine.EngineTestCase):
|
||||
Table(2, "Table1", 2, 0, columns=[
|
||||
Column(14, "manualSort", "ManualSortPos", False, "", 0),
|
||||
Column(15, "A", "Text", False, "", 0),
|
||||
Column(17, "C", "Text", False, "", 0),
|
||||
Column(17, "C", "Any", True, "", 0),
|
||||
]),
|
||||
# Note that the summary table here switches to a new one, without the deleted group-by.
|
||||
Table(4, "GristSummary_7_Address2", 0, 1, columns=[
|
||||
@ -401,9 +401,9 @@ class TestColumnActions(test_engine.EngineTestCase):
|
||||
])
|
||||
self.assertTableData('Table1', data=[
|
||||
["id", "A", "C", "manualSort"],
|
||||
[ 1, "a", "", 1.0],
|
||||
[ 2, "b", "", 2.0],
|
||||
[ 3, "c", "", 3.0],
|
||||
[ 1, "a", None, 1.0],
|
||||
[ 2, "b", None, 2.0],
|
||||
[ 3, "c", None, 3.0],
|
||||
])
|
||||
self.assertTableData("GristSummary_7_Address2", cols="subset", data=[
|
||||
[ "id", "count", "amount" ],
|
||||
|
@ -139,6 +139,34 @@ def _make_clean_col_info(col_info, col_id=None):
|
||||
return ret
|
||||
|
||||
|
||||
def guess_col_info(values):
|
||||
"""
|
||||
Returns a pair col_info, values
|
||||
where col_info is a dict which may contain a type and widgetOptions
|
||||
and `values` is similar to the given argument but maybe converted to the guessed type.
|
||||
"""
|
||||
# If the values are all strings/None...
|
||||
if set(map(type, values)) <= {str, six.text_type, type(None)}:
|
||||
# If the values are all blank (None or empty string) leave the column empty
|
||||
if not any(values):
|
||||
return {}, [None] * len(values)
|
||||
|
||||
# Use the exported guessColInfo if we're connected to JS
|
||||
from sandbox import default_sandbox
|
||||
if default_sandbox:
|
||||
guess = default_sandbox.call_external("guessColInfo", values)
|
||||
# When the result doesn't contain `values`, that means the guessed type is Text
|
||||
# so there was nothing to convert.
|
||||
values = guess.get("values", values)
|
||||
col_info = guess["colInfo"]
|
||||
if "widgetOptions" in col_info:
|
||||
col_info["widgetOptions"] = json.dumps(col_info["widgetOptions"])
|
||||
return col_info, values
|
||||
|
||||
# Fallback to the older guessing method, particularly for pure python tests.
|
||||
return {'type': guess_type(values, convert=True)}, values
|
||||
|
||||
|
||||
def guess_type(values, convert=False):
|
||||
"""
|
||||
Returns a suitable type for the given iterable of values, optionally attempting conversions.
|
||||
@ -305,7 +333,7 @@ class UserActions(object):
|
||||
def BulkAddRecord(self, table_id, row_ids, column_values):
|
||||
column_values = actions.decode_bulk_values(column_values)
|
||||
for col_id, values in six.iteritems(column_values):
|
||||
self._ensure_column_accepts_data(table_id, col_id, values)
|
||||
column_values[col_id] = self._ensure_column_accepts_data(table_id, col_id, values)
|
||||
method = self._overrides.get(('BulkAddRecord', table_id), self.doBulkAddOrReplace)
|
||||
return method(table_id, row_ids, column_values)
|
||||
|
||||
@ -456,7 +484,7 @@ class UserActions(object):
|
||||
|
||||
# Check that the update is valid.
|
||||
for col_id, values in six.iteritems(columns):
|
||||
self._ensure_column_accepts_data(table_id, col_id, values)
|
||||
columns[col_id] = self._ensure_column_accepts_data(table_id, col_id, values)
|
||||
|
||||
# Additionally check that we are not trying to modify group-by values in a summary column
|
||||
# (this check is only for updating records, not for adding). Note that col_rec will not be
|
||||
@ -799,23 +827,31 @@ class UserActions(object):
|
||||
When we store values (via Add or Update), check that the column is a data column. If it is an
|
||||
empty column (formula column with an empty formula), convert to data. If it's a real formula
|
||||
column, then fail.
|
||||
Return a list of values which may be the same as the original argument
|
||||
or may have values converted to the newly guessed type of the column.
|
||||
"""
|
||||
schema_col = self._engine.schema[table_id].columns[col_id]
|
||||
if not schema_col.isFormula:
|
||||
# Plain old data column, OK to enter values.
|
||||
return
|
||||
return values
|
||||
|
||||
if schema_col.formula:
|
||||
# This is an error. We can't save individual values to formula columns.
|
||||
raise ValueError("Can't save value to formula column %s" % col_id)
|
||||
|
||||
if not schema_col.formula:
|
||||
# An empty column (isFormula=True, formula=""), now is the time to convert it to data.
|
||||
if schema_col.type == 'Any':
|
||||
# Guess the type when it starts out as Any. We unfortunately need to a separate
|
||||
# ModifyColumn call for type conversion, to recompute type-specific defaults
|
||||
# Guess the type when it starts out as Any. We unfortunately need to update the column
|
||||
# separately for type conversion, to recompute type-specific defaults
|
||||
# before they are used in formula->data conversion.
|
||||
self.ModifyColumn(table_id, col_id, {'type': guess_type(values, convert=True)})
|
||||
col_info, values = guess_col_info(values)
|
||||
# If the values are all blank (None or empty string) leave the column empty
|
||||
if not col_info:
|
||||
return values
|
||||
col_rec = self._docmodel.get_column_rec(table_id, col_id)
|
||||
self._docmodel.update([col_rec], **col_info)
|
||||
self.ModifyColumn(table_id, col_id, {'isFormula': False})
|
||||
else:
|
||||
# Otherwise, this is an error. We can't save individual values to formula columns.
|
||||
raise ValueError("Can't save value to formula column %s" % col_id)
|
||||
return values
|
||||
|
||||
@useraction
|
||||
def AddOrUpdateRecord(self, table_id, require, col_values, options):
|
||||
|
Loading…
Reference in New Issue
Block a user