mirror of
https://github.com/gristlabs/grist-core.git
synced 2026-03-02 04:09:24 +00:00
(core) Lossless imports
Summary: - Removed string parsing and some type guessing code from parse_data.py. That logic is now implicitly done by ValueGuesser by leaving the initial column type as Any. parse_data.py mostly comes into play when importing files (e.g. Excel) containing values that already have types, i.e. numbers and dates. - 0s and 1s are treated as numbers instead of booleans to keep imports lossless. - Removed dateguess.py and test_dateguess.py. - Changed what `guessDateFormat` does when multiple date formats work equally well for the given data, in order to be consistent with the old dateguess.py. - Columns containing numbers are now always imported as Numeric, never Int. - Removed `NullIfEmptyParser` because it was interfering with the new system. Its purpose was to avoid pointlessly changing a column from Any to Text when no actual data was inserted. A different solution to that problem was already added to `_ensure_column_accepts_data` in the data engine in a recent related diff. Test Plan: - Added 2 `nbrowser/Importer2` tests. - Updated various existing tests. - Extended testing of `guessDateFormat`. Added `guessDateFormats` to show how ambiguous dates are handled internally. Reviewers: georgegevoian Reviewed By: georgegevoian Differential Revision: https://phab.getgrist.com/D3302
This commit is contained in:
@@ -353,6 +353,7 @@ export class Importer extends DisposableWithEvents {
|
||||
label: field.label(),
|
||||
colId: destTableId ? field.colId() : null, // if inserting into new table, colId isn't defined
|
||||
type: field.column().type(),
|
||||
widgetOptions: field.column().widgetOptions(),
|
||||
formula: field.column().formula()
|
||||
})),
|
||||
sourceCols: sourceFields.map((field) => field.colId())
|
||||
|
||||
@@ -105,7 +105,7 @@ export async function prepTransformColInfo(docModel: DocModel, origCol: ColumnRe
|
||||
let {dateFormat} = prevOptions;
|
||||
if (!dateFormat) {
|
||||
const colValues = tableData.getColValues(sourceCol.colId()) || [];
|
||||
dateFormat = guessDateFormat(colValues.map(String)) || "YYYY-MM-DD";
|
||||
dateFormat = guessDateFormat(colValues.map(String));
|
||||
}
|
||||
widgetOptions = dateTimeWidgetOptions(dateFormat, true);
|
||||
break;
|
||||
|
||||
@@ -49,6 +49,7 @@ export interface TransformColumn {
|
||||
colId: string|null;
|
||||
type: string;
|
||||
formula: string;
|
||||
widgetOptions: string;
|
||||
}
|
||||
|
||||
export interface ImportResult {
|
||||
|
||||
@@ -3,7 +3,7 @@ import {ApplyUAResult, QueryFilters} from 'app/common/ActiveDocAPI';
|
||||
import {BaseAPI, IOptions} from 'app/common/BaseAPI';
|
||||
import {BillingAPI, BillingAPIImpl} from 'app/common/BillingAPI';
|
||||
import {BrowserSettings} from 'app/common/BrowserSettings';
|
||||
import {BulkColValues, TableColValues, UserAction} from 'app/common/DocActions';
|
||||
import {BulkColValues, TableColValues, TableRecordValue, TableRecordValues, UserAction} from 'app/common/DocActions';
|
||||
import {DocCreationInfo, OpenDocMode} from 'app/common/DocListAPI';
|
||||
import {Features} from 'app/common/Features';
|
||||
import {ICustomWidget} from 'app/common/CustomWidget';
|
||||
@@ -402,6 +402,11 @@ export interface UserAPI {
|
||||
filters?: string;
|
||||
}
|
||||
|
||||
interface GetRowsParams {
|
||||
filters?: QueryFilters;
|
||||
immediate?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Collect endpoints related to the content of a single document that we've been thinking
|
||||
* of as the (restful) "Doc API". A few endpoints that could be here are not, for historical
|
||||
@@ -411,8 +416,8 @@ export interface DocAPI {
|
||||
// Immediate flag is a currently not-advertised feature, allowing a query to proceed without
|
||||
// waiting for a document to be initialized. This is useful if the calculations done when
|
||||
// opening a document are irrelevant.
|
||||
getRows(tableId: string, options?: { filters?: QueryFilters,
|
||||
immediate?: boolean }): Promise<TableColValues>;
|
||||
getRows(tableId: string, options?: GetRowsParams): Promise<TableColValues>;
|
||||
getRecords(tableId: string, options?: GetRowsParams): Promise<TableRecordValue[]>;
|
||||
updateRows(tableId: string, changes: TableColValues): Promise<number[]>;
|
||||
addRows(tableId: string, additions: BulkColValues): Promise<number[]>;
|
||||
removeRows(tableId: string, removals: number[]): Promise<number[]>;
|
||||
@@ -869,16 +874,13 @@ export class DocAPIImpl extends BaseAPI implements DocAPI {
|
||||
this._url = `${url}/api/docs/${docId}`;
|
||||
}
|
||||
|
||||
public async getRows(tableId: string, options?: { filters?: QueryFilters,
|
||||
immediate?: boolean }): Promise<TableColValues> {
|
||||
const url = new URL(`${this._url}/tables/${tableId}/data`);
|
||||
if (options?.filters) {
|
||||
url.searchParams.append('filter', JSON.stringify(options.filters));
|
||||
}
|
||||
if (options?.immediate) {
|
||||
url.searchParams.append('immediate', 'true');
|
||||
}
|
||||
return this.requestJson(url.href);
|
||||
public async getRows(tableId: string, options?: GetRowsParams): Promise<TableColValues> {
|
||||
return this._getRecords(tableId, 'data', options);
|
||||
}
|
||||
|
||||
public async getRecords(tableId: string, options?: GetRowsParams): Promise<TableRecordValue[]> {
|
||||
const response: TableRecordValues = await this._getRecords(tableId, 'records', options);
|
||||
return response.records;
|
||||
}
|
||||
|
||||
public async updateRows(tableId: string, changes: TableColValues): Promise<number[]> {
|
||||
@@ -967,6 +969,17 @@ export class DocAPIImpl extends BaseAPI implements DocAPI {
|
||||
url.searchParams.append('code', code);
|
||||
return this.requestJson(url.href);
|
||||
}
|
||||
|
||||
private _getRecords(tableId: string, endpoint: 'data' | 'records', options?: GetRowsParams): Promise<any> {
|
||||
const url = new URL(`${this._url}/tables/${tableId}/${endpoint}`);
|
||||
if (options?.filters) {
|
||||
url.searchParams.append('filter', JSON.stringify(options.filters));
|
||||
}
|
||||
if (options?.immediate) {
|
||||
url.searchParams.append('immediate', 'true');
|
||||
}
|
||||
return this.requestJson(url.href);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -162,7 +162,7 @@ export function guessColInfo(
|
||||
NumberParse.fromSettings(docSettings).guessOptions(values)
|
||||
)
|
||||
.guess(values, docSettings) ||
|
||||
new DateGuesser(guessDateFormat(values, timezone) || "YYYY-MM-DD", timezone)
|
||||
new DateGuesser(guessDateFormat(values, timezone), timezone)
|
||||
.guess(values, docSettings) ||
|
||||
// Don't return the same values back if there's no conversion to be done,
|
||||
// as they have to be serialized and transferred over a pipe to Python.
|
||||
|
||||
@@ -36,18 +36,6 @@ export class ValueParser {
|
||||
class IdentityParser extends ValueParser {
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as basic Value parser, but will return null if a value is an empty string.
|
||||
*/
|
||||
class NullIfEmptyParser extends ValueParser {
|
||||
public cleanParse(value: string): any {
|
||||
if (value === "") {
|
||||
return null;
|
||||
}
|
||||
return super.cleanParse(value);
|
||||
}
|
||||
}
|
||||
|
||||
export class NumericParser extends ValueParser {
|
||||
private _parse: NumberParse;
|
||||
|
||||
@@ -225,7 +213,6 @@ export class ReferenceListParser extends ReferenceParser {
|
||||
}
|
||||
|
||||
export const valueParserClasses: { [type: string]: typeof ValueParser } = {
|
||||
Any: NullIfEmptyParser,
|
||||
Numeric: NumericParser,
|
||||
Int: NumericParser,
|
||||
Date: DateParser,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import escapeRegExp = require('lodash/escapeRegExp');
|
||||
import last = require('lodash/last');
|
||||
import memoize = require('lodash/memoize');
|
||||
import {getDistinctValues, isObject} from 'app/common/gutil';
|
||||
// Simply importing 'moment-guess' inconsistently imports bundle.js or bundle.esm.js depending on environment
|
||||
@@ -325,7 +326,26 @@ function standardizeTime(timeString: string): { remaining: string, time: string
|
||||
return {remaining: timeString.slice(0, match.index).trim(), time: `${hh}:${mm}:${ss}`};
|
||||
}
|
||||
|
||||
export function guessDateFormat(values: Array<string | null>, timezone: string = 'UTC'): string | null {
|
||||
/**
|
||||
* Guesses a full date[time] format that best matches the given strings.
|
||||
* If several formats match equally well, picks the last one lexicographically to match the old date guessing.
|
||||
* This means formats with an early Y and/or M are favoured.
|
||||
* If no formats match, returns the default YYYY-MM-DD.
|
||||
*/
|
||||
export function guessDateFormat(values: Array<string | null>, timezone: string = 'UTC'): string {
|
||||
const formats = guessDateFormats(values, timezone);
|
||||
if (!formats) {
|
||||
return "YYYY-MM-DD";
|
||||
}
|
||||
return last(formats)!;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all full date[time] formats that best match the given strings.
|
||||
* If several formats match equally well, returns them all.
|
||||
* May return null if there are no matching formats or choosing one is too expensive.
|
||||
*/
|
||||
export function guessDateFormats(values: Array<string | null>, timezone: string = 'UTC'): string[] | null {
|
||||
const dateStrings: string[] = values.filter(isObject);
|
||||
const sample = getDistinctValues(dateStrings, 100);
|
||||
const formats: Record<string, number> = {};
|
||||
@@ -358,7 +378,9 @@ export function guessDateFormat(values: Array<string | null>, timezone: string =
|
||||
}
|
||||
|
||||
const maxCount = Math.max(...Object.values(formats));
|
||||
return formatKeys.find(format => formats[format] === maxCount)!;
|
||||
// Return all formats that tied for first place.
|
||||
// Sort lexicographically for consistency in tests and with the old dateguess.py.
|
||||
return formatKeys.filter(format => formats[format] === maxCount).sort();
|
||||
}
|
||||
|
||||
export const dateFormatOptions = [
|
||||
|
||||
@@ -294,7 +294,7 @@ export class ActiveDocImport {
|
||||
const origTableName = table.table_name ? table.table_name : '';
|
||||
const transformRule = transformRuleMap && transformRuleMap.hasOwnProperty(origTableName) ?
|
||||
transformRuleMap[origTableName] : null;
|
||||
const columnMetadata = addLabelsIfPossible(table.column_metadata);
|
||||
const columnMetadata = cleanColumnMetadata(table.column_metadata);
|
||||
const result: ApplyUAResult = await this._activeDoc.applyUserActions(docSession,
|
||||
[["AddTable", hiddenTableName, columnMetadata]]);
|
||||
const retValue: AddTableRetValue = result.retValues[0];
|
||||
@@ -313,7 +313,9 @@ export class ActiveDocImport {
|
||||
const ruleCanBeApplied = (transformRule != null) &&
|
||||
_.difference(transformRule.sourceCols, hiddenTableColIds).length === 0;
|
||||
await this._activeDoc.applyUserActions(docSession,
|
||||
[["ReplaceTableData", hiddenTableId, rowIdColumn, columnValues]], {parseStrings: true});
|
||||
// BulkAddRecord rather than ReplaceTableData so that type guessing is applied to Any columns.
|
||||
// Don't use parseStrings, only use the strict parsing in ValueGuesser to make the import lossless.
|
||||
[["BulkAddRecord", hiddenTableId, rowIdColumn, columnValues]]);
|
||||
|
||||
// data parsed and put into hiddenTableId
|
||||
// For preview_table (isHidden) do GenImporterView to make views and formulas and cols
|
||||
@@ -433,14 +435,15 @@ export class ActiveDocImport {
|
||||
|
||||
// If destination is a new table, we need to create it.
|
||||
if (intoNewTable) {
|
||||
const colSpecs = destCols.map(({type, colId: id, label}) => ({type, id, label}));
|
||||
const colSpecs = destCols.map(({type, colId: id, label, widgetOptions}) => ({type, id, label, widgetOptions}));
|
||||
const newTable = await this._activeDoc.applyUserActions(docSession, [['AddTable', destTableId, colSpecs]]);
|
||||
destTableId = newTable.retValues[0].table_id;
|
||||
}
|
||||
|
||||
await this._activeDoc.applyUserActions(docSession,
|
||||
[['BulkAddRecord', destTableId, gutil.arrayRepeat(hiddenTableData.id.length, null), columnData]],
|
||||
{parseStrings: true});
|
||||
// Don't use parseStrings for new tables to make the import lossless.
|
||||
{parseStrings: !intoNewTable});
|
||||
|
||||
return destTableId;
|
||||
}
|
||||
@@ -586,6 +589,7 @@ export class ActiveDocImport {
|
||||
colId: destTableId ? id as string : null,
|
||||
label: fields.label as string,
|
||||
type: fields.type as string,
|
||||
widgetOptions: fields.widgetOptions as string,
|
||||
formula: srcColIds.includes(id as string) ? `$${id}` : ''
|
||||
});
|
||||
}
|
||||
@@ -730,10 +734,21 @@ function getMergeFunction({type}: MergeStrategy): MergeFunction {
|
||||
}
|
||||
|
||||
/**
|
||||
* Tweak the column metadata used in the AddTable action.
|
||||
* If `columns` is populated with non-blank column ids, adds labels to all
|
||||
* columns using the values set for the column ids. Otherwise, returns
|
||||
* a copy of columns with no modifications made.
|
||||
* columns using the values set for the column ids.
|
||||
* Ensure that columns of type Any start out as formula columns, i.e. empty columns,
|
||||
* so that type guessing is triggered when new data is added.
|
||||
*/
|
||||
function addLabelsIfPossible(columns: GristColumn[]) {
|
||||
return columns.map(c => (c.id ? {...c, label: c.id} : c));
|
||||
function cleanColumnMetadata(columns: GristColumn[]) {
|
||||
return columns.map(c => {
|
||||
const newCol: any = {...c};
|
||||
if (c.id) {
|
||||
newCol.label = c.id;
|
||||
}
|
||||
if (c.type === "Any") {
|
||||
newCol.isFormula = true;
|
||||
}
|
||||
return newCol;
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user