Summary: This is https://phab.getgrist.com/D3205 plus some changes (https://github.com/dsagal/grist/compare/type-convert...type-convert-server?expand=1) that move the conversion process to the backend. A new user action, ConvertFromColumn, uses `call_external` so that the data engine can delegate back to ActiveDoc. The code for creating formatters and parsers is significantly refactored so that most of the logic is in `common` and can be used in different ways.
Test Plan: The original diff adds plenty of tests.
Reviewers: georgegevoian
Reviewed By: georgegevoian
Subscribers: dsagal
Differential Revision: https://phab.getgrist.com/D3240
pull/126/head
parent
4890a1fe89
commit
5d671bf0b3
@ -0,0 +1,255 @@
|
||||
import {DocData} from 'app/common/DocData';
|
||||
import * as gristTypes from 'app/common/gristTypes';
|
||||
import {isList} from 'app/common/gristTypes';
|
||||
import {BaseFormatter, createFullFormatterFromDocData} from 'app/common/ValueFormatter';
|
||||
import {
|
||||
createParserOrFormatterArgumentsRaw,
|
||||
createParserRaw,
|
||||
ReferenceListParser,
|
||||
ReferenceParser,
|
||||
ValueParser
|
||||
} from 'app/common/ValueParser';
|
||||
import {CellValue, GristObjCode} from 'app/plugin/GristData';
|
||||
|
||||
|
||||
/**
 * Generic converter from one column type to another. It pairs a formatter for the
 * source column with a parser for the destination column; convert() is the entry point.
 *
 * This base implementation targets non-list destination types: a list source value
 * only converts cleanly when it holds exactly one element.
 */
export class ValueConverter {
  constructor(public formatter: BaseFormatter, public parser: ValueParser) {}

  /**
   * Converts a single source value.
   *  - a non-list value goes straight to convertInner();
   *  - an empty list (['L']) becomes null;
   *  - a singleton list (['L', x]) is converted as x;
   *  - a longer list is formatted as text as a whole, which is an error value for most types.
   */
  public convert(value: any): any {
    if (!isList(value)) {
      return this.convertInner(value);
    }
    switch (value.length) {
      case 1:
        // Empty list: ['L']
        return null;
      case 2:
        // Singleton list: ['L', value] - convert just that one value.
        return this.convertInner(value[1]);
      default:
        // Several values cannot fit in a single-value destination.
        return this.formatter.formatAny(value);
    }
  }

  /**
   * Converts one non-list value: format it with the source formatter, then parse the
   * resulting text with the destination parser.
   */
  protected convertInner(value: any): any {
    return this.parser.cleanParse(this.formatter.formatAny(value));
  }
}
|
||||
|
||||
/**
 * Shared logic for converting to a list type (Reference List or Choice List).
 *
 * A single value is wrapped in a one-element list; a list is converted element by element.
 */
class ListConverter extends ValueConverter {
  // Choices configured on the source column. A string equal to one of them is treated as a
  // single value rather than parsed, so that e.g. "Smith, John" is not split like a list.
  // TODO this works when the source is a Choice column, but not when it's a Reference to a Choice column.
  // But the guessed choices are also broken in that case.
  private _choices: Set<string> = new Set((this.formatter.widgetOpts as any).choices || []);

  /**
   * Converts a source value to the destination list type.
   * Strings that are not known choices are handed to the parser (CSV/JSON parsing);
   * null values and empty lists give null; everything else is converted elementwise
   * and assembled by handleValues().
   */
  public convert(value: any): any {
    const isKnownChoice = typeof value === "string" && this._choices.has(value);
    if (typeof value === "string" && !isKnownChoice) {
      // Parse CSV/JSON
      return this.parser.cleanParse(value);
    }

    const elements: any[] = isList(value) ? value.slice(1) : [value];
    if (value == null || elements.length === 0) {
      return null;
    }

    const converted = elements.map(element => this.convertInner(element));
    return this.handleValues(value, converted);
  }

  /**
   * Builds the final result from the converted elements. By default this is an
   * encoded list; `originalValue` is unused here but available to subclasses.
   */
  protected handleValues(originalValue: any, values: any[]) {
    return ['L', ...values];
  }
}
|
||||
|
||||
/**
 * Converter to Choice List: every element becomes the formatted text of the source value.
 */
class ChoiceListConverter extends ListConverter {
  /**
   * Turns one source value into a 'Choice', i.e. its formatted text.
   * No parsing step is applied.
   */
  protected convertInner(value: any): any {
    const choiceText = this.formatter.formatAny(value);
    return choiceText;
  }
}
|
||||
|
||||
/**
 * Converter to Reference List. Each element is converted with a ReferenceConverter,
 * and the per-element lookups are merged into a single lookup tuple.
 */
class ReferenceListConverter extends ListConverter {
  // Converts individual elements as if the destination were a plain "Ref" column
  // sharing this parser's widget options and document settings.
  private _innerConverter = new ReferenceConverter(
    this.formatter,
    new ReferenceParser("Ref", this.parser.widgetOpts, this.parser.docSettings),
  );

  constructor(public formatter: BaseFormatter, public parser: ReferenceListParser) {
    super(formatter, parser);
    // Prevent the parser from looking up reference values in the frontend.
    // Leave it to the data engine which has a much more efficient algorithm for long lists of values.
    delete parser.tableData;
  }

  /**
   * Combines the converted elements into one lookup tuple ['l', values, {column, raw}].
   * If any element is a plain string (a reference that failed to parse), the formatted
   * text of the whole original value is returned instead.
   */
  public handleValues(originalValue: any, values: any[]): any {
    // Text form of the whole value; used as AltText if the reference lookup fails.
    const raw = this.formatter.formatAny(originalValue);
    const lookupValues: any[] = [];
    let column: string = "";

    for (const item of values) {
      if (typeof item === "string") {
        // One bad element spoils the whole list: fall back to the raw string.
        return raw;
      }
      // item is a lookup tuple: ['l', value, options]
      lookupValues.push(item[1]);
      column = item[2].column;
    }

    return ['l', lookupValues, {column, raw}];
  }

  /**
   * Converts one source value to a 'Reference' via the inner ReferenceConverter.
   */
  protected convertInner(value: any): any {
    return this._innerConverter.convert(value);
  }
}
|
||||
|
||||
/**
 * Converter to Reference. The source value is first converted to the type of the
 * visible column, then handed to the parser's lookup().
 */
class ReferenceConverter extends ValueConverter {
  // Converter from the source column to the type of the reference's visible column.
  private _innerConverter: ValueConverter = createConverter(this.formatter, this.parser.visibleColParser);

  constructor(public formatter: BaseFormatter, public parser: ReferenceParser) {
    super(formatter, parser);
    // Prevent the parser from looking up reference values in the frontend.
    // Leave it to the data engine which has a much more efficient algorithm for long lists of values.
    delete parser.tableData;
  }

  /**
   * Converts one value: the value converted to the visible column's type is passed
   * to lookup() together with the formatted text of the original value.
   */
  protected convertInner(value: any): any {
    const visibleValue = this._innerConverter.convert(value);
    const formattedText = this.formatter.formatAny(value);
    return this.parser.lookup(visibleValue, formattedText);
  }
}
|
||||
|
||||
/**
 * Converter to numeric types. Booleans map directly to 1 / 0; every other value
 * takes the regular format-then-parse route.
 */
class NumericConverter extends ValueConverter {
  protected convertInner(value: any): any {
    // true -> 1, false -> 0, without going through text.
    return typeof value === "boolean" ? Number(value) : super.convertInner(value);
  }
}
|
||||
|
||||
/**
 * Converter to Date / DateTime.
 */
class DateConverter extends ValueConverter {
  // Parsed type info of the source column, needed to re-encode raw values.
  private _sourceType = gristTypes.extractInfoFromColType(this.formatter.type);

  /**
   * When converting Date->DateTime, DateTime->Date, or between DateTime timezones,
   * an encoded Date/DateTime object is sent rather than just a timestamp number,
   * so that the data engine knows what to do in do_convert, especially regarding timezones.
   * If the source column is a Reference to a Date/DateTime then `value` is already
   * an encoded object from the display column which has type Any.
   *
   * Any value that does not re-encode to a Date/DateTime object is converted the
   * default way (format, then parse), using the re-encoded value.
   */
  protected convertInner(value: any): any {
    const encoded = gristTypes.reencodeAsAny(value, this._sourceType);
    if (Array.isArray(encoded)) {
      const code = encoded[0];
      if (code === GristObjCode.Date || code === GristObjCode.DateTime) {
        return encoded;
      }
    }
    return super.convertInner(encoded);
  }
}
|
||||
|
||||
/**
 * Specialised converter class for each destination column type.
 * Destination types not listed here are handled by the plain ValueConverter
 * (see createConverter).
 */
export const valueConverterClasses: Record<string, typeof ValueConverter> = {
  // Date-like destinations
  Date: DateConverter,
  DateTime: DateConverter,
  // List and reference destinations
  ChoiceList: ChoiceListConverter,
  Ref: ReferenceConverter,
  RefList: ReferenceListConverter,
  // Number destinations
  Numeric: NumericConverter,
  Int: NumericConverter,
};
|
||||
|
||||
/**
 * Creates the converter appropriate for the parser's (destination) column type,
 * falling back to the generic ValueConverter when no specialised class is registered.
 */
export function createConverter(formatter: BaseFormatter, parser: ValueParser) {
  const destinationType = gristTypes.extractTypeFromColType(parser.type);
  const ConverterClass = valueConverterClasses[destinationType] || ValueConverter;
  return new ConverterClass(formatter, parser);
}
|
||||
|
||||
/**
 * Used by the ConvertFromColumn user action in the data engine.
 * This is a higher order function: docData (passed by ActiveDoc) is bound first,
 * and the returned function takes the arguments passed to call_external in Python.
 *
 * The returned function builds a formatter for the source column and a parser for the
 * destination type/options, then converts the given values. When no display column
 * values are supplied, the raw values are used in their place.
 */
export function convertFromColumn(docData: DocData) {
  return function(
    sourceColRef: number,
    type: string,
    widgetOpts: string,
    visibleColRef: number,
    values: ReadonlyArray<CellValue>,
    displayColValues?: ReadonlyArray<CellValue>,
  ): CellValue[] {
    const sourceFormatter = createFullFormatterFromDocData(docData, sourceColRef);
    const parserArgs = createParserOrFormatterArgumentsRaw(docData, type, widgetOpts, visibleColRef);
    const targetParser = createParserRaw(...parserArgs);
    return convertValues(
      createConverter(sourceFormatter, targetParser),
      values,
      displayColValues || values,
    );
  };
}
|
||||
|
||||
/**
 * Converts a column's worth of values with the given converter.
 *
 * @param converter Converter holding the source formatter and destination parser.
 * @param values Raw values from the actual column, e.g. row IDs for reference columns.
 * @param displayColValues Values from the display column, which is the same as the raw
 *   values for non-referencing columns. In almost all cases these are the values that
 *   actually matter and get converted.
 * @returns One converted value per entry of displayColValues.
 */
export function convertValues(
  converter: ValueConverter,
  values: ReadonlyArray<CellValue>,
  displayColValues: ReadonlyArray<CellValue>,
): CellValue[] {
  // Converting Ref <-> RefList without changing the target table is a special case - see prepTransformColInfo.
  // In this case we deal with the actual row IDs stored in the real column,
  // whereas in all other cases we use display column values.
  const source = gristTypes.extractInfoFromColType(converter.formatter.type);
  const target = gristTypes.extractInfoFromColType(converter.parser.type);
  const refToRefList = (
    source.type === "Ref" &&
    target.type === "RefList" &&
    source.tableId === target.tableId
  );
  const refListToRef = (
    source.type === "RefList" &&
    target.type === "Ref" &&
    source.tableId === target.tableId
  );

  return displayColValues.map((displayVal, index) => {
    const rowValue = values[index];

    if (refToRefList && typeof rowValue === "number") {
      // Row ID 0 means "no reference", which maps to an empty cell.
      if (rowValue === 0) {
        return null;
      }
      return ["L", rowValue];
    }

    if (refListToRef && isList(rowValue)) {
      if (rowValue.length === 1) {
        // Empty list: ['L']
        return 0;
      }
      if (rowValue.length === 2) {
        // Singleton list: ['L', rowId]
        return rowValue[1];
      }
      // Longer lists fall through to the generic conversion below.
    }

    return converter.convert(displayVal);
  });
}
|
@ -1,11 +1,11 @@
|
||||
import {RowId} from 'app/client/models/rowset';
|
||||
import {TableData} from 'app/client/models/TableData';
|
||||
import {UIRowId} from 'app/common/UIRowId';
|
||||
import {TableData} from "./TableData";
|
||||
|
||||
/**
|
||||
* Return whether a table identified by the rowId of its metadata record, should normally be
|
||||
* hidden from the user (e.g. as an option in the page-widget picker).
|
||||
*/
|
||||
export function isHiddenTable(tablesData: TableData, tableRef: RowId): boolean {
|
||||
export function isHiddenTable(tablesData: TableData, tableRef: UIRowId): boolean {
|
||||
const tableId = tablesData.getValue(tableRef, 'tableId') as string|undefined;
|
||||
return tablesData.getValue(tableRef, 'summarySourceTable') !== 0 ||
|
||||
Boolean(tableId?.startsWith('GristHidden'));
|
@ -1,161 +0,0 @@
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
from datetime import datetime
|
||||
import moment
|
||||
|
||||
# Lowercase month names, and for each a regex fragment in which the characters after the
# first three are optional ('may' has nothing after the first three, so it stays as is).
MONTH_NAMES = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
               'september', 'october', 'november', 'december']
MONTHS = [name[:3] + ("(?:%s)?" % name[3:] if name[3:] else "") for name in MONTH_NAMES]

# Lowercase weekday names, and the matching regex fragments with an optional tail.
DAY_NAMES = ['sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday']
WEEKDAYS = ["%s(?:%s)?" % (day[:3], day[3:]) for day in DAY_NAMES]

# Regex fragments shared by several format tokens.
_HOUR_24 = r"(?P<H>\d{1,2})"
_HOUR_12 = r"(?P<h>\d{1,2})"
_MINUTE = r"(?P<m>\d{1,2})"
_MERIDIEM = r"(?P<A>[ap]m?)"
_SECOND = r"(?P<s>\d{1,2})"
_FRACTION = r"(?P<S>\d{1,6})"
_MONTH_NAME = r"(?P<MMM>" + "|".join(MONTHS) + ")"
_MONTH_NUM = r"(?P<M>\d{1,2})"
_DAY_NUM = r"(?P<D>\d{1,2})"
_DAY_NAME = "(" + "|".join(WEEKDAYS) + ")"

# Acceptable format tokens mapped to what they should match in the date string.
# The order matters: longer tokens come before their prefixes so they are matched first.
DATE_TOKENS = OrderedDict(
  [
    ("HH", _HOUR_24), ("H", _HOUR_24),            # 24 hr
    ("hh", _HOUR_12), ("h", _HOUR_12),            # 12 hr
    ("mm", _MINUTE), ("m", _MINUTE),              # min
    ("A", _MERIDIEM), ("a", _MERIDIEM),           # am/pm
    ("ss", _SECOND), ("s", _SECOND),              # sec
  ]
  # Fractional second: "SSSSSS" down to "S"
  + [("S" * width, _FRACTION) for width in range(6, 0, -1)]
  + [
    ("YYYY", r"(?P<YY>\d{4}|\d{2})"),             # 4 or 2 digit year
    ("YY", r"(?P<YY>\d{2})"),                     # 2 digit year
    ("MMMM", _MONTH_NAME), ("MMM", _MONTH_NAME),  # month name, abbr or not
    ("MM", _MONTH_NUM), ("M", _MONTH_NUM),        # month num
    ("DD", _DAY_NUM),                             # day num
    ("Do", _DAY_NUM + "(st|nd|rd|th)"),           # day num with ordinal suffix
    ("D", _DAY_NUM),
    ("dddd", _DAY_NAME), ("ddd", _DAY_NAME),      # day name, abbr or not (ignored)
  ]
)
# Matches any single format token; the capturing group makes split() keep the tokens.
DATE_TOKENS_REGEX = re.compile("(%s)" % "|".join(DATE_TOKENS.keys()))

# Pattern matching any run (possibly empty) of standard date/time separators.
SEP = r"[\s/.\-:,]*"
SEP_REGEX = re.compile(SEP)
# SEP with its backslashes doubled, so it survives being used as a re.sub() replacement.
SEP_REPLACEMENT = SEP.replace("\\", "\\\\")

# Maps date parse format to compiled regex
FORMAT_CACHE = {}
|
||||
|
||||
# Parses date_string using parse_format in the style of moment.js
# See: http://momentjs.com/docs/#/parsing
# Supports the following tokens:
#   H HH         0..23           24 hour time
#   h hh         1..12           12 hour time used with a A.
#   a A          am pm           Post or ante meridiem
#   m mm         0..59           Minutes
#   s ss         0..59           Seconds
#   S SS SSS     0..999          Fractional seconds
#   YYYY         2014            4 or 2 digit year
#   YY           14              2 digit year
#   M MM         1..12           Month number
#   MMM MMMM     Jan..December   Month name in locale set by moment.locale()
#   D DD         1..31           Day of month
#   Do           1st..31st       Day of month with ordinal
def parse(date_string, parse_format, zonelabel='UTC', override_current_date=None):
  """Parse a date string via a moment.js style parse format and a timezone string.
  Supported tokens are documented above. Returns seconds since epoch.

  Date parts missing from the format default to the current date (override_current_date
  if given, otherwise moment.CURRENT_DATE); missing time parts default to zero.
  Raises Exception if date_string does not match parse_format."""

  # Compiled regexes are cached per format string.
  parser = FORMAT_CACHE.get(parse_format)
  if parser is None:
    # Split the format into tokens and the text between them, e.g. "MM-YY" becomes
    # the month-number pattern, a separator pattern, and the 2-digit-year pattern.
    # DATE_TOKENS is ordered so that the longer letter chains are recognized first.
    pieces = []
    for piece in DATE_TOKENS_REGEX.split(parse_format):
      if piece in DATE_TOKENS:
        pieces.append(DATE_TOKENS[piece])
      else:
        # Text between tokens: swap in the generic separator pattern.
        pieces.append(SEP_REGEX.sub(SEP_REPLACEMENT, piece))

    # Ignore case so that month names match however they are capitalized.
    parser = re.compile(''.join(pieces), re.I)
    FORMAT_CACHE[parse_format] = parser

  match = parser.match(date_string)
  if match is None:
    raise Exception("Failed to parse %s with %s" % (date_string, parse_format))

  # Assemble a datetime from the captured groups.
  current_date = override_current_date or moment.CURRENT_DATE
  fields = match.groupdict()
  year = getYear(fields, current_date.year)
  month = getMonth(fields, current_date.month)
  day = int(fields['D']) if ('D' in fields) else current_date.day
  hour = getHour(fields)
  minute = int(fields['m']) if ('m' in fields) else 0
  second = int(fields['s']) if ('s' in fields) else 0
  microsecond = getMicrosecond(fields)
  dt = datetime(year=year, month=month, day=day, hour=hour, minute=minute, second=second,
                microsecond=microsecond)

  # Interpret the datetime in the given timezone and return the seconds since EPOCH
  return moment.tz(dt, zonelabel).timestamp_s()
|
||||
|
||||
|
||||
def getYear(match_dict, current_year):
  """Return the year captured in match_dict, or current_year if none was captured.

  A 'YYYY' entry is used as is. A 'YY' entry that is exactly two digits long is expanded
  to the year with those last two digits lying in [current_year - 50, current_year + 50);
  a 'YY' entry of any other length is used as is."""
  if 'YYYY' in match_dict:
    return int(match_dict['YYYY'])
  if 'YY' not in match_dict:
    return current_year

  digits = match_dict['YY']
  if len(digits) != 2:
    return int(digits)

  # Must guess the century; pick it so the result is closest to the current year.
  # The earliest year that YY could mean is current_year - 50. We need k such that
  #   window_start <= 100k + YY < window_start + 100
  # i.e. 0 <= 100k + YY - window_start < 100, and that middle quantity is precisely
  # (YY - window_start) % 100.
  window_start = current_year - 50
  return window_start + (int(digits) - window_start) % 100
|
||||
|
||||
def getMonth(match_dict, current_month):
  """Return the month number captured in match_dict, or current_month if none was captured.

  A numeric 'M' entry takes precedence; otherwise a month name in 'MMM' is looked up
  in MONTHS by its first three letters, case-insensitively."""
  if 'M' in match_dict:
    return int(match_dict['M'])
  if 'MMM' not in match_dict:
    return current_month
  prefix = match_dict['MMM'][:3].lower()
  # lazy_index gives a 0-based position; month numbers are 1-based.
  return lazy_index(MONTHS, prefix) + 1
|
||||
|
||||
def getHour(match_dict):
  """Return the hour of day (0..23) captured in match_dict, or 0 if none was captured.

  A 24-hour 'H' entry is used as is. A 12-hour 'h' entry is reduced modulo 12 and
  shifted by 12 when the meridiem in 'A' starts with 'p' (in any letter case)."""
  if 'H' in match_dict:
    return int(match_dict['H'])
  elif 'h' in match_dict:
    hr = int(match_dict['h']) % 12
    # The date regex is compiled case-insensitively, so the captured meridiem may be
    # "PM", "Pm", "P", etc.; compare case-insensitively so uppercase PM is not read as AM.
    meridiem = match_dict.get('A') or ''
    merid = 12 if meridiem[:1].lower() == 'p' else 0
    return hr + merid
  else:
    return 0
|
||||
|
||||
def getMicrosecond(match_dict):
  """Return the microseconds for the fractional-second digits in match_dict['S'], or 0
  if there are none. The digits are read as a decimal fraction of a second: they are
  right-padded with zeros to six digits, or cut off after six."""
  if 'S' not in match_dict:
    return 0
  digits = match_dict['S']
  return int(digits.ljust(6, "0")[:6])
|
||||
|
||||
# Gets the index of the first string in l that starts with startswith,
# or the value of missing if there is no such string.
def lazy_index(l, startswith, missing=None):
  prefix_len = len(startswith)
  matches = (pos for pos, token in enumerate(l) if token[:prefix_len] == startswith)
  return next(matches, missing)
|
Loading…
Reference in new issue