(core) New type conversion in the backend

Summary: This is https://phab.getgrist.com/D3205 plus some changes (https://github.com/dsagal/grist/compare/type-convert...type-convert-server?expand=1) that move the conversion process to the backend. A new user action ConvertFromColumn uses `call_external` so that the data engine can delegate back to ActiveDoc. Code for creating formatters and parsers is significantly refactored so that most of the logic is in `common` and can be used in different ways.

Test Plan: The original diff adds plenty of tests.

Reviewers: georgegevoian

Reviewed By: georgegevoian

Subscribers: dsagal

Differential Revision: https://phab.getgrist.com/D3240
This commit is contained in:
Alex Hall
2022-02-04 13:13:03 +02:00
parent 4890a1fe89
commit 5d671bf0b3
25 changed files with 593 additions and 492 deletions

View File

@@ -473,8 +473,23 @@ class BaseReferenceColumn(BaseColumn):
.get_column_rec(self.table_id, self.col_id).visibleCol.colId
or "id"
)
target_value = self._target_table.get_column(col_id)._convert_raw_value(value)
return self._target_table.lookup_one_record(**{col_id: target_value})
column = self._target_table.get_column(col_id)
# `value` is an object encoded for transmission from JS to Python,
# which is decoded to `decoded_value`.
# `raw_value` is the kind of value that would be stored in `column`.
# `rich_value` is the type of value used in formulas, especially with `lookupRecords`.
# For example, for a Date column, `raw_value` is a numerical timestamp
# and `rich_value` is a `datetime.date` object,
# assuming `value` isn't of an invalid type.
# However `value` could either be just a number
# (in which case `decoded_value` would be a number as well)
# or an encoded date (or even datetime) object like ['d', number]
# (in which case `decoded_value` would be a `datetime.date` object,
# which would get converted back to a number and then back to a date object again!)
decoded_value = objtypes.decode_object(value)
raw_value = column.convert(decoded_value)
rich_value = column._convert_raw_value(raw_value)
return self._target_table.lookup_one_record(**{col_id: rich_value})
class ReferenceColumn(BaseReferenceColumn):

View File

@@ -500,6 +500,14 @@ def N(value):
return 0
def CURRENT_CONVERSION(rec):
    """
    Returns the value of the `gristHelper_Converted` helper column for the record `rec`.
    This is a special function meant only for use while a column's type is being changed;
    it doesn't work in normal formulas.
    """
    converted = rec.gristHelper_Converted
    return converted
def NA():
"""
Returns the "value not available" error, `#N/A`.

View File

@@ -1,12 +1,16 @@
from datetime import datetime, timedelta
import re
from .date import DATEADD, NOW, DTIME
from moment_parse import MONTH_NAMES, DAY_NAMES
# Limit exports to schedule, so that upper-case constants like MONTH_NAMES, DAY_NAMES don't end up
# exposed as if Excel-style functions (or break docs generation).
__all__ = ['SCHEDULE']
MONTH_NAMES = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
'september', 'october', 'november', 'december']
# Lowercase weekday names, starting with Sunday
DAY_NAMES = ['sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday']
def SCHEDULE(schedule, start=None, count=10, end=None):
"""
Returns the list of `datetime` objects generated according to the `schedule` string. Starts at

View File

@@ -4,7 +4,6 @@ import marshal
from time import time
import bisect
import os
import moment_parse
import iso8601
import six
from six.moves import zip
@@ -13,7 +12,6 @@ from six.moves import zip
ZoneRecord = namedtuple("ZoneRecord", ("name", "abbrs", "offsets", "untils"))
# moment.py mirrors core functionality of moment-timezone.js
# moment.py includes function parse, located and documented in moment_parse.py
# Documentation: http://momentjs.com/timezone/docs/
EPOCH = datetime(1970, 1, 1)
@@ -67,10 +65,6 @@ def date_to_ts(date, timezone=None):
ts = (date - DATE_EPOCH).total_seconds()
return ts if not timezone else ts - timezone.offset(ts * 1000).total_seconds()
# Calls parse from moment_parse.py
def parse(date_string, parse_format, zonelabel='UTC'):
    """Parse `date_string` by delegating to `moment_parse.parse` with the same arguments."""
    timestamp = moment_parse.parse(date_string, parse_format, zonelabel)
    return timestamp
# Parses a datetime in the ISO format, YYYY-MM-DDTHH:MM:SS.mmmmmm+HH:MM. Most parts are optional;
# see https://pypi.org/project/iso8601/ for details. Returns a timestamp in seconds.
def parse_iso(date_string, timezone=None):

View File

@@ -1,161 +0,0 @@
import re
from collections import OrderedDict
from datetime import datetime
import moment
# Regex list of lowercase months with characters after the first three made optional
MONTH_NAMES = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
'september', 'october', 'november', 'december']
MONTHS = [m[:3]+"(?:"+m[3:]+")?" if len(m) > 3 else m[:3] for m in MONTH_NAMES]
# Regex list of lowercase weekdays with characters after the first three made optional
DAY_NAMES = ['sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday']
WEEKDAYS = [d[:3]+"(?:"+d[3:]+")?" for d in DAY_NAMES]
# Acceptable format tokens mapped to what they should match in the date string
# Ordered so that larger configurations are matched first
DATE_TOKENS = OrderedDict([
("HH", r"(?P<H>\d{1,2})"), # 24 hr
("H", r"(?P<H>\d{1,2})"),
("hh", r"(?P<h>\d{1,2})"), # 12 hr
("h", r"(?P<h>\d{1,2})"),
("mm", r"(?P<m>\d{1,2})"), # min
("m", r"(?P<m>\d{1,2})"),
("A", r"(?P<A>[ap]m?)"), # am/pm
("a", r"(?P<A>[ap]m?)"),
("ss", r"(?P<s>\d{1,2})"), # sec
("s", r"(?P<s>\d{1,2})"),
("SSSSSS", r"(?P<S>\d{1,6})"), # fractional second
("SSSSS", r"(?P<S>\d{1,6})"),
("SSSS", r"(?P<S>\d{1,6})"),
("SSS", r"(?P<S>\d{1,6})"),
("SS", r"(?P<S>\d{1,6})"),
("S", r"(?P<S>\d{1,6})"),
("YYYY", r"(?P<YY>\d{4}|\d{2})"), # 4 or 2 digit year
("YY", r"(?P<YY>\d{2})"), # 2 digit year
("MMMM", r"(?P<MMM>" + ("|".join(MONTHS)) + ")"), # month name, abbr or not
("MMM", r"(?P<MMM>" + ("|".join(MONTHS)) + ")"),
("MM", r"(?P<M>\d{1,2})"), # month num
("M", r"(?P<M>\d{1,2})"),
("DD", r"(?P<D>\d{1,2})"), # day num
("Do", r"(?P<D>\d{1,2})(st|nd|rd|th)"),
("D", r"(?P<D>\d{1,2})"),
("dddd", r"(" + ("|".join(WEEKDAYS)) + ")"), # day name, abbr or not (ignored)
("ddd", r"(" + ("|".join(WEEKDAYS)) + ")")
])
DATE_TOKENS_REGEX = re.compile("("+("|".join(DATE_TOKENS))+")")
# List of separators to replace and match any standard date/time separators
SEP = r"[\s/.\-:,]*"
SEP_REGEX = re.compile(SEP)
SEP_REPLACEMENT = SEP.replace("\\", "\\\\")
# Maps date parse format to compile regex
FORMAT_CACHE = {}
# Parses date_string using parse_format in the style of moment.js
# See: http://momentjs.com/docs/#/parsing
# Supports the following tokens:
# H HH 0..23 24 hour time
# h hh 1..12 12 hour time used with a A.
# a A am pm Post or ante meridiem
# m mm 0..59 Minutes
# s ss 0..59 Seconds
# S SS ... SSSSSS 0..999999 Fractional seconds (1 to 6 digits, whatever the number of S's)
# YYYY 2014 4 or 2 digit year
# YY 14 2 digit year
# M MM 1..12 Month number
# MMM MMMM Jan..December English month name, abbreviated or full (case-insensitive)
# D DD 1..31 Day of month
# Do 1st..31st Day of month with ordinal
def parse(date_string, parse_format, zonelabel='UTC', override_current_date=None):
    """
    Parse `date_string` according to a moment.js-style `parse_format`.

    The parsed date is interpreted in the timezone named by `zonelabel`, and the result is
    returned as seconds since the epoch. Year, month and day default to those of the current
    date (`override_current_date` if given, else `moment.CURRENT_DATE`) when the format has
    no token for them; time-of-day parts default to zero.

    Raises an Exception if `date_string` doesn't match `parse_format`.
    """
    try:
        # Reuse the regex compiled for this format by an earlier call.
        parser = FORMAT_CACHE[parse_format]
    except KeyError:
        # Split the format into known tokens and the literal text between them.
        # DATE_TOKENS is ordered so that the longer tokens are recognized first.
        pieces = []
        for piece in DATE_TOKENS_REGEX.split(parse_format):
            if piece in DATE_TOKENS:
                pieces.append(DATE_TOKENS[piece])
            else:
                # Separators in the literal text become a pattern matching any separators.
                pieces.append(SEP_REGEX.sub(SEP_REPLACEMENT, piece))
        # Ignore case so that month and weekday names match however they are capitalized.
        parser = re.compile(''.join(pieces), re.I)
        FORMAT_CACHE[parse_format] = parser

    found = parser.match(date_string)
    if found is None:
        raise Exception("Failed to parse %s with %s" % (date_string, parse_format))

    today = override_current_date or moment.CURRENT_DATE
    fields = found.groupdict()

    year = getYear(fields, today.year)
    month = getMonth(fields, today.month)
    day = int(fields['D']) if 'D' in fields else today.day
    hour = getHour(fields)
    minute = int(fields['m']) if 'm' in fields else 0
    second = int(fields['s']) if 's' in fields else 0
    naive = datetime(year, month, day, hour, minute, second, getMicrosecond(fields))

    # Interpret the naive datetime in the requested timezone to get seconds since the epoch.
    return moment.tz(naive, zonelabel).timestamp_s()
def getYear(match_dict, current_year):
    """
    Return the year described by the parsed groups in `match_dict`.

    Uses the 'YYYY' group if present, otherwise the 'YY' group, otherwise `current_year`.
    A two-digit 'YY' value is resolved to the year with those last two digits that lies in
    the range [current_year - 50, current_year + 50).
    """
    if 'YYYY' in match_dict:
        return int(match_dict['YYYY'])
    if 'YY' not in match_dict:
        return current_year

    digits = match_dict['YY']
    if len(digits) != 2:
        return int(digits)

    # The earliest year a two-digit value may stand for. Adding the offset of the value from
    # this year, taken modulo 100, gives the matching year within the following century.
    window_start = current_year - 50
    return window_start + (int(digits) - window_start) % 100
def getMonth(match_dict, current_month):
    """
    Return the month number (1-12) described by the parsed groups in `match_dict`.

    A numeric 'M' group takes precedence; otherwise the first three letters of a month name
    in the 'MMM' group are looked up in MONTHS. Falls back to `current_month` when neither
    group is present.
    """
    if 'M' in match_dict:
        return int(match_dict['M'])
    if 'MMM' in match_dict:
        prefix = match_dict['MMM'][:3].lower()
        return lazy_index(MONTHS, prefix) + 1
    return current_month
def getHour(match_dict):
    """
    Return the hour of the day (0-23) described by the parsed groups in `match_dict`.

    A 24-hour 'H' group is used as is. Otherwise a 12-hour 'h' group is combined with the
    optional meridiem group 'A': 12 counts as 0, and 12 hours are added for "pm". Returns 0
    when neither hour group is present.
    """
    if 'H' in match_dict:
        return int(match_dict['H'])
    if 'h' not in match_dict:
        return 0

    hour = int(match_dict['h']) % 12
    # The meridiem is matched case-insensitively by the parser, so it must be compared
    # case-insensitively here too, or "PM" would be treated as "AM".
    meridiem = match_dict.get('A') or ''
    if meridiem[:1].lower() == 'p':
        hour += 12
    return hour
def getMicrosecond(match_dict):
    """
    Return the microseconds described by the fractional-second group 'S' in `match_dict`.

    The digits are read as a decimal fraction of a second: they are right-padded with zeros
    to six digits, and anything beyond six digits is dropped. Returns 0 if 'S' is absent.
    """
    if 'S' not in match_dict:
        return 0
    digits = match_dict['S']
    return int(digits.ljust(6, "0")[:6])
# Gets the index of the first string from iter that starts with startswith
def lazy_index(l, startswith, missing=None):
    """
    Return the index of the first item of `l` that begins with `startswith`, or `missing`
    if no item does.
    """
    prefix_len = len(startswith)
    matches = (i for i, token in enumerate(l) if token[:prefix_len] == startswith)
    return next(matches, missing)

View File

@@ -1,7 +1,6 @@
from datetime import datetime, date, timedelta
import unittest
import moment
import moment_parse
# Helpful strftime() format that includes all parts of the date including the time zone.
fmt = "%Y-%m-%d %H:%M:%S %Z"
@@ -60,78 +59,6 @@ class TestMoment(unittest.TestCase):
[datetime(2037, 11, 1, 1, 0, 0, 0), 2140675200000, "PDT", 420, 1, 0],
]
parse_samples = [
# Basic set
['MM-DD-YYYY', '12-02-1999', 944092800.000000],
['DD-MM-YYYY', '12-02-1999', 918777600.000000],
['DD/MM/YYYY', '12/02/1999', 918777600.000000],
['DD_MM_YYYY', '12_02_1999', 918777600.000000],
['DD:MM:YYYY', '12:02:1999', 918777600.000000],
['D-M-YY', '2-2-99', 917913600.000000],
['YY', '99', 922060800.000000],
['DD-MM-YYYY h:m:s', '12-02-1999 2:45:10', 918787510.000000],
['DD-MM-YYYY h:m:s a', '12-02-1999 2:45:10 am', 918787510.000000],
['DD-MM-YYYY h:m:s a', '12-02-1999 2:45:10 pm', 918830710.000000],
['h:mm a', '12:00 pm', 1458648000.000000],
['h:mm a', '12:30 pm', 1458649800.000000],
['h:mm a', '12:00 am', 1458604800.000000],
['h:mm a', '12:30 am', 1458606600.000000],
['HH:mm', '12:00', 1458648000.000000],
['YYYY-MM-DDTHH:mm:ss', '2011-11-11T11:11:11', 1321009871.000000],
['ddd MMM DD HH:mm:ss YYYY', 'Tue Apr 07 22:52:51 2009', 1239144771.000000],
['ddd MMMM DD HH:mm:ss YYYY', 'Tue April 07 22:52:51 2009', 1239144771.000000],
['HH:mm:ss', '12:00:00', 1458648000.000000],
['HH:mm:ss', '12:30:00', 1458649800.000000],
['HH:mm:ss', '00:00:00', 1458604800.000000],
['HH:mm:ss S', '00:30:00 1', 1458606600.100000],
['HH:mm:ss SS', '00:30:00 12', 1458606600.120000],
['HH:mm:ss SSS', '00:30:00 123', 1458606600.123000],
['HH:mm:ss S', '00:30:00 7', 1458606600.700000],
['HH:mm:ss SS', '00:30:00 78', 1458606600.780000],
['HH:mm:ss SSS', '00:30:00 789', 1458606600.789000],
# Dropped m
['MM/DD/YYYY h:m:s a', '05/1/2012 12:25:00 p', 1335875100.000000],
['MM/DD/YYYY h:m:s a', '05/1/2012 12:25:00 a', 1335831900.000000],
# 2 digit year with YYYY
['D/M/YYYY', '9/2/99', 918518400.000000],
['D/M/YYYY', '9/2/1999', 918518400.000000],
['D/M/YYYY', '9/2/66', -122860800.000000],
['D/M/YYYY', '9/2/65', 3001363200.000000],
# No separators
['MMDDYYYY', '12021999', 944092800.000000],
['DDMMYYYY', '12021999', 918777600.000000],
['YYYYMMDD', '19991202', 944092800.000000],
['DDMMMYYYY', '10Sep2001', 1000080000.000000],
# Error forgiveness
['MM/DD/YYYY', '12-02-1999', 944092800.000000],
['DD/MM/YYYY', '12/02 /1999', 918777600.000000],
['DD:MM:YYYY', '12:02 :1999', 918777600.000000],
['D-M-YY', '2 2 99', 917913600.000000],
['DD-MM-YYYY h:m:s', '12-02-1999 2:45:10.00', 918787510.000000],
['h:mm a', '12:00pm', 1458648000.000000],
['HH:mm', '1200', 1458648000.000000],
['dddd MMMM DD HH:mm:ss YYYY', 'Tue Apr 7 22:52:51 2009', 1239144771.000000],
['ddd MMM DD HH:mm:ss YYYY', 'Tuesday April 7 22:52:51 2009', 1239144771.000000],
['ddd MMM Do HH:mm:ss YYYY', 'Tuesday April 7th 22:52:51 2009', 1239144771.000000]
]
parse_timezone_samples = [
# Timezone corner cases
['MM-DD-YYYY h:ma', '3-13-2016 1:59am', 'America/New_York', 1457852340], # EST
['MM-DD-YYYY h:ma', '3-13-2016 2:00am', 'America/New_York', 1457848800], # Invalid, -1hr
['MM-DD-YYYY h:ma', '3-13-2016 2:59am', 'America/New_York', 1457852340], # Invalid, -1hr
['MM-DD-YYYY h:ma', '3-13-2016 3:00am', 'America/New_York', 1457852400], # EDT
['MM-DD-YYYY h:ma', '3-13-2016 1:59am', 'America/Los_Angeles', 1457863140], # PST
['MM-DD-YYYY h:ma', '3-13-2016 2:00am', 'America/Los_Angeles', 1457859600], # Invalid, -1hr
['MM-DD-YYYY h:ma', '3-13-2016 2:59am', 'America/Los_Angeles', 1457863140], # Invalid, -1hr
['MM-DD-YYYY h:ma', '3-13-2016 3:00am', 'America/Los_Angeles', 1457863200] # PDT
]
def assertMatches(self, data_entry, moment_obj):
date, timestamp, abbr, offset, hour, minute = data_entry
dt = moment_obj.datetime()
@@ -183,12 +110,6 @@ class TestMoment(unittest.TestCase):
self.assertEqual(dt.tzname(), abbr)
self.assertEqual(dt.utcoffset(), timedelta(minutes=-offset))
def test_parse(self):
    """Check moment_parse.parse() against the UTC samples and the timezone samples."""
    # A fixed "current date" is passed so that samples which omit parts of the date
    # don't depend on the day the test runs.
    today = date(2016, 3, 22)
    for parse_format, text, expected in self.parse_samples:
        self.assertEqual(moment_parse.parse(text, parse_format, 'UTC', today), expected)
    for parse_format, text, zone, expected in self.parse_timezone_samples:
        self.assertEqual(moment_parse.parse(text, parse_format, zone, today), expected)
def test_ts_to_dt(self):
# Verify that ts_to_dt works as expected.
value_sec = 1426291200 # 2015-03-14 00:00:00 in UTC

View File

@@ -64,7 +64,7 @@ class TestRefListRelation(test_engine.EngineTestCase):
self.apply_user_action(
['AddColumn', 'TableC', 'gristHelper_Transform', {
"type": 'Ref:TableA', "isFormula": True,
"formula": "grist.Reference.typeConvert($ColB, TableA, 'ColA')", "visibleCol": 2,
"formula": "TableA.lookupOne(ColA=$ColB)", "visibleCol": 2,
}])
self.apply_user_action(
['SetDisplayFormula', 'TableC', None, 7, '$gristHelper_Transform.ColA'])

View File

@@ -1037,7 +1037,13 @@ class UserActions(object):
if not clean_colinfo["isFormula"]:
raise ValueError("AddColumn: cannot add a non-formula column to a summary table")
transform = col_id is not None and col_id.startswith('gristHelper_Transform')
transform = (
col_id is not None and
col_id.startswith((
'gristHelper_Transform',
'gristHelper_Converted',
))
)
if transform:
# Delete any currently existing transform columns with the same id
@@ -1256,6 +1262,29 @@ class UserActions(object):
finally:
self._engine.out_actions.undo.append(mod_action)
@useraction
def ConvertFromColumn(self, table_id, src_col_id, dst_col_id, typ, widgetOptions, visibleColRef):
    """
    Fill column `dst_col_id` with the values of `src_col_id` converted to the type `typ`.

    The conversion itself is not done here: the encoded source values (and the values of the
    source column's display column, if it has one) are passed through `call_external` to
    "convertFromColumn", along with `typ`, `widgetOptions` and `visibleColRef`. The
    destination column is then changed to type `typ` and updated with the returned values.
    """
    from sandbox import call_external

    table = self._engine.tables[table_id]
    src_col_rec = self._docmodel.get_column_rec(table_id, src_col_id)
    src_column = table.get_column(src_col_id)
    row_ids = list(table.row_ids)

    def encoded_values(column):
        # Encode the raw value of `column` for every row, in the order of `row_ids`.
        return [encode_object(column.raw_get(row_id)) for row_id in row_ids]

    src_values = encoded_values(src_column)
    if src_col_rec.displayCol:
        display_values = encoded_values(table.get_column(src_col_rec.displayCol.colId))
    else:
        display_values = None

    converted_values = call_external(
        "convertFromColumn",
        src_col_rec.id,
        typ,
        widgetOptions,
        visibleColRef,
        src_values,
        display_values,
    )

    self.ModifyColumn(table_id, dst_col_id, {"type": typ})
    self.BulkUpdateRecord(table_id, row_ids, {dst_col_id: converted_values})
@useraction
def CopyFromColumn(self, table_id, src_col_id, dst_col_id, widgetOptions):

View File

@@ -138,17 +138,6 @@ class BaseColumnType(object):
return objtypes.safe_repr(value_to_convert)
# This is a user-facing method, hence the camel-case naming, as for `lookupRecords` and such.
@classmethod
def typeConvert(cls, value):
    """
    Turn a value of a different type into something this type can accept, for use when a
    column's type is explicitly converted. The base implementation returns `value` unchanged.
    The usual conversions (e.g. between numbers and strings) still apply to the result.
    """
    converted = value
    return converted
class Text(BaseColumnType):
"""
Text is the type for a field holding string (text) data.
@@ -180,18 +169,6 @@ class Text(BaseColumnType):
def is_right_type(cls, value):
return isinstance(value, (six.string_types, NoneType))
@classmethod
def typeConvert(cls, value):
    """
    Prepare `value` for conversion to Text: None becomes an empty string, booleans become
    'true' / 'false', and anything else is returned unchanged.
    """
    if isinstance(value, bool):
        # Normalize True/False to true/false (Toggle columns use true/false).
        return 'true' if value else 'false'
    # NULLs typically show up as a plain empty cell for Numeric or Date columns, so a plain
    # blank text cell is the more sensible result for them.
    return '' if value is None else value
class Blob(BaseColumnType):
"""
@@ -302,13 +279,6 @@ class Date(Numeric):
def is_right_type(cls, value):
return isinstance(value, (float, six.integer_types, NoneType))
@classmethod
def typeConvert(cls, value, date_format, timezone='UTC'):  # pylint: disable=arguments-differ
    """
    Try to parse `value` with `moment.parse` using `date_format` and `timezone`.
    Returns the parsed result, or `value` unchanged if parsing raises any exception.
    """
    # Note: the timezone argument is used in DateTime conversions, allows sharing this method.
    try:
        parsed = moment.parse(value, date_format, timezone)
    except Exception:
        # Best effort: leave unparseable values as they are.
        return value
    return parsed
class DateTime(Date):
"""
@@ -370,21 +340,6 @@ class ChoiceList(BaseColumnType):
return value is None or (isinstance(value, (tuple, list)) and
all(isinstance(item, six.string_types) for item in value))
@classmethod
def typeConvert(cls, value):
    """
    Prepare `value` for conversion to a ChoiceList.

    None is returned as is. A string not starting with '[' is parsed as one line of CSV
    into a tuple of its non-blank, stripped items. Tuples and lists are returned unchanged.
    Any other value is passed through `Choice.typeConvert` and wrapped in a one-item list.
    """
    if value is None:
        return value

    looks_like_csv = isinstance(value, six.string_types) and not value.startswith('[')
    if looks_like_csv:
        # Try to parse as CSV. If this doesn't work, we'll still try usual conversions later.
        try:
            row = next(csv.reader([value]))
            return tuple(t.strip() for t in row if t.strip())
        except Exception:
            pass

    if isinstance(value, (tuple, list)):
        return value
    return [Choice.typeConvert(value)]
@classmethod
def toString(cls, value):
if isinstance(value, (tuple, list)):
@@ -458,13 +413,6 @@ class Reference(Id):
def typename(cls):
return "Ref"
@classmethod
def typeConvert(cls, value, ref_table, visible_col=None):  # pylint: disable=arguments-differ
    """
    Look up in `ref_table` the record whose `visible_col` equals `value`.

    Returns the result of `lookupOne` if it is truthy, otherwise `value` converted to text.
    If any of `value`, `ref_table` or `visible_col` is falsy, `value` is returned unchanged.
    """
    if not (value and ref_table and visible_col):
        return value
    found = ref_table.lookupOne(**{visible_col: value})
    return found or six.text_type(value)
class ReferenceList(BaseColumnType):
"""
@@ -500,15 +448,6 @@ class ReferenceList(BaseColumnType):
return value is None or (isinstance(value, list) and
all(Reference.is_right_type(val) for val in value))
@classmethod
def typeConvert(cls, value, ref_table, visible_col=None):  # noqa # pylint: disable=arguments-differ
    """
    Look up in `ref_table` the records whose `visible_col` equals `value`.

    Returns the result of `lookupRecords` if it is truthy, otherwise `value` converted to
    text. If any of `value`, `ref_table` or `visible_col` is falsy, `value` is returned
    unchanged.
    """
    # TODO this is based on Reference.typeConvert.
    # It doesn't make much sense as a conversion but I don't know what would
    if not (value and ref_table and visible_col):
        return value
    found = ref_table.lookupRecords(**{visible_col: value})
    return found or six.text_type(value)
class Attachments(ReferenceList):
"""
@@ -516,8 +455,3 @@ class Attachments(ReferenceList):
"""
def __init__(self):
super(Attachments, self).__init__('_grist_Attachments')
@classmethod
def typeConvert(cls, value):  # noqa # pylint: disable=arguments-differ
    """Return `value` unchanged."""
    # Don't use ReferenceList.typeConvert which is called with a different number of arguments
    unchanged = value
    return unchanged