mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
(core) Fix import parsing from choking up on Python isdigit() surprises
Summary: Python's str.isdigit() returns True for Unicode characters such as "²", which then raise ValueError when passed to int(). Instead, be explicit and treat only the ASCII characters 0-9 as digits. Test Plan: Added a test case that fails with an error without this change. Reviewers: alexmojaki Reviewed By: alexmojaki Differential Revision: https://phab.getgrist.com/D3027
This commit is contained in:
parent
cd241a633a
commit
64d9faed5a
@ -63,39 +63,44 @@ TZ_VALID_NAMES = {z[0] for z in moment.get_tz_data().items()}
|
|||||||
AM_PM = {'am', 'pm'}
|
AM_PM = {'am', 'pm'}
|
||||||
DAYS_OF_WEEK_NAME = calendar.day_name
|
DAYS_OF_WEEK_NAME = calendar.day_name
|
||||||
DAYS_OF_WEEK_ABBR = calendar.day_abbr
|
DAYS_OF_WEEK_ABBR = calendar.day_abbr
|
||||||
|
ASCII_DIGITS_RE = re.compile(r'^[0-9]+$')
|
||||||
|
|
||||||
|
# x.isdigit() also returns True for strings like u'\xb2' (superscript digits), which int() cannot parse.
|
||||||
|
# Use isdigit(x) instead, to only match ASCII digits 0-9.
|
||||||
|
isdigit = ASCII_DIGITS_RE.match
|
||||||
|
|
||||||
DATE_ELEMENTS = [
|
DATE_ELEMENTS = [
|
||||||
# Name Pattern Predicate Group (mutual exclusive) Consumes N prev elements
|
# Name Pattern Predicate Group (mutual exclusive) Consumes N prev elements
|
||||||
("Year", "%Y", lambda x, p, v: x.isdigit() and len(x) == 4, "Y", 0),
|
("Year", "%Y", lambda x, p, v: isdigit(x) and len(x) == 4, "Y", 0),
|
||||||
("Year short", "%y", lambda x, p, v: x.isdigit() and len(x) == 2, "Y", 0),
|
("Year short", "%y", lambda x, p, v: isdigit(x) and len(x) == 2, "Y", 0),
|
||||||
("Month", "%m", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 < int(x) <= 12, "m", 0),
|
("Month", "%m", lambda x, p, v: isdigit(x) and len(x) <= 2 and 0 < int(x) <= 12, "m", 0),
|
||||||
("Month name full", "%B", lambda x, p, v: x.isalpha() and x.capitalize() in MONTH_NAME, "m", 0),
|
("Month name full", "%B", lambda x, p, v: x.isalpha() and x.capitalize() in MONTH_NAME, "m", 0),
|
||||||
("Month name abbr", "%b", lambda x, p, v: x.isalpha() and x.capitalize() in MONTH_ABBR, "m", 0),
|
("Month name abbr", "%b", lambda x, p, v: x.isalpha() and x.capitalize() in MONTH_ABBR, "m", 0),
|
||||||
("Day", "%d", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 < int(x) <= 31, "d", 0),
|
("Day", "%d", lambda x, p, v: isdigit(x) and len(x) <= 2 and 0 < int(x) <= 31, "d", 0),
|
||||||
("Day of week", "%A", lambda x, p, v: x.isalpha()
|
("Day of week", "%A", lambda x, p, v: x.isalpha()
|
||||||
and x.capitalize() in DAYS_OF_WEEK_NAME, "a", 0),
|
and x.capitalize() in DAYS_OF_WEEK_NAME, "a", 0),
|
||||||
("Day of week abbr", "%a", lambda x, p, v: x.isalpha()
|
("Day of week abbr", "%a", lambda x, p, v: x.isalpha()
|
||||||
and x.capitalize() in DAYS_OF_WEEK_ABBR, "a", 0),
|
and x.capitalize() in DAYS_OF_WEEK_ABBR, "a", 0),
|
||||||
|
|
||||||
("Compound HHMMSS", "%H%M%S", lambda x, p, v: x.isdigit() and len(x) == 6
|
("Compound HHMMSS", "%H%M%S", lambda x, p, v: isdigit(x) and len(x) == 6
|
||||||
and 0 <= int(x[0:2]) < 24
|
and 0 <= int(x[0:2]) < 24
|
||||||
and 0 <= int(x[2:4]) < 60
|
and 0 <= int(x[2:4]) < 60
|
||||||
and 0 <= int(x[4:6]) < 60, "HMS", 0),
|
and 0 <= int(x[4:6]) < 60, "HMS", 0),
|
||||||
|
|
||||||
("Hour", "%H", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 <= int(x) <= 23, "H", 0),
|
("Hour", "%H", lambda x, p, v: isdigit(x) and len(x) <= 2 and 0 <= int(x) <= 23, "H", 0),
|
||||||
("Hour in 12hr mode", "%I", lambda x, p, v: x.isdigit() and len(x) <= 2
|
("Hour in 12hr mode", "%I", lambda x, p, v: isdigit(x) and len(x) <= 2
|
||||||
and 0 <= int(x) <= 11, "H", 0),
|
and 0 <= int(x) <= 11, "H", 0),
|
||||||
("AM/PM", "%p", lambda x, p, v: x.isalpha() and len(x) == 2 and x.lower() in AM_PM, "p", 0),
|
("AM/PM", "%p", lambda x, p, v: x.isalpha() and len(x) == 2 and x.lower() in AM_PM, "p", 0),
|
||||||
("Minutes", "%M", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 <= int(x) <= 59, "M", 0),
|
("Minutes", "%M", lambda x, p, v: isdigit(x) and len(x) <= 2 and 0 <= int(x) <= 59, "M", 0),
|
||||||
("Seconds", "%S", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 <= int(x) <= 59, "S", 0),
|
("Seconds", "%S", lambda x, p, v: isdigit(x) and len(x) <= 2 and 0 <= int(x) <= 59, "S", 0),
|
||||||
("Fraction of second", "%f", lambda x, p, v: x.isdigit() and p is not None
|
("Fraction of second", "%f", lambda x, p, v: isdigit(x) and p is not None
|
||||||
and p.val == '.', "f", 0),
|
and p.val == '.', "f", 0),
|
||||||
("Timezone name", "%Z", lambda x, p, v: x.isalpha() and len(x) > 2
|
("Timezone name", "%Z", lambda x, p, v: x.isalpha() and len(x) > 2
|
||||||
and x in TZ_VALID_NAMES, "Z", 0),
|
and x in TZ_VALID_NAMES, "Z", 0),
|
||||||
("Timezone +HHMM", "%z", lambda x, p, v: x.isdigit() and len(x) == 4 and 0 <= int(x[0:2]) < 15
|
("Timezone +HHMM", "%z", lambda x, p, v: isdigit(x) and len(x) == 4 and 0 <= int(x[0:2]) < 15
|
||||||
and 0 <= int(x[2:4]) < 60 and p is not None
|
and 0 <= int(x[2:4]) < 60 and p is not None
|
||||||
and p.val == '+', "Z", 1),
|
and p.val == '+', "Z", 1),
|
||||||
("Timezone -HHMM", "%z", lambda x, p, v: x.isdigit() and len(x) == 4 and 0 <= int(x[0:2]) < 15
|
("Timezone -HHMM", "%z", lambda x, p, v: isdigit(x) and len(x) == 4 and 0 <= int(x[0:2]) < 15
|
||||||
and 0 <= int(x[2:4]) < 60 and p is not None
|
and 0 <= int(x[2:4]) < 60 and p is not None
|
||||||
and p.val == '-', "Z", 1),
|
and p.val == '-', "Z", 1),
|
||||||
]
|
]
|
||||||
|
2
sandbox/grist/imports/fixtures/test_isdigit.csv
Normal file
2
sandbox/grist/imports/fixtures/test_isdigit.csv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
PHONE,VALUE,DATE
|
||||||
|
201-¾᠓𑄺꤈꤈꧐꤆,¹5,2018-0²-27 16:08:39 +0000
|
|
@ -336,6 +336,14 @@ class TestImportCSV(unittest.TestCase):
|
|||||||
self._check_col(sheet, 0, "ID", "Int", [17])
|
self._check_col(sheet, 0, "ID", "Int", [17])
|
||||||
self._check_col(sheet, 1, "LongText", "Text", [long_cell])
|
self._check_col(sheet, 1, "LongText", "Text", [long_cell])
|
||||||
|
|
||||||
|
def test_csv_with_surprising_isdigit(self):
|
||||||
|
parsed_file = import_csv.parse_file(_get_fixture('test_isdigit.csv'), parse_options='')
|
||||||
|
sheet = parsed_file[1][0]
|
||||||
|
self._check_num_cols(sheet, 3)
|
||||||
|
self._check_col(sheet, 0, "PHONE", "Text", [u'201-¾᠓𑄺꤈꤈꧐꤆'])
|
||||||
|
self._check_col(sheet, 1, "VALUE", "Text", [u'¹5'])
|
||||||
|
self._check_col(sheet, 2, "DATE", "Text", [u'2018-0²-27 16:08:39 +0000'])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
Loading…
Reference in New Issue
Block a user