(core) Allow using MIN and MAX functions with dates/datetimes

Summary:
The MIN and MAX functions for formulas previously considered only numbers, ignoring all other types, including dates. An example of this being a problem is here: https://community.getgrist.com/t/last-field-circularreferror-what-is-it/1114/4 . Using `MIN` on a column of dates would return 0 (the default), which gets converted to 1970-01-01. Users have to use `min` instead, which is confusing and doesn't work when some values are empty.

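This diff lets the functions operate on date and datetime values. A mixture of dates and datetimes is allowed, even though these cannot usually be compared with each other in Python. Mixing dates and numbers will raise an exception. COUNT is updated in the same way, so it now counts date and datetime values as well as numbers.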

Test Plan: Extended doctests

Reviewers: jarek, paulfitz

Reviewed By: jarek

Subscribers: paulfitz

Differential Revision: https://phab.getgrist.com/D3560
This commit is contained in:
Alex Hall 2022-08-04 14:24:23 +02:00
parent 787b70f0f6
commit 083a0ec000
2 changed files with 54 additions and 19 deletions

View File

@ -1,6 +1,8 @@
# pylint: disable=unused-argument # pylint: disable=unused-argument
from __future__ import absolute_import from __future__ import absolute_import
import datetime
import math as _math import math as _math
import operator import operator
import os import os
@ -38,6 +40,13 @@ def _chain_numeric_a(*values_or_iterables):
yield int(v) if ISLOGICAL(v) else v if ISNUMBER(v) else 0 yield int(v) if ISLOGICAL(v) else v if ISNUMBER(v) else 0
# Walks every value produced by _chain(...) for the given arguments and yields
# only those that are numbers, dates, or datetimes, in their original order.
# Values flagged by ISLOGICAL are skipped even when ISNUMBER accepts them;
# everything else (text, blanks, ...) is dropped as well.
def _chain_numeric_or_date(*values_or_iterables):
    date_types = (datetime.date, datetime.datetime)
    for item in _chain(*values_or_iterables):
        # Explicit grouping of the original `A and not B or C` condition.
        is_plain_number = ISNUMBER(item) and not ISLOGICAL(item)
        if is_plain_number or isinstance(item, date_types):
            yield item
def _round_toward_zero(value): def _round_toward_zero(value):
return _math.floor(value) if value >= 0 else _math.ceil(value) return _math.floor(value) if value >= 0 else _math.ceil(value)

View File

@ -1,8 +1,9 @@
# pylint: disable=redefined-builtin, line-too-long, unused-argument # pylint: disable=redefined-builtin, line-too-long, unused-argument
import datetime
from .math import _chain, _chain_numeric, _chain_numeric_a from .math import _chain, _chain_numeric, _chain_numeric_a, _chain_numeric_or_date
from .info import ISNUMBER, ISLOGICAL from .info import ISNUMBER, ISLOGICAL
from .date import DATE # pylint: disable=unused-import from .date import DATE, DTIME # pylint: disable=unused-import
from .unimplemented import unimplemented from .unimplemented import unimplemented
def _average(iterable): def _average(iterable):
@ -127,21 +128,24 @@ def CORREL(data_y, data_x):
def COUNT(value, *more_values): def COUNT(value, *more_values):
""" """
Returns the count of numerical values in a dataset, ignoring non-numerical values. Returns the count of numerical and date/datetime values in a dataset,
ignoring other types of values.
Each argument may be a value or an array. Values that are not numbers, including logical Each argument may be a value or an array. Values that are not numbers or dates, including logical
and blank values, and text representations of numbers, are ignored. and blank values, and text representations of numbers, are ignored.
>>> COUNT([2, -1.0, 11]) >>> COUNT([2, -1.0, 11])
3 3
>>> COUNT([2, -1, 11, "Hello"]) >>> COUNT([2, -1, 11, "Hello"])
3 3
>>> COUNT([2, -1, "Hello", DATE(2015,1,1)], True, [False, "123", "", 11.5]) >>> COUNT([DATE(2000, 1, 1), DATE(2000, 1, 2), DATE(2000, 1, 3), "Hello"])
3 3
>>> COUNT([2, -1, "Hello", DATE(2015,1,1)], True, [False, "123", "", 11.5])
4
>>> COUNT(False, True) >>> COUNT(False, True)
0 0
""" """
return sum(1 for v in _chain_numeric(value, *more_values)) return sum(1 for _ in _chain_numeric_or_date(value, *more_values))
def COUNTA(value, *more_values): def COUNTA(value, *more_values):
@ -159,7 +163,7 @@ def COUNTA(value, *more_values):
>>> COUNTA(False, True) >>> COUNTA(False, True)
2 2
""" """
return sum(1 for v in _chain(value, *more_values)) return sum(1 for _ in _chain(value, *more_values))
@unimplemented @unimplemented
@ -267,24 +271,31 @@ def LOGNORMDIST(x, mean, standard_deviation):
def MAX(value, *more_values): def MAX(value, *more_values):
""" """
Returns the maximum value in a dataset, ignoring non-numerical values. Returns the maximum value in a dataset, ignoring values other than numbers and dates/datetimes.
Each argument may be a value or an array. Values that are not numbers, including logical Each argument may be a value or an array. Values that are not numbers or dates, including logical
and blank values, and text representations of numbers, are ignored. Returns 0 if the arguments and blank values, and text representations of numbers, are ignored. Returns 0 if the arguments
contain no numbers. contain no numbers or dates.
>>> MAX([2, -1.5, 11.5]) >>> MAX([2, -1.5, 11.5])
11.5 11.5
>>> MAX([2, -1.5, "Hello", DATE(2015, 1, 1)], True, [False, "123", "", 11.5]) >>> MAX([2, -1.5, "Hello"], True, [False, "123", "", 11.5])
11.5 11.5
>>> MAX(True, -123) >>> MAX(True, -123)
-123 -123
>>> MAX("123", -123) >>> MAX("123", -123)
-123 -123
>>> MAX("Hello", "123", DATE(2015, 1, 1)) >>> MAX("Hello", "123", True, False)
0 0
>>> MAX(DATE(2015, 1, 1), DATE(2015, 1, 2))
datetime.date(2015, 1, 2)
>>> MAX(DATE(2015, 1, 1), datetime.datetime(2015, 1, 1, 12, 34, 56))
datetime.datetime(2015, 1, 1, 12, 34, 56)
>>> MAX(DATE(2015, 1, 2), datetime.datetime(2015, 1, 1, 12, 34, 56))
datetime.date(2015, 1, 2)
""" """
return max(_default_if_empty(_chain_numeric(value, *more_values), 0)) values = _default_if_empty(_chain_numeric_or_date(value, *more_values), 0)
return max(values, key=_compare_date_datetime_key)
def MAXA(value, *more_values): def MAXA(value, *more_values):
@ -345,26 +356,41 @@ def MEDIAN(value, *more_values):
return values[(count - 1) // 2] return values[(count - 1) // 2]
def _compare_date_datetime_key(x):
    # Sort key used by MIN and MAX so that dates and datetimes can be ordered
    # against each other: date/datetime values are passed through DTIME (which
    # is meant to turn dates and naive datetimes into timezone-aware
    # datetimes), while any other value is returned unchanged and compared
    # as-is.
    is_date_like = isinstance(x, (datetime.date, datetime.datetime))
    return DTIME(x) if is_date_like else x
def MIN(value, *more_values): def MIN(value, *more_values):
""" """
Returns the minimum value in a dataset, ignoring non-numerical values. Returns the minimum value in a dataset, ignoring values other than numbers and dates/datetimes.
Each argument may be a value or an array. Values that are not numbers, including logical Each argument may be a value or an array. Values that are not numbers or dates, including logical
and blank values, and text representations of numbers, are ignored. Returns 0 if the arguments and blank values, and text representations of numbers, are ignored. Returns 0 if the arguments
contain no numbers. contain no numbers or dates.
>>> MIN([2, -1.5, 11.5]) >>> MIN([2, -1.5, 11.5])
-1.5 -1.5
>>> MIN([2, -1.5, "Hello", DATE(2015, 1, 1)], True, [False, "123", "", 11.5]) >>> MIN([2, -1.5, "Hello"], True, [False, "123", "", 11.5])
-1.5 -1.5
>>> MIN(True, 123) >>> MIN(True, 123)
123 123
>>> MIN("-123", 123) >>> MIN("-123", 123)
123 123
>>> MIN("Hello", "123", DATE(2015, 1, 1)) >>> MIN("Hello", "123", True, False)
0 0
>>> MIN(DATE(2015, 1, 1), DATE(2015, 1, 2))
datetime.date(2015, 1, 1)
>>> MIN(DATE(2015, 1, 1), datetime.datetime(2015, 1, 1, 12, 34, 56))
datetime.date(2015, 1, 1)
>>> MIN(DATE(2015, 1, 2), datetime.datetime(2015, 1, 1, 12, 34, 56))
datetime.datetime(2015, 1, 1, 12, 34, 56)
""" """
return min(_default_if_empty(_chain_numeric(value, *more_values), 0)) values = _default_if_empty(_chain_numeric_or_date(value, *more_values), 0)
return min(values, key=_compare_date_datetime_key)
def MINA(value, *more_values): def MINA(value, *more_values):
""" """