# -*- coding: UTF-8 -*-

import datetime
import numbers
import re

import dateutil.parser
import phonenumbers
import six
from six import unichr    # pylint: disable=redefined-builtin
from six.moves import xrange

from usertypes import AltText   # pylint: disable=import-error
from .math import ROUND
from .unimplemented import unimplemented


def CHAR(table_number):
  """
  Convert a number into a character according to the current Unicode table.
  Same as `unichr(number)`.

  >>> CHAR(65)
  u'A'
  >>> CHAR(33)
  u'!'
  """
  return unichr(table_number)


# See http://stackoverflow.com/a/93029/328565
_control_chars = ''.join(map(unichr, list(xrange(0,32)) + list(xrange(127,160))))
_control_char_re = re.compile('[%s]' % re.escape(_control_chars))

def CLEAN(text):
  """
  Returns the text with the non-printable characters removed.

  This removes both characters with values 0 through 31, and other Unicode characters in the
  "control characters" category.

  >>> CLEAN(CHAR(9) + "Monthly report" + CHAR(10))
  u'Monthly report'
  """
  return _control_char_re.sub('', text)


def CODE(string):
  """
  Returns the numeric Unicode map value of the first character in the string provided.
  Same as `ord(string[0])`.

  >>> CODE("A")
  65
  >>> CODE("!")
  33
  >>> CODE("!A")
  33
  """
  return ord(string[0])


def CONCATENATE(string, *more_strings):
  u"""
  Joins together any number of text strings into one string. Also available under the name
  `CONCAT`. Similar to the Python expression `"".join(array_of_strings)`.

  >>> CONCATENATE("Stream population for ", "trout", " ", "species", " is ", 32, "/mile.")
  u'Stream population for trout species is 32/mile.'
  >>> CONCATENATE("In ", 4, " days it is ", datetime.date(2016,1,1))
  u'In 4 days it is 2016-01-01'
  >>> CONCATENATE("abc")
  u'abc'
  >>> CONCATENATE(0, "abc")
  u'0abc'
  >>> assert CONCATENATE(2, u" crème ", u"brûlée") == u'2 crème brûlée'
  >>> assert CONCATENATE(2, " crème ", u"brûlée") == u'2 crème brûlée'
  >>> assert CONCATENATE(2, " crème ", "brûlée") == u'2 crème brûlée'
  """
  # Byte strings are decoded as UTF-8; everything else is converted with the text type.
  return u''.join(
    val.decode('utf8') if isinstance(val, six.binary_type) else  # pylint:disable=no-member
    six.text_type(val)
    for val in (string,) + more_strings
  )


def CONCAT(string, *more_strings):
  """
  Joins together any number of text strings into one string. Also available under the name
  `CONCATENATE`. Similar to the Python expression `"".join(array_of_strings)`.

  >>> CONCAT("Stream population for ", "trout", " ", "species", " is ", 32, "/mile.")
  u'Stream population for trout species is 32/mile.'
  >>> CONCAT("In ", 4, " days it is ", datetime.date(2016,1,1))
  u'In 4 days it is 2016-01-01'
  >>> CONCAT("abc")
  u'abc'
  >>> CONCAT(0, "abc")
  u'0abc'
  >>> assert CONCAT(2, u" crème ", u"brûlée") == u'2 crème brûlée'
  """
  return CONCATENATE(string, *more_strings)


def DOLLAR(number, decimals=2):
  """
  Formats a number into a formatted dollar amount, with decimals rounded to the specified place.
  If decimals value is omitted, it defaults to 2. Negative amounts are shown in parentheses.

  >>> DOLLAR(1234.567)
  '$1,234.57'
  >>> DOLLAR(1234.567, -2)
  '$1,200'
  >>> DOLLAR(-1234.567, -2)
  '($1,200)'
  >>> DOLLAR(-0.123, 4)
  '($0.1230)'
  >>> DOLLAR(99.888)
  '$99.89'
  >>> DOLLAR(0)
  '$0.00'
  >>> DOLLAR(10, 0)
  '$10'
  """
  # A negative `decimals` rounds to the left of the decimal point, but no fractional digits
  # are printed in that case, hence max(0, decimals) for the format precision.
  formatted = "${:,.{}f}".format(ROUND(abs(number), decimals), max(0, decimals))
  return formatted if number >= 0 else "(" + formatted + ")"


def EXACT(string1, string2):
  """
  Tests whether two strings are identical. Same as `string1 == string2`.

  >>> EXACT("word", "word")
  True
  >>> EXACT("Word", "word")
  False
  >>> EXACT("w ord", "word")
  False
  """
  return string1 == string2


def FIND(find_text, within_text, start_num=1):
  """
  Returns the position at which a string is first found within text.

  Find is case-sensitive. The returned position is 1 if within_text starts with find_text.
  Start_num specifies the character at which to start the search, defaulting to 1 (the first
  character of within_text).

  If find_text is not found, or start_num is invalid, raises ValueError.

  >>> FIND("M", "Miriam McGovern")
  1
  >>> FIND("m", "Miriam McGovern")
  6
  >>> FIND("M", "Miriam McGovern", 3)
  8
  >>> FIND(" #", "Hello world # Test")
  12
  >>> FIND("gle", "Google", 1)
  4
  >>> FIND("GLE", "Google", 1)
  Traceback (most recent call last):
  ...
  ValueError: substring not found
  >>> FIND("page", "homepage")
  5
  >>> FIND("page", "homepage", 6)
  Traceback (most recent call last):
  ...
  ValueError: substring not found
  >>> FIND("n", "Miriam McGovern", 0)
  Traceback (most recent call last):
  ...
  ValueError: start_num invalid
  """
  # Without this check, start_num <= 0 would turn into a negative index and silently search
  # from the end of within_text.
  if start_num < 1:
    raise ValueError("start_num invalid")
  return within_text.index(find_text, start_num - 1) + 1


def FIXED(number, decimals=2, no_commas=False):
  """
  Formats a number with a fixed number of decimal places (2 by default), and commas.
  If no_commas is True, then omits the commas.

  >>> FIXED(1234.567, 1)
  '1,234.6'
  >>> FIXED(1234.567, -1)
  '1,230'
  >>> FIXED(-1234.567, -1, True)
  '-1230'
  >>> FIXED(44.332)
  '44.33'
  >>> FIXED(3521.478, 2, False)
  '3,521.48'
  >>> FIXED(-3521.478, 1, True)
  '-3521.5'
  >>> FIXED(3521.478, 0, True)
  '3521'
  >>> FIXED(3521.478, -2, True)
  '3500'
  """
  comma_flag = '' if no_commas else ','
  # Negative `decimals` only affects rounding; the printed precision is never below zero.
  return "{:{}.{}f}".format(ROUND(number, decimals), comma_flag, max(0, decimals))


def LEFT(string, num_chars=1):
  """
  Returns a substring of length num_chars from the beginning of the given string. If num_chars is
  omitted, it is assumed to be 1. Same as `string[:num_chars]`.

  Raises ValueError if num_chars is negative.

  >>> LEFT("Sale Price", 4)
  'Sale'
  >>> LEFT('Swededn')
  'S'
  >>> LEFT('Text', -1)
  Traceback (most recent call last):
  ...
  ValueError: num_chars invalid
  """
  if num_chars < 0:
    raise ValueError("num_chars invalid")
  return string[:num_chars]


def LEN(text):
  """
  Returns the number of characters in a text string, or the number of items in a list. Same as
  [`len`](https://docs.python.org/3/library/functions.html#len) in python.
  See [Record Set](#recordset) for an example of using `len` on a list of records.

  >>> LEN("Phoenix, AZ")
  11
  >>> LEN("")
  0
  >>> LEN("     One   ")
  11
  """
  return len(text)


def LOWER(text):
  """
  Converts a specified string to lowercase. Same as `text.lower()`.

  >>> LOWER("E. E. Cummings")
  'e. e. cummings'
  >>> LOWER("Apt. 2B")
  'apt. 2b'
  """
  return text.lower()


def MID(text, start_num, num_chars):
  """
  Returns a segment of a string, starting at start_num. The first character in text has
  start_num 1.

  Raises ValueError if start_num is less than 1, or if num_chars is negative.

  >>> MID("Fluid Flow", 1, 5)
  'Fluid'
  >>> MID("Fluid Flow", 7, 20)
  'Flow'
  >>> MID("Fluid Flow", 20, 5)
  ''
  >>> MID("Fluid Flow", 0, 5)
  Traceback (most recent call last):
  ...
  ValueError: start_num invalid
  >>> MID("Fluid Flow", 1, -1)
  Traceback (most recent call last):
  ...
  ValueError: num_chars invalid
  """
  if start_num < 1:
    raise ValueError("start_num invalid")
  # A negative length could make the slice end negative, which Python would interpret as
  # counting from the end of the string; reject it instead.
  if num_chars < 0:
    raise ValueError("num_chars invalid")
  return text[start_num - 1 : start_num - 1 + num_chars]


# Maps each accepted `format` argument of PHONE_FORMAT to a phonenumbers output format.
output_formats = {
  "+": phonenumbers.PhoneNumberFormat.INTERNATIONAL,
  "INTL": phonenumbers.PhoneNumberFormat.INTERNATIONAL,
  "#": phonenumbers.PhoneNumberFormat.NATIONAL,
  "NATL": phonenumbers.PhoneNumberFormat.NATIONAL,
  "*": phonenumbers.PhoneNumberFormat.E164,
  "E164": phonenumbers.PhoneNumberFormat.E164,
  "tel": phonenumbers.PhoneNumberFormat.RFC3966,
  "RFC3966": phonenumbers.PhoneNumberFormat.RFC3966,
}

def PHONE_FORMAT(value, country=None, format=None):    # pylint: disable=redefined-builtin
  """
  Formats a phone number.

  With no optional arguments, the number must start with "+" and the international dialing prefix,
  and will be formatted as an international number, e.g. `+12345678901` becomes
  `+1 234-567-8901`.

  The `country` argument allows specifying a 2-letter country code (e.g. "US" or "GB") for
  interpreting phone numbers that don't start with "+". E.g. `PHONE_FORMAT('2025555555', 'US')`
  would be seen as a US number and formatted as "(202) 555-5555". Phone numbers that start with
  "+" ignore `country`. E.g. `PHONE_FORMAT('+33555555555', 'US')` is a French number because '+33'
  is the international prefix for France.

  The `format` argument specifies the output format, according to this table:

    - `"#"` or `"NATL"` (default) - use the national format, without the international dialing
      prefix, when possible. E.g. `(234) 567-8901` for "US", or `02 34 56 78 90` for "FR". If
      `country` is omitted, or the number does not correspond to the given country, the
      international format is used instead.
    - `"+"` or `"INTL"` - international format, e.g. `+1 234-567-8901` or `+33 2 34 56 78 90`.
    - `"*"` or `"E164"` - E164 format, like international but with no separators, e.g.
      `+12345678901`.
    - `"tel"` or `"RFC3966"` - format suitable to use as a
      [hyperlink](col-types.md#hyperlinks), e.g. 'tel:+1-234-567-8901'.

  When specifying the `format` argument, you may omit the `country` argument. I.e.
  `PHONE_FORMAT(value, "tel")` is equivalent to `PHONE_FORMAT(value, None, "tel")`.

  For more details, see the [phonenumbers](https://github.com/daviddrysdale/python-phonenumbers)
  Python library, which underlies this function.

  >>> PHONE_FORMAT("+12345678901")
  u'+1 234-567-8901'
  >>> PHONE_FORMAT("2345678901", "US")
  u'(234) 567-8901'
  >>> PHONE_FORMAT("2345678901", "GB")
  u'023 4567 8901'
  >>> PHONE_FORMAT("2345678901", "GB", "+")
  u'+44 23 4567 8901'
  >>> PHONE_FORMAT("+442345678901", "GB")
  u'023 4567 8901'
  >>> PHONE_FORMAT("+12345678901", "GB")
  u'+1 234-567-8901'
  >>> PHONE_FORMAT("(234) 567-8901")      # doctest: +IGNORE_EXCEPTION_DETAIL
  Traceback (most recent call last):
  ...
  NumberParseException: (0) Missing or invalid default region.
  >>> PHONE_FORMAT("(234)567 89-01", "US", "tel")
  u'tel:+1-234-567-8901'
  >>> PHONE_FORMAT("2/3456/7890", "FR", '#')
  u'02 34 56 78 90'
  >>> PHONE_FORMAT("+33234567890", '#')
  u'+33 2 34 56 78 90'
  >>> PHONE_FORMAT("+33234567890", 'tel')
  u'tel:+33-2-34-56-78-90'
  >>> PHONE_FORMAT("tel:+1-234-567-8901", country="US", format="*")
  u'+12345678901'
  """
  if not value:
    return value

  # Support the two-argument form PHONE_FORMAT(value, format): if the second argument is a
  # known format name and no explicit format was given, treat it as the format.
  if format is None and country in output_formats:
    format = country
    country = None

  parsed = phonenumbers.parse(str(value), country)
  out_fmt = output_formats.get(format or "#")
  if out_fmt is None:
    raise ValueError("Unrecognized phone format; try +, INTL, #, NATL, *, E164, tel, or RFC3966")

  if out_fmt == phonenumbers.PhoneNumberFormat.NATIONAL and not country:
    # With no country, we lose info in NATIONAL format (because numbers must be specified with an
    # international prefix, and the output would discard it). Use INTERNATIONAL instead.
    out_fmt = phonenumbers.PhoneNumberFormat.INTERNATIONAL

  result = phonenumbers.format_number(parsed, out_fmt)

  # If using a national format with a country, check that we don't garble numbers with a different
  # international prefix. If so, use an international format. E.g. for
  # PHONE_FORMAT('+12345678901', 'FR'), the output should include the US dialing prefix.
  if (out_fmt == phonenumbers.PhoneNumberFormat.NATIONAL and country
      and phonenumbers.parse(result, country) != parsed):
    result = phonenumbers.format_number(parsed, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
  return result


def PROPER(text):
  """
  Capitalizes each word in a specified string. It converts the first letter of each word to
  uppercase, and all other letters to lowercase. Same as `text.title()`.

  >>> PROPER('this is a TITLE')
  'This Is A Title'
  >>> PROPER('2-way street')
  '2-Way Street'
  >>> PROPER('76BudGet')
  '76Budget'
  """
  return text.title()


def REGEXEXTRACT(text, regular_expression):
  """
  Extracts the first part of text that matches regular_expression.

  >>> REGEXEXTRACT("Google Doc 101", "[0-9]+")
  '101'
  >>> REGEXEXTRACT("The price today is $826.25", "[0-9]*\\.[0-9]+[0-9]+")
  '826.25'

  If there is a parenthesized expression, it is returned instead of the whole match.
  >>> REGEXEXTRACT("(Content) between brackets", "\\(([A-Za-z]+)\\)")
  'Content'
  >>> REGEXEXTRACT("Foo", "Bar")
  Traceback (most recent call last):
  ...
  ValueError: REGEXEXTRACT text does not match
  """
  m = re.search(regular_expression, text)
  if not m:
    raise ValueError("REGEXEXTRACT text does not match")
  # m.lastindex is set only when some capturing group participated in the match.
  return m.group(1) if m.lastindex else m.group(0)


def REGEXMATCH(text, regular_expression):
  """
  Returns whether a piece of text matches a regular expression.

  >>> REGEXMATCH("Google Doc 101", "[0-9]+")
  True
  >>> REGEXMATCH("Google Doc", "[0-9]+")
  False
  >>> REGEXMATCH("The price today is $826.25", "[0-9]*\\.[0-9]+[0-9]+")
  True
  >>> REGEXMATCH("(Content) between brackets", "\\(([A-Za-z]+)\\)")
  True
  >>> REGEXMATCH("Foo", "Bar")
  False
  """
  return bool(re.search(regular_expression, text))


def REGEXREPLACE(text, regular_expression, replacement):
  """
  Replaces all parts of text matching the given regular expression with replacement text.

  >>> REGEXREPLACE("Google Doc 101", "[0-9]+", "777")
  'Google Doc 777'
  >>> REGEXREPLACE("Google Doc", "[0-9]+", "777")
  'Google Doc'
  >>> REGEXREPLACE("The price is $826.25", "[0-9]*\\.[0-9]+[0-9]+", "315.75")
  'The price is $315.75'
  >>> REGEXREPLACE("(Content) between brackets", "\\(([A-Za-z]+)\\)", "Word")
  'Word between brackets'
  >>> REGEXREPLACE("Foo", "Bar", "Baz")
  'Foo'
  """
  return re.sub(regular_expression, replacement, text)


def REPLACE(text, position, length, new_text):
  """
  Replaces part of a text string with a different text string. Position is counted from 1.

  Raises ValueError if position is less than 1.

  >>> REPLACE("abcdefghijk", 6, 5, "*")
  'abcde*k'
  >>> REPLACE("2009", 3, 2, "10")
  '2010'
  >>> REPLACE('123456', 1, 3, '@')
  '@456'
  >>> REPLACE('foo', 1, 0, 'bar')
  'barfoo'
  >>> REPLACE('foo', 0, 1, 'bar')
  Traceback (most recent call last):
  ...
  ValueError: position invalid
  """
  if position < 1:
    raise ValueError("position invalid")
  return text[:position - 1] + new_text + text[position - 1 + length:]


def REPT(text, number_times):
  """
  Returns specified text repeated a number of times. Same as `text * number_times`.

  The result of the REPT function cannot be longer than 32767 characters, or it raises a
  ValueError.

  >>> REPT("*-", 3)
  '*-*-*-'
  >>> REPT('-', 10)
  '----------'
  >>> REPT('-', 0)
  ''
  >>> len(REPT('---', 10000))
  30000
  >>> REPT('---', 11000)
  Traceback (most recent call last):
  ...
  ValueError: number_times invalid
  >>> REPT('-', -1)
  Traceback (most recent call last):
  ...
  ValueError: number_times invalid
  """
  if number_times < 0 or len(text) * number_times > 32767:
    raise ValueError("number_times invalid")
  return text * int(number_times)


def RIGHT(string, num_chars=1):
  """
  Returns a substring of length num_chars from the end of a specified string. If num_chars is
  omitted, it is assumed to be 1. Same as `string[-num_chars:]` for positive num_chars.

  Raises ValueError if num_chars is negative.

  >>> RIGHT("Sale Price", 5)
  'Price'
  >>> RIGHT('Stock Number')
  'r'
  >>> RIGHT('Text', 100)
  'Text'
  >>> RIGHT('Text', 0)
  ''
  >>> RIGHT('Text', -1)
  Traceback (most recent call last):
  ...
  ValueError: num_chars invalid
  """
  if num_chars < 0:
    raise ValueError("num_chars invalid")
  # Compute the start explicitly: `string[-0:]` would return the whole string rather than
  # the empty string expected for num_chars == 0.
  return string[max(0, len(string) - num_chars):]


def SEARCH(find_text, within_text, start_num=1):
  """
  Returns the position at which a string is first found within text, ignoring case.

  Unlike FIND, the search is case-insensitive. The returned position is 1 if within_text starts
  with find_text. Start_num specifies the character at which to start the search, defaulting to 1
  (the first character of within_text).

  If find_text is not found, or start_num is invalid, raises ValueError.

  >>> SEARCH("e", "Statements", 6)
  7
  >>> SEARCH("margin", "Profit Margin")
  8
  >>> SEARCH(" ", "Profit Margin")
  7
  >>> SEARCH('"', 'The "boss" is here.')
  5
  >>> SEARCH("gle", "Google")
  4
  >>> SEARCH("GLE", "Google")
  4
  >>> SEARCH("n", "Profit Margin", 0)
  Traceback (most recent call last):
  ...
  ValueError: start_num invalid
  """
  # Without this check, start_num <= 0 would turn into a negative index and silently search
  # from the end of within_text.
  if start_num < 1:
    raise ValueError("start_num invalid")
  # .lower() isn't always correct for unicode. See http://stackoverflow.com/a/29247821/328565
  return within_text.lower().index(find_text.lower(), start_num - 1) + 1


def SUBSTITUTE(text, old_text, new_text, instance_num=None):
  u"""
  Replaces existing text with new text in a string. It is useful when you know the substring of
  text to replace. Use REPLACE when you know the position of text to replace.

  If instance_num is given, it specifies which occurrence of old_text to replace. If omitted, all
  occurrences are replaced.

  Same as `text.replace(old_text, new_text)` when instance_num is omitted.

  >>> SUBSTITUTE("Sales Data", "Sales", "Cost")
  u'Cost Data'
  >>> SUBSTITUTE("Quarter 1, 2008", "1", "2", 1)
  u'Quarter 2, 2008'
  >>> SUBSTITUTE("Quarter 1, 2011", "1", "2", 3)
  u'Quarter 1, 2012'

  More tests:
  >>> SUBSTITUTE("Hello world", "", "-")
  u'Hello world'
  >>> SUBSTITUTE("Hello world", " ", "-")
  u'Hello-world'
  >>> SUBSTITUTE("Hello world", " ", 12.1)
  u'Hello12.1world'
  >>> SUBSTITUTE(u"Hello world", u" ", 12.1)
  u'Hello12.1world'
  >>> SUBSTITUTE("Hello world", "world", "")
  u'Hello '
  >>> SUBSTITUTE("Hello", "world", "")
  u'Hello'

  Overlapping matches are all counted when looking for instance_num.
  >>> SUBSTITUTE('abababab', 'abab', 'xxxx')
  u'xxxxxxxx'
  >>> SUBSTITUTE('abababab', 'abab', 'xxxx', 1)
  u'xxxxabab'
  >>> SUBSTITUTE('abababab', 'abab', 'xxxx', 2)
  u'abxxxxab'
  >>> SUBSTITUTE('abababab', 'abab', 'xxxx', 3)
  u'ababxxxx'
  >>> SUBSTITUTE('abababab', 'abab', 'xxxx', 4)
  u'abababab'
  >>> SUBSTITUTE('abababab', 'abab', 'xxxx', 0)
  Traceback (most recent call last):
  ...
  ValueError: instance_num invalid
  >>> SUBSTITUTE( "crème",  "è", "e")
  u'creme'
  >>> SUBSTITUTE(u"crème", u"è", "e")
  u'creme'
  >>> SUBSTITUTE(u"crème",  "è", "e")
  u'creme'
  >>> SUBSTITUTE( "crème", u"è", "e")
  u'creme'
  """
  text = six.text_type(text)
  old_text = six.text_type(old_text)
  new_text = six.text_type(new_text)

  # An empty search string leaves the text unchanged.
  if not old_text:
    return text

  if instance_num is None:
    return text.replace(old_text, new_text)

  if instance_num <= 0:
    raise ValueError("instance_num invalid")

  # No trivial way to replace nth occurrence. Advance one character past each match, so that
  # overlapping occurrences are counted too.
  i = -1
  for _ in xrange(instance_num):
    i = text.find(old_text, i + 1)
    if i < 0:
      # Fewer than instance_num occurrences: nothing to replace.
      return text
  return text[:i] + new_text + text[i + len(old_text):]


def T(value):
  """
  Returns value if value is text, or the empty string when value is not text.

  >>> T('Text')
  u'Text'
  >>> T(826)
  u''
  >>> T('826')
  u'826'
  >>> T(False)
  u''
  >>> T('100 points')
  u'100 points'
  >>> T(AltText('Text'))
  u'Text'
  >>> T(float('nan'))
  u''
  """
  # Byte strings are decoded as UTF-8, text is returned as is, AltText is converted to text,
  # and anything else yields the empty string.
  return (value.decode('utf8') if isinstance(value, six.binary_type) else
          value if isinstance(value, six.text_type) else
          six.text_type(value) if isinstance(value, AltText) else u"")


@unimplemented
def TEXT(number, format_type):    # pylint: disable=unused-argument
  """
  Converts a number into text according to a specified format. It is not yet implemented in
  Grist.
  """
  raise NotImplementedError()


_trim_re = re.compile(r' +')

def TRIM(text):
  """
  Removes all spaces from text except for single spaces between words. Note that TRIM does not
  remove other whitespace such as tab or newline characters.

  >>> TRIM(" First   Quarter\\n    Earnings   ")
  'First Quarter\\n Earnings'
  >>> TRIM("")
  ''
  """
  return _trim_re.sub(' ', text.strip())


def UPPER(text):
  """
  Converts a specified string to uppercase. Same as `text.upper()`.

  >>> UPPER("e. e. cummings")
  'E. E. CUMMINGS'
  >>> UPPER("Apt. 2B")
  'APT. 2B'
  """
  return text.upper()


def VALUE(text):
  """
  Converts a string in accepted date, time or number formats into a number or date.

  Raises ValueError if the text cannot be parsed.

  >>> VALUE("$1,000")
  1000
  >>> assert VALUE("16:48:00") - VALUE("12:00:00") == datetime.timedelta(0, 17280)
  >>> VALUE("01/01/2012")
  datetime.datetime(2012, 1, 1, 0, 0)
  >>> VALUE("")
  0
  >>> VALUE(0)
  0
  >>> VALUE("826")
  826
  >>> VALUE("-826.123123123")
  -826.123123123
  >>> VALUE(float('nan'))
  nan
  >>> VALUE("Invalid")
  Traceback (most recent call last):
  ...
  ValueError: text cannot be parsed to a number
  >>> VALUE("13/13/13")
  Traceback (most recent call last):
  ...
  ValueError: text cannot be parsed to a number
  """
  # This is not particularly robust, but makes an attempt to handle a number of cases: numbers,
  # including optional comma separators, dates/times, leading dollar-sign.
  if isinstance(text, (numbers.Number, datetime.date)):
    return text
  text = text.strip().lstrip('$')
  nocommas = text.replace(',', '')
  if nocommas == "":
    return 0

  try:
    return int(nocommas)
  except ValueError:
    pass

  try:
    return float(nocommas)
  except ValueError:
    pass

  try:
    return dateutil.parser.parse(text)
  except (ValueError, OverflowError):
    # dateutil raises OverflowError when a parsed component is too large; report it the same
    # way as any other unparseable text.
    pass

  raise ValueError('text cannot be parsed to a number')