diff --git a/app/common/NumberFormat.ts b/app/common/NumberFormat.ts index f9750e08..e388535f 100644 --- a/app/common/NumberFormat.ts +++ b/app/common/NumberFormat.ts @@ -35,8 +35,12 @@ export interface NumberFormatOptions extends FormatOptions { currency?: string; } +export function getCurrency(options: NumberFormatOptions, docSettings: DocumentSettings): string { + return options.currency || docSettings.currency || LocaleCurrency.getCurrency(docSettings.locale); +} + export function buildNumberFormat(options: NumberFormatOptions, docSettings: DocumentSettings): Intl.NumberFormat { - const currency = options.currency || docSettings.currency || LocaleCurrency.getCurrency(docSettings.locale); + const currency = getCurrency(options, docSettings); const nfOptions: Intl.NumberFormatOptions = parseNumMode(options.numMode, currency); // numSign is implemented outside of Intl.NumberFormat since the latter's similar 'currencySign' @@ -62,7 +66,7 @@ export function buildNumberFormat(options: NumberFormatOptions, docSettings: Doc return new Intl.NumberFormat(docSettings.locale, nfOptions); } -function parseNumMode(numMode?: NumMode, currency?: string): Intl.NumberFormatOptions { +export function parseNumMode(numMode?: NumMode, currency?: string): Intl.NumberFormatOptions { switch (numMode) { case 'currency': return {style: 'currency', currency, currencyDisplay: 'narrowSymbol' }; case 'decimal': return {useGrouping: true}; diff --git a/app/common/NumberParse.ts b/app/common/NumberParse.ts new file mode 100644 index 00000000..ab68da65 --- /dev/null +++ b/app/common/NumberParse.ts @@ -0,0 +1,201 @@ +/** + * Counterpart of NumberFormat.ts. + * Generic functionality for parsing numbers formatted by Intl.NumberFormat, + * not tied to documents or anything. 
+ */
+
+import { NumMode, parseNumMode } from 'app/common/NumberFormat';
+import escapeRegExp = require('lodash/escapeRegExp');
+import last = require('lodash/last');
+
+// Possible values of Intl.NumberFormat.formatToParts[i].type
+// Seems Intl.NumberFormatPartTypes is not quite complete
+type NumberFormatPartTypes = Intl.NumberFormatPartTypes | 'exponentSeparator';
+
+/**
+ * Returns a map converting the decimal digits used in the given formatter
+ * to the digits 0123456789.
+ * Excludes digits which don't need conversion, so for many locales this is empty.
+ *
+ * @param locale - Locale passed to Intl.NumberFormat to discover its digits.
+ * @returns Map from locale-specific digit to the corresponding digit in 0123456789.
+ */
+function getDigitsMap(locale: string): Map<string, string> {
+  const formatter = new Intl.NumberFormat(locale);
+  const result = new Map<string, string>();
+  for (let i = 0; i < 10; i++) {
+    const digit = String(i);
+    const localeDigit = formatter.format(i);
+    if (localeDigit !== digit) {
+      result.set(localeDigit, digit);
+    }
+  }
+  return result;
+}
+
+/**
+ * Parses strings that look like numbers formatted by Intl.NumberFormat for one locale and currency.
+ * The locale-specific symbols are discovered once, in the constructor, by formatting a sample
+ * number in each NumMode and inspecting the resulting parts.
+ */
+export default class NumberParse {
+  // Regex for whitespace and some control characters we need to remove
+  // 200e = Left-to-right mark
+  // 200f = Right-to-left mark
+  // 061c = Arabic letter mark
+  public static readonly removeCharsRegex = /[\s\u200e\u200f\u061c]/g;
+
+  // Many attributes are public for easy testing.
+  public readonly currencySymbol: string;
+  public readonly percentageSymbol: string;
+  public readonly digitGroupSeparator: string;
+  public readonly digitGroupSeparatorCurrency: string;
+  public readonly exponentSeparator: string;
+  public readonly decimalSeparator: string;
+  public readonly minusSign: string;
+
+  public readonly digitsMap: Map<string, string>;
+
+  public readonly currencyEndsInMinusSign: boolean;
+
+  private readonly _exponentSeparatorRegex: RegExp;
+  private readonly _digitGroupSeparatorRegex: RegExp;
+
+  // Function which replaces keys of digitsMap (i.e. locale-specific digits)
+  // with corresponding digits from 0123456789.
+  private readonly _replaceDigits: (s: string) => string;
+
+  /**
+   * @param locale - Locale whose number formatting conventions should be recognised.
+   * @param currency - Currency passed to Intl.NumberFormat when formatting the sample currency amount.
+   */
+  constructor(locale: string, currency: string) {
+    const numModes: NumMode[] = ['currency', 'percent', 'scientific', 'decimal'];
+    const parts = new Map<NumMode, Intl.NumberFormatPart[]>();
+    for (const numMode of numModes) {
+      const formatter = new Intl.NumberFormat(locale, parseNumMode(numMode, currency));
+      // A negative number with many digits and a fraction, so that the minus sign,
+      // group separator and decimal separator all show up in the parts.
+      const formatParts = formatter.formatToParts(-1234567.5678);
+      parts.set(numMode, formatParts);
+    }
+
+    function getPart(partType: NumberFormatPartTypes, numMode: NumMode = "decimal"): string {
+      const part = parts.get(numMode)!.find(p => p.type === partType);
+      // Only time we expect `part` to be undefined is for digitGroupSeparatorCurrency
+      return part?.value || '';
+    }
+
+    this.currencySymbol = getPart('currency', 'currency');
+    this.percentageSymbol = getPart('percentSign', 'percent');
+    this.exponentSeparator = getPart('exponentSeparator', 'scientific');
+    this.minusSign = getPart('minusSign');
+    this.decimalSeparator = getPart('decimal');
+
+    // Separators for groups of digits, typically groups of 3, i.e. 'thousands separators'.
+    // A few locales have different separators for currency and non-currency.
+    // We check for both but don't check which one is used, currency or not.
+    this.digitGroupSeparator = getPart('group');
+    this.digitGroupSeparatorCurrency = getPart('group', 'currency');
+
+    // A few locales format negative currency amounts ending in '-', e.g. '€ 1,00-'
+    this.currencyEndsInMinusSign = last(parts.get('currency'))!.type === 'minusSign';
+
+    // Since JS and Python allow both e and E for scientific notation, it seems fair that other
+    // locales should be case insensitive for this.
+    // If no exponent separator was found, use a regex that never matches:
+    // an empty pattern would match at index 0 and make parse() prepend 'e' to every input.
+    this._exponentSeparatorRegex = this.exponentSeparator
+      ? new RegExp(escapeRegExp(this.exponentSeparator), 'i')
+      : /(?!)/;
+
+    // Overall the parser is quite lax about digit separators.
+    // We only require that the separator is followed by at least 2 digits,
+    // because India groups digits in pairs after the first 3.
+    // More careful checking is probably more complicated than is worth it.
+    this._digitGroupSeparatorRegex = new RegExp(
+      `[${escapeRegExp(
+        this.digitGroupSeparator +
+        this.digitGroupSeparatorCurrency
+      )}](\\d\\d)`,
+      'g'
+    );
+
+    const digitsMap = this.digitsMap = getDigitsMap(locale);
+    if (digitsMap.size === 0) {
+      this._replaceDigits = (s: string) => s;
+    } else {
+      const digitsRegex = new RegExp([...digitsMap.keys()].join("|"), "g");
+      this._replaceDigits = (s: string) => s.replace(digitsRegex, d => digitsMap.get(d) || d);
+    }
+  }
+
+  /**
+   * Returns a number if the string looks like that number formatted by Grist using this parser's locale and currency
+   * (or at least close).
+   * Returns null otherwise.
+   *
+   * @param value - Text to interpret as a number.
+   * @returns The parsed number, or null if `value` is empty or not recognised as a number.
+   */
+  public parse(value: string): number | null {
+    // Remove characters before checking for parentheses on the ends of the string.
+    const [value2, isCurrency] = removeSymbol(value, this.currencySymbol);
+    const [value3, isPercent] = removeSymbol(value2, this.percentageSymbol);
+
+    // Remove whitespace and special characters, after currency because some currencies contain spaces.
+    value = value3.replace(NumberParse.removeCharsRegex, "");
+
+    const parenthesised = value[0] === "(" && value[value.length - 1] === ")";
+    if (parenthesised) {
+      value = value.substring(1, value.length - 1);
+    }
+
+    // Must check for empty string directly because Number('') is 0 :facepalm:
+    // Check early so we can return early for performance.
+    // Nothing after this should potentially produce an empty string.
+    if (value === '') {
+      return null;
+    }
+
+    // Replace various symbols with the standard versions recognised by JS Number.
+    // Note that this also allows the 'standard' symbols ('e', '.', '-', and '0123456789')
+    // even if the locale doesn't use them when formatting,
+    // although '.' will still be removed if it's a digit separator.
+
+    // Check for exponent separator before replacing digits
+    // because it can contain locale-specific digits representing '10' as in 'x10^'.
+    value = value.replace(this._exponentSeparatorRegex, "e");
+    value = this._replaceDigits(value);
+
+    // Must come after replacing digits because the regex uses \d
+    // which doesn't work for locale-specific digits.
+    // This simply removes the separators, $1 is a captured group of digits which we keep.
+    value = value.replace(this._digitGroupSeparatorRegex, "$1");
+
+    // Must come after the digit separator replacement
+    // because the digit separator might be '.'
+    value = value.replace(this.decimalSeparator, '.');
+
+    // .replace with a string only replaces once,
+    // and a number can contain two minus signs when using scientific notation
+    value = value.replace(this.minusSign, "-");
+    value = value.replace(this.minusSign, "-");
+
+    // Move '-' from the end to the beginning when appropriate (which is rare)
+    if (isCurrency && this.currencyEndsInMinusSign && value.endsWith("-")) {
+      value = "-" + value.substring(0, value.length - 1);
+    }
+
+    // Number is more strict than parseFloat which allows extra trailing characters.
+    let result = Number(value);
+    if (isNaN(result)) {
+      return null;
+    }
+
+    // Parentheses represent a negative number, e.g. (123) -> -123
+    // (-123) is treated as an error
+    if (parenthesised) {
+      if (result <= 0) {
+        return null;
+      }
+      result = -result;
+    }
+
+    if (isPercent) {
+      // Divide rather than multiply by 0.01: 0.01 is not exactly representable,
+      // so e.g. 35 * 0.01 gives 0.35000000000000003 while 35 / 100 gives 0.35.
+      result /= 100;
+    }
+
+    return result;
+  }
+}
+
+/**
+ * Returns a tuple [removed, wasPresent]
+ * - `removed` is the given string `value` with `symbol` removed at most once.
+ * - `wasPresent` is `true` if `symbol` was present in `value` and was thus removed.
+ */
+function removeSymbol(value: string, symbol: string): [string, boolean] {
+  const removed = value.replace(symbol, "");
+  const wasPresent = removed.length < value.length;
+  return [removed, wasPresent];
+}