diff --git a/app/client/components/TypeConversion.ts b/app/client/components/TypeConversion.ts index 904d4fc9..b025d91c 100644 --- a/app/client/components/TypeConversion.ts +++ b/app/client/components/TypeConversion.ts @@ -4,11 +4,13 @@ */ // tslint:disable:no-console +import {isString} from 'app/client/lib/sessionObs'; import {DocModel} from 'app/client/models/DocModel'; import {ColumnRec} from 'app/client/models/entities/ColumnRec'; import * as gristTypes from 'app/common/gristTypes'; import {isFullReferencingType} from 'app/common/gristTypes'; import * as gutil from 'app/common/gutil'; +import NumberParse from 'app/common/NumberParse'; import {dateTimeWidgetOptions, guessDateFormat} from 'app/common/parseDate'; import {TableData} from 'app/common/TableData'; import {decodeObject} from 'app/plugin/objtypes'; @@ -108,6 +110,17 @@ export async function prepTransformColInfo(docModel: DocModel, origCol: ColumnRe widgetOptions = dateTimeWidgetOptions(dateFormat, true); break; } + case 'Numeric': + case 'Int': { + if (["Numeric", "Int"].includes(sourceCol.type())) { + widgetOptions = prevOptions; + } else { + const numberParse = NumberParse.fromSettings(docModel.docData.docSettings()); + const colValues = tableData.getColValues(sourceCol.colId()) || []; + widgetOptions = numberParse.guessOptions(colValues.filter(isString)); + } + break; + } case 'Choice': { if (Array.isArray(prevOptions.choices)) { // Use previous choices if they are set, e.g. if converting from ChoiceList diff --git a/app/common/NumberFormat.ts b/app/common/NumberFormat.ts index 99735ea3..ccfc8922 100644 --- a/app/common/NumberFormat.ts +++ b/app/common/NumberFormat.ts @@ -19,12 +19,14 @@ */ import {clamp} from 'app/common/gutil'; +import {StringUnion} from 'app/common/StringUnion'; import * as LocaleCurrency from "locale-currency"; import {FormatOptions} from 'app/common/ValueFormatter'; import {DocumentSettings} from 'app/common/DocumentSettings'; // Options for number formatting. -export type NumMode = 'currency' | 'decimal' | 'percent' | 'scientific'; +export const NumMode = StringUnion('currency', 'decimal', 'percent', 'scientific'); +export type NumMode = typeof NumMode.type; export type NumSign = 'parens'; export interface NumberFormatOptions extends FormatOptions { diff --git a/app/common/NumberParse.ts b/app/common/NumberParse.ts index ab68da65..bda8b1be 100644 --- a/app/common/NumberParse.ts +++ b/app/common/NumberParse.ts @@ -4,7 +4,9 @@ * not tied to documents or anything. */ -import { NumMode, parseNumMode } from 'app/common/NumberFormat'; +import {DocumentSettings} from 'app/common/DocumentSettings'; +import {getDistinctValues} from 'app/common/gutil'; +import {getCurrency, NumberFormatOptions, NumMode, parseNumMode} from 'app/common/NumberFormat'; import escapeRegExp = require('lodash/escapeRegExp'); import last = require('lodash/last'); @@ -30,6 +32,14 @@ function getDigitsMap(locale: string) { return result; } +interface ParsedOptions { + isPercent: boolean; + isCurrency: boolean; + isParenthesised: boolean; + hasDigitGroupSeparator: boolean; + isScientific: boolean; +} + export default class NumberParse { // Regex for whitespace and some control characters we need to remove // 200e = Left-to-right mark @@ -37,6 +47,10 @@ export default class NumberParse { // 061c = Arabic letter mark public static readonly removeCharsRegex = /[\s\u200e\u200f\u061c]/g; + public static fromSettings(docSettings: DocumentSettings, options: NumberFormatOptions = {}) { + return new NumberParse(docSettings.locale, getCurrency(options, docSettings)); + } + // Many attributes are public for easy testing. public readonly currencySymbol: string; public readonly percentageSymbol: string; @@ -45,6 +59,7 @@ export default class NumberParse { public readonly exponentSeparator: string; public readonly decimalSeparator: string; public readonly minusSign: string; + public readonly defaultNumDecimalsCurrency: number; public readonly digitsMap: Map; @@ -58,9 +73,8 @@ export default class NumberParse { private readonly _replaceDigits: (s: string) => string; constructor(locale: string, currency: string) { - const numModes: NumMode[] = ['currency', 'percent', 'scientific', 'decimal']; const parts = new Map(); - for (const numMode of numModes) { + for (const numMode of NumMode.values) { const formatter = Intl.NumberFormat(locale, parseNumMode(numMode, currency)); const formatParts = formatter.formatToParts(-1234567.5678); parts.set(numMode, formatParts); @@ -87,6 +101,10 @@ export default class NumberParse { // A few locales format negative currency amounts ending in '-', e.g. '€ 1,00-' this.currencyEndsInMinusSign = last(parts.get('currency'))!.type === 'minusSign'; + // Default number of fractional digits for currency, + // e.g. this is 2 for USD because 1 is formatted as $1.00 + this.defaultNumDecimalsCurrency = getPart("fraction", "currency")?.length || 0; + // Since JS and Python allow both e and E for scientific notation, it seems fair that other // locales should be case insensitive for this. this._exponentSeparatorRegex = new RegExp(escapeRegExp(this.exponentSeparator), 'i'); @@ -113,11 +131,16 @@ export default class NumberParse { } /** - * Returns a number if the string looks like that number formatted by Grist using this parser's locale and currency - * (or at least close). + * If the string looks like a number formatted by Grist using this parser's locale and currency (or at least close) + * then returns an object where: + * - `result` is that number, the only thing most callers need + * - `cleaned` is a string derived from `value` which can be parsed directly by Number, although `result` + * is still processed a bit further than that, e.g. dividing by 100 for percentages. + * - `options` describes how the number was apparently formatted. + * * Returns null otherwise. */ - public parse(value: string): number | null { + public parse(value: string): { result: number, cleaned: string, options: ParsedOptions } | null { // Remove characters before checking for parentheses on the ends of the string. const [value2, isCurrency] = removeSymbol(value, this.currencySymbol); const [value3, isPercent] = removeSymbol(value2, this.percentageSymbol); @@ -125,8 +148,8 @@ export default class NumberParse { // Remove whitespace and special characters, after currency because some currencies contain spaces. value = value3.replace(NumberParse.removeCharsRegex, ""); - const parenthesised = value[0] === "(" && value[value.length - 1] === ")"; - if (parenthesised) { + const isParenthesised = value[0] === "(" && value[value.length - 1] === ")"; + if (isParenthesised) { value = value.substring(1, value.length - 1); } @@ -144,13 +167,18 @@ export default class NumberParse { // Check for exponent separator before replacing digits // because it can contain locale-specific digits representing '10' as in 'x10^'. + const withExponent = value; value = value.replace(this._exponentSeparatorRegex, "e"); + const isScientific = withExponent !== value; + value = this._replaceDigits(value); // Must come after replacing digits because the regex uses \d // which doesn't work for locale-specific digits. // This simply removes the separators, $1 is a captured group of digits which we keep. + const withSeparators = value; value = value.replace(this._digitGroupSeparatorRegex, "$1"); + const hasDigitGroupSeparator = withSeparators !== value; // Must come after the digit separator replacement // because the digit separator might be '.' @@ -174,7 +202,7 @@ export default class NumberParse { // Parentheses represent a negative number, e.g. (123) -> -123 // (-123) is treated as an error - if (parenthesised) { + if (isParenthesised) { if (result <= 0) { return null; } @@ -185,6 +213,109 @@ export default class NumberParse { result *= 0.01; } + return { + result, + cleaned: value, + options: {isCurrency, isPercent, isParenthesised, hasDigitGroupSeparator, isScientific} + }; + } + + public guessOptions(values: Array): NumberFormatOptions { + // null: undecided + // true: negative numbers should be parenthesised + // false: they should not + let parens: boolean | null = null; + + // If any of the numbers have thousands separators, that's enough to guess that option + let anyHasDigitGroupSeparator = false; + + // Minimum number of decimal places, guessed by looking for trailing 0s after the decimal point + let decimals = 0; + const decimalsRegex = /\.\d+/; + // Maximum number of decimal places. We never actually guess a value for this option, + // but for currencies we need to check if there are fewer decimal places than the default. + let maxDecimals = 0; + + // Keep track of the number of modes seen to pick the most common + const modes = {} as Record; + for (const mode of NumMode.values) { + modes[mode] = 0; + } + + for (const value of getDistinctValues(values)) { + if (!value) { + continue; + } + const parsed = this.parse(value); + if (!parsed) { + continue; + } + const { + result, + cleaned, + options: {isCurrency, isPercent, isParenthesised, hasDigitGroupSeparator, isScientific} + } = parsed; + + if (result < 0 && !isParenthesised) { + // If we see a negative number not surrounded by parens, assume that any other parens mean something else + parens = false; + } else if (parens === null && isParenthesised) { + // If we're still unsure about parens (i.e. the above case hasn't been encountered) + // then one parenthesised number is enough to guess that the parens option should be used. + parens = true; + } + + // If any of the numbers have thousands separators, that's enough to guess that option + anyHasDigitGroupSeparator = anyHasDigitGroupSeparator || hasDigitGroupSeparator; + + let mode: NumMode = "decimal"; + if (isCurrency) { + mode = "currency"; + } else if (isPercent) { + mode = "percent"; + } else if (isScientific) { + mode = "scientific"; + } + modes[mode] += 1; + + const decimalsMatch = decimalsRegex.exec(cleaned); + if (decimalsMatch) { + // Number of digits after the '.' (which is part of the match, hence the -1) + const numDecimals = decimalsMatch[0].length - 1; + maxDecimals = Math.max(maxDecimals, numDecimals); + if (decimalsMatch[0].endsWith("0")) { + decimals = Math.max(decimals, numDecimals); + } + } + } + + const maxCount = Math.max(...Object.values(modes)); + if (maxCount === 0) { + // No numbers parsed at all, so don't guess any options + return {}; + } + + const result: NumberFormatOptions = {}; + + // Find the most common mode. + const maxMode: NumMode = NumMode.values.find((k) => modes[k] === maxCount)!; + + // 'decimal' is the default mode above when counting, + // but only guess it as an actual option if digit separators were used at least once. + if (maxMode !== "decimal" || anyHasDigitGroupSeparator) { + result.numMode = maxMode; + } + + if (parens) { + result.numSign = "parens"; + } + + // Specify minimum number of decimal places if we saw any trailing 0s after '.' + // Otherwise explicitly set it to 0 if needed to suppress the default for that currency. + if (decimals > 0 || maxMode === "currency" && maxDecimals < this.defaultNumDecimalsCurrency) { + result.decimals = decimals; + } + return result; } } diff --git a/app/common/ValueGuesser.ts b/app/common/ValueGuesser.ts index 6820c140..a3b16b1d 100644 --- a/app/common/ValueGuesser.ts +++ b/app/common/ValueGuesser.ts @@ -1,7 +1,11 @@ import {DocData} from 'app/common/DocData'; import {DocumentSettings} from 'app/common/DocumentSettings'; +import {countIf} from 'app/common/gutil'; +import {NumberFormatOptions} from 'app/common/NumberFormat'; +import NumberParse from 'app/common/NumberParse'; import {dateTimeWidgetOptions, guessDateFormat} from 'app/common/parseDate'; import {createFormatter} from 'app/common/ValueFormatter'; +import {createParserRaw, ValueParser} from 'app/common/ValueParser'; import * as moment from 'moment-timezone'; interface GuessedColInfo { @@ -9,7 +13,7 @@ interface GuessedColInfo { widgetOptions?: object; } -interface GuessResult { +export interface GuessResult { values?: any[]; colInfo: GuessedColInfo; } @@ -39,7 +43,8 @@ abstract class ValueGuesser { const {type, widgetOptions} = colInfo; const formatter = createFormatter(type, widgetOptions || {}, docSettings); const result: any[] = []; - const maxUnparsed = values.length * 0.1; // max number of non-parsed strings to allow before giving up + // max number of non-parsed strings to allow before giving up + const maxUnparsed = countIf(values, v => Boolean(v)) * 0.1; let unparsed = 0; for (const value of values) { @@ -94,14 +99,22 @@ class BoolGuesser extends ValueGuesser { } class NumericGuesser extends ValueGuesser { + private _parser: ValueParser; + constructor(docSettings: DocumentSettings, private _options: NumberFormatOptions) { + super(); + this._parser = createParserRaw('Numeric', _options, docSettings); + } + public colInfo(): GuessedColInfo { - // TODO parse and guess options for formatted numbers, e.g. currency amounts - return {type: 'Numeric'}; + const result: GuessedColInfo = {type: 'Numeric'}; + if (Object.keys(this._options).length) { + result.widgetOptions = this._options; + } + return result; } public parse(value: string): number | string { - const parsed = Number(value); - return !isNaN(parsed) ? parsed : value; + return this._parser.cleanParse(value); } } @@ -144,7 +157,10 @@ export function guessColInfo( return ( new BoolGuesser() .guess(values, docSettings) || - new NumericGuesser() + new NumericGuesser( + docSettings, + NumberParse.fromSettings(docSettings).guessOptions(values) + ) .guess(values, docSettings) || new DateGuesser(guessDateFormat(values, timezone) || "YYYY-MM-DD", timezone) .guess(values, docSettings) || diff --git a/app/common/ValueParser.ts b/app/common/ValueParser.ts index 9b0e4afb..519b1fd0 100644 --- a/app/common/ValueParser.ts +++ b/app/common/ValueParser.ts @@ -6,7 +6,7 @@ import * as gristTypes from 'app/common/gristTypes'; import {getReferencedTableId, isFullReferencingType} from 'app/common/gristTypes'; import * as gutil from 'app/common/gutil'; import {safeJsonParse} from 'app/common/gutil'; -import {getCurrency, NumberFormatOptions} from 'app/common/NumberFormat'; +import {NumberFormatOptions} from 'app/common/NumberFormat'; import NumberParse from 'app/common/NumberParse'; import {parseDateStrict, parseDateTime} from 'app/common/parseDate'; import {MetaRowRecord, TableData} from 'app/common/TableData'; @@ -53,11 +53,11 @@ export class NumericParser extends ValueParser { constructor(type: string, options: NumberFormatOptions, docSettings: DocumentSettings) { super(type, options, docSettings); - this._parse = new NumberParse(docSettings.locale, getCurrency(options, docSettings)); + this._parse = NumberParse.fromSettings(docSettings, options); } public parse(value: string): number | null { - return this._parse.parse(value); + return this._parse.parse(value)?.result ?? null; } }