/**
 * Counterpart of NumberFormat.ts.
 * Generic functionality for parsing numbers formatted by Intl.NumberFormat,
 * not tied to documents or anything.
 */

import {DocumentSettings} from 'app/common/DocumentSettings';
import {getDistinctValues} from 'app/common/gutil';
import {getCurrency, NumberFormatOptions, NumMode, parseNumMode} from 'app/common/NumberFormat';
import escapeRegExp = require('lodash/escapeRegExp');
import last = require('lodash/last');

// Possible values of Intl.NumberFormat.formatToParts[i].type
// Seems Intl.NumberFormatPartTypes is not quite complete
type NumberFormatPartTypes = Intl.NumberFormatPartTypes | 'exponentSeparator';

/**
 * Returns a map converting the decimal digits used in the given formatter
 * to the digits 0123456789.
 * Excludes digits which don't need conversion, so for many locales this is empty.
 */
function getDigitsMap(locale: string) {
  const formatter = Intl.NumberFormat(locale);
  const result = new Map<string, string>();
  for (let i = 0; i < 10; i++) {
    const digit = String(i);
    const localeDigit = formatter.format(i);
    if (localeDigit !== digit) {
      result.set(localeDigit, digit);
    }
  }
  return result;
}

interface ParsedOptions {
  isPercent: boolean;
  isCurrency: boolean;
  isParenthesised: boolean;
  hasDigitGroupSeparator: boolean;
  isScientific: boolean;
}

export default class NumberParse {
  // Regex for whitespace and some control characters we need to remove
  // 200e = Left-to-right mark
  // 200f = Right-to-left mark
  // 061c = Arabic letter mark
  public static readonly removeCharsRegex = /[\s\u200e\u200f\u061c]/g;

  public static fromSettings(docSettings: DocumentSettings, options: NumberFormatOptions = {}) {
    return new NumberParse(docSettings.locale, getCurrency(options, docSettings));
  }

  // Many attributes are public for easy testing.
  public readonly currencySymbol: string;
  public readonly percentageSymbol: string;
  public readonly digitGroupSeparator: string;
  public readonly digitGroupSeparatorCurrency: string;
  public readonly exponentSeparator: string;
  public readonly decimalSeparator: string;
  public readonly minusSign: string;
  public readonly defaultNumDecimalsCurrency: number;

  public readonly digitsMap: Map<string, string>;

  public readonly currencyEndsInMinusSign: boolean;

  private readonly _exponentSeparatorRegex: RegExp;
  private readonly _digitGroupSeparatorRegex: RegExp;

  // Function which replaces keys of digitsMap (i.e. locale-specific digits)
  // with corresponding digits from 0123456789.
  private readonly _replaceDigits: (s: string) => string;

  constructor(locale: string, currency: string) {
    const parts = new Map<NumMode, Intl.NumberFormatPart[]>();
    for (const numMode of NumMode.values) {
      const formatter = Intl.NumberFormat(locale, parseNumMode(numMode, currency));
      const formatParts = formatter.formatToParts(-1234567.5678);
      parts.set(numMode, formatParts);
    }

    function getPart(partType: NumberFormatPartTypes, numMode: NumMode = "decimal"): string {
      const part = parts.get(numMode)!.find(p => p.type === partType);
      // Only time we expect `part` to be undefined is for digitGroupSeparatorCurrency
      return part?.value || '';
    }

    this.currencySymbol = getPart('currency', 'currency');
    this.percentageSymbol = getPart('percentSign', 'percent');
    this.exponentSeparator = getPart('exponentSeparator', 'scientific');
    this.minusSign = getPart('minusSign');
    this.decimalSeparator = getPart('decimal');

    // Separators for groups of digits, typically groups of 3, i.e. 'thousands separators'.
    // A few locales have different separators for currency and non-currency.
    // We check for both but don't check which one is used, currency or not.
    this.digitGroupSeparator = getPart('group');
    this.digitGroupSeparatorCurrency = getPart('group', 'currency');

    // A few locales format negative currency amounts ending in '-', e.g. '€ 1,00-'
    this.currencyEndsInMinusSign = last(parts.get('currency'))!.type === 'minusSign';

    // Default number of fractional digits for currency,
    // e.g. this is 2 for USD because 1 is formatted as $1.00
    this.defaultNumDecimalsCurrency = getPart("fraction", "currency")?.length || 0;

    // Since JS and Python allow both e and E for scientific notation, it seems fair that other
    // locales should be case insensitive for this.
    this._exponentSeparatorRegex = new RegExp(escapeRegExp(this.exponentSeparator), 'i');

    // Overall the parser is quite lax about digit separators.
    // We only require that the separator is followed by at least 2 digits,
    // because India groups digits in pairs after the first 3.
    // More careful checking is probably more complicated than is worth it.
    this._digitGroupSeparatorRegex = new RegExp(
      `[${escapeRegExp(
        this.digitGroupSeparator +
        this.digitGroupSeparatorCurrency
      )}](\\d\\d)`,
      'g'
    );

    const digitsMap = this.digitsMap = getDigitsMap(locale);
    if (digitsMap.size === 0) {
      this._replaceDigits = (s: string) => s;
    } else {
      const digitsRegex = new RegExp([...digitsMap.keys()].join("|"), "g");
      this._replaceDigits = (s: string) => s.replace(digitsRegex, d => digitsMap.get(d) || d);
    }
  }

  /**
   * If the string looks like a number formatted by Grist using this parser's locale and currency (or at least close)
   * then returns an object where:
   *   - `result` is that number, the only thing most callers need
   *   - `cleaned` is a string derived from `value` which can be parsed directly by Number, although `result`
   *      is still processed a bit further than that, e.g. dividing by 100 for percentages.
   *   - `options` describes how the number was apparently formatted.
   *
   * Returns null otherwise.
   */
  public parse(value: string): { result: number, cleaned: string, options: ParsedOptions } | null {
    // Remove characters before checking for parentheses on the ends of the string.
    const [value2, isCurrency] = removeSymbol(value, this.currencySymbol);
    const [value3, isPercent] = removeSymbol(value2, this.percentageSymbol);

    // Remove whitespace and special characters, after currency because some currencies contain spaces.
    value = value3.replace(NumberParse.removeCharsRegex, "");

    const isParenthesised = value[0] === "(" && value[value.length - 1] === ")";
    if (isParenthesised) {
      value = value.substring(1, value.length - 1);
    }

    // Must check for empty string directly because Number('') is 0 :facepalm:
    // Check early so we can return early for performance.
    // Nothing after this should potentially produce an empty string.
    if (value === '') {
      return null;
    }

    // Replace various symbols with the standard versions recognised by JS Number.
    // Note that this also allows the 'standard' symbols ('e', '.', '-', and '0123456789')
    // even if the locale doesn't use them when formatting,
    // although '.' will still be removed if it's a digit separator.

    // Check for exponent separator before replacing digits
    // because it can contain locale-specific digits representing '10' as in 'x10^'.
    const withExponent = value;
    value = value.replace(this._exponentSeparatorRegex, "e");
    const isScientific = withExponent !== value;

    value = this._replaceDigits(value);

    // Must come after replacing digits because the regex uses \d
    // which doesn't work for locale-specific digits.
    // This simply removes the separators, $1 is a captured group of digits which we keep.
    const withSeparators = value;
    value = value.replace(this._digitGroupSeparatorRegex, "$1");
    const hasDigitGroupSeparator = withSeparators !== value;

    // Must come after the digit separator replacement
    // because the digit separator might be '.'
    value = value.replace(this.decimalSeparator, '.');

    // .replace with a string only replaces once,
    // and a number can contain two minus signs when using scientific notation
    value = value.replace(this.minusSign, "-");
    value = value.replace(this.minusSign, "-");

    // Move '-' from the end to the beginning when appropriate (which is rare)
    if (isCurrency && this.currencyEndsInMinusSign && value.endsWith("-")) {
      value = "-" + value.substring(0, value.length - 1);
    }

    // Number is more strict than parseFloat which allows extra trailing characters.
    let result = Number(value);
    if (isNaN(result)) {
      return null;
    }

    // Parentheses represent a negative number, e.g. (123) -> -123
    // (-123) is treated as an error
    if (isParenthesised) {
      if (result <= 0) {
        return null;
      }
      result = -result;
    }

    if (isPercent) {
      result *= 0.01;
    }

    return {
      result,
      cleaned: value,
      options: {isCurrency, isPercent, isParenthesised, hasDigitGroupSeparator, isScientific}
    };
  }

  public guessOptions(values: Array<string | null>): NumberFormatOptions {
    // null: undecided
    // true: negative numbers should be parenthesised
    // false: they should not
    let parens: boolean | null = null;

    // If any of the numbers have thousands separators, that's enough to guess that option
    let anyHasDigitGroupSeparator = false;

    // Minimum number of decimal places, guessed by looking for trailing 0s after the decimal point
    let decimals = 0;
    const decimalsRegex = /\.\d+/;
    // Maximum number of decimal places. We never actually guess a value for this option,
    // but for currencies we need to check if there are fewer decimal places than the default.
    let maxDecimals = 0;

    // Keep track of the number of modes seen to pick the most common
    const modes = {} as Record<NumMode, number>;
    for (const mode of NumMode.values) {
      modes[mode] = 0;
    }

    for (const value of getDistinctValues(values)) {
      if (!value) {
        continue;
      }
      const parsed = this.parse(value);
      if (!parsed) {
        continue;
      }
      const {
        result,
        cleaned,
        options: {isCurrency, isPercent, isParenthesised, hasDigitGroupSeparator, isScientific}
      } = parsed;

      if (result < 0 && !isParenthesised) {
        // If we see a negative number not surrounded by parens, assume that any other parens mean something else
        parens = false;
      } else if (parens === null && isParenthesised) {
        // If we're still unsure about parens (i.e. the above case hasn't been encountered)
        // then one parenthesised number is enough to guess that the parens option should be used.
        parens = true;
      }

      // If any of the numbers have thousands separators, that's enough to guess that option
      anyHasDigitGroupSeparator = anyHasDigitGroupSeparator || hasDigitGroupSeparator;

      let mode: NumMode = "decimal";
      if (isCurrency) {
        mode = "currency";
      } else if (isPercent) {
        mode = "percent";
      } else if (isScientific) {
        mode = "scientific";
      }
      modes[mode] += 1;

      const decimalsMatch = decimalsRegex.exec(cleaned);
      if (decimalsMatch) {
        // Number of digits after the '.' (which is part of the match, hence the -1)
        const numDecimals = decimalsMatch[0].length - 1;
        maxDecimals = Math.max(maxDecimals, numDecimals);
        if (decimalsMatch[0].endsWith("0")) {
          decimals = Math.max(decimals, numDecimals);
        }
      }
    }

    const maxCount = Math.max(...Object.values(modes));
    if (maxCount === 0) {
      // No numbers parsed at all, so don't guess any options
      return {};
    }

    const result: NumberFormatOptions = {};

    // Find the most common mode.
    const maxMode: NumMode = NumMode.values.find((k) => modes[k] === maxCount)!;

    // 'decimal' is the default mode above when counting,
    // but only guess it as an actual option if digit separators were used at least once.
    if (maxMode !== "decimal" || anyHasDigitGroupSeparator) {
      result.numMode = maxMode;
    }

    if (parens) {
      result.numSign = "parens";
    }

    // Specify minimum number of decimal places if we saw any trailing 0s after '.'
    // Otherwise explicitly set it to 0 if needed to suppress the default for that currency.
    if (decimals > 0 || maxMode === "currency" && maxDecimals < this.defaultNumDecimalsCurrency) {
      result.decimals = decimals;
    }

    return result;
  }
}

/**
 * Returns a tuple [removed, wasPresent]
 * - `removed` is the given string `value` with `symbol` removed at most once.
 * - `wasPresent` is `true` if `symbol` was present in `value` and was thus removed.
 */
function removeSymbol(value: string, symbol: string): [string, boolean] {
  const removed = value.replace(symbol, "");
  const wasPresent = removed.length < value.length;
  return [removed, wasPresent];
}