import escapeRegExp = require('lodash/escapeRegExp');
import last = require('lodash/last');
import memoize = require('lodash/memoize');
import {getDistinctValues, isNonNullish} from 'app/common/gutil';
// Simply importing 'moment-guess' inconsistently imports bundle.js or bundle.esm.js depending on environment
import guessFormat from '@gristlabs/moment-guess/dist/bundle.js';
import moment from 'moment-timezone';

// Two-digit years (the YY format) must be interpreted the same way by the datepicker and by
// moment parsing: add 2000 if the result is at most 10 years greater than the current year;
// otherwise add 1900. See
// https://bootstrap-datepicker.readthedocs.io/en/latest/options.html#assumenearbyyear and
// "Parsing two digit years" in https://momentjs.com/docs/#/parsing/string-format/.
export const TWO_DIGIT_YEAR_THRESHOLD = 10;

// Largest two-digit value that still maps into the 2000s. Computed once, when the module loads.
const MAX_TWO_DIGIT_YEAR = new Date().getFullYear() + TWO_DIGIT_YEAR_THRESHOLD - 2000;

// Moment suggests that overriding this is fine, but we need to force TypeScript to allow it.
(moment as any).parseTwoDigitYear = (yearString: string): number => {
  const twoDigits = parseInt(yearString, 10);
  const century = twoDigits > MAX_TWO_DIGIT_YEAR ? 1900 : 2000;
  return twoDigits + century;
};

// Order of formats to try if the date cannot be parsed as the currently set format.
// Formats are parsed in momentjs strict mode, but separator matching and the MM/DD
// two digit requirement are ignored. Also, partial completion is permitted, so formats
// may match even if only beginning elements are provided.
// TODO: These should be affected by the user's locale/settings.
// TODO: We may want to consider adding default time formats as well to support more
// time formats.
const PARSER_FORMATS: string[] = [
  'M D YYYY',
  'M D YY',
  'M D',
  'M',
  'MMMM D YYYY',
  'MMMM D',
  'MMMM Do YYYY',
  'MMMM Do',
  'D MMMM YYYY',
  'D MMMM',
  'Do MMMM YYYY',
  'Do MMMM',
  'MMMM',
  'MMM D YYYY',
  'MMM D',
  'MMM Do YYYY',
  'MMM Do',
  'D MMM YYYY',
  'D MMM',
  'Do MMM YYYY',
  'Do MMM',
  'MMM',
  'YYYY M D',
  'YYYY M',
  'YYYY',
  'D M YYYY',
  'D M YY',
  'D M',
  'D'
];

// Formats in which day and month cannot be confused: ISO-like order, or a month given by name.
const UNAMBIGUOUS_FORMATS = [
  'YYYY M D',
  ...PARSER_FORMATS.filter(f => f.includes("MMM")),
];

// Matches a trailing time: H[:M[:S]] or compact HMM/HHMM, optionally followed by a/am/p/pm.
const TIME_REGEX = /(?:^|\s+|T)(?:(\d\d?)(?::(\d\d?)(?::(\d\d?))?)?|(\d\d?)(\d\d))\s*([ap]m?)?$/i;
// [^a-zA-Z] because no letters are allowed directly before the abbreviation
const UTC_REGEX = /[^a-zA-Z](UTC?|GMT|Z)$/i;
const NUMERIC_TZ_REGEX = /([+-]\d\d?)(?::?(\d\d))?$/i;

// Not picky about separators, so replace them in the date and format strings to be spaces.
const SEPARATORS = /[\W_]+/g;

/**
 * Returns (memoized per timezone name) a regex matching any abbreviation of the given timezone
 * at the end of a string. For a timezone name unknown to moment-timezone, returns a regex that
 * never matches rather than throwing.
 */
const tzAbbreviations = memoize((tzName: string): RegExp => {
  const zone = moment.tz.zone(tzName);
  if (!zone) {
    // No data for this zone, so there are no abbreviations to recognize.
    return /(?!)/;
  }
  // Some abbreviations are just e.g. +05
  // and escaping the + seems better than filtering
  const abbreviations = new Set(zone.abbrs.map(escapeRegExp));
  const union = [...abbreviations].join('|');

  // [^a-zA-Z] because no letters are allowed directly before the abbreviation
  // so for example CEST won't match even if EST does
  return new RegExp(`[^a-zA-Z](${union})$`, 'i');
});

interface ParseOptions {
  time?: string;
  dateFormat?: string;
  timeFormat?: string;
  timezone?: string;
}

/**
 * parseDate - Attempts to parse a date string using several common formats. Returns the
 * timestamp of the parsed date in seconds since epoch, or returns null on failure.
 * @param date - The date string to parse.
 * @param options.dateFormat - The preferred momentjs format to use to parse the
 *    date. This is attempted before the default formats. Defaults to "YYYY-MM-DD".
 * @param options.time - The time string to parse. It may end with a timezone offset or
 *    abbreviation; anything left over that is not a recognizable time makes the parse fail.
 * @param options.timeFormat - Not consulted by this function; the time is parsed leniently
 *    regardless of format.
 * @param options.timezone - The timezone string for the date/time, which affects
 *    the resulting timestamp. Defaults to UTC.
 */
export function parseDate(date: string, options: ParseOptions = {}): number | null {
  // If no date, return null.
  if (!date) {
    return null;
  }

  // If this looks like a timestamp (string of 9 or 10 digits), just return it.
  const timestamp = parseTimeStamp(date);
  if (timestamp !== null) {
    return timestamp;
  }

  const dateFormat = options.dateFormat || "YYYY-MM-DD";
  const dateFormats = [..._buildVariations(dateFormat, date), ...PARSER_FORMATS];
  const cleanDate = date.replace(SEPARATORS, ' ');
  let datetime = cleanDate.trim();
  let timeformat = '';
  let time = options.time;
  if (time) {
    // An empty timezone simply disables abbreviation stripping in parseTimeZone.
    const parsedTimeZone = parseTimeZone(time, options.timezone ?? '');
    const parsedTime = standardizeTime(parsedTimeZone.remaining);
    if (!parsedTime || parsedTime.remaining) {
      return null;
    }
    time = parsedTime.time;
    const {tzOffset} = parsedTimeZone;
    datetime += ' ' + time + tzOffset;
    timeformat = ' HH:mm:ss' + (tzOffset ? 'Z' : '');
  }
  for (const format of dateFormats) {
    const fullFormat = format + timeformat;
    const m = moment.tz(datetime, fullFormat, true, options.timezone || 'UTC');
    if (m.isValid()) {
      return m.unix();
    }
  }
  return null;
}

/**
 * Similar to parseDate, with these differences:
 * - Only for a date (no time part)
 * - Only falls back to UNAMBIGUOUS_FORMATS, not the full PARSER_FORMATS
 * - Optionally adds all dates which match some format to `results`, otherwise returns first match.
 * This is safer so it can be used for parsing when pasting a large number of dates
 * and won't silently swap around day and month.
 *
 * Note that when `results` is given, format matches are only added to it and the function
 * returns undefined; a timestamp-like input is still returned directly without being added.
 */
export function parseDateStrict(
  date: string, dateFormat: string | null, results?: Set<number>, timezone: string = 'UTC'
): number | undefined {
  if (!date) {
    return;
  }

  // If this looks like a timestamp (string of 9 or 10 digits), just return it.
  const timestamp = parseTimeStamp(date);
  if (timestamp !== null) {
    return timestamp;
  }

  dateFormat = dateFormat || "YYYY-MM-DD";
  const dateFormats = [..._buildVariations(dateFormat, date), ...UNAMBIGUOUS_FORMATS];
  const cleanDate = date.replace(SEPARATORS, ' ').trim();
  for (const format of dateFormats) {
    const m = moment.tz(cleanDate, format, true, timezone);
    if (m.isValid()) {
      const value = m.valueOf() / 1000;
      if (results) {
        results.add(value);
      } else {
        return value;
      }
    }
  }
}

/**
 * Parses a string holding a date optionally followed by a time and a timezone offset or
 * abbreviation. Returns seconds since epoch, or undefined if the string cannot be parsed.
 * @param dateTime - The text to parse; surrounding whitespace is ignored.
 * @param options.dateFormat - Preferred momentjs date format; defaults to "YYYY-MM-DD".
 * @param options.timezone - Timezone in which to interpret the value; defaults to UTC.
 */
export function parseDateTime(dateTime: string, options: ParseOptions): number | undefined {
  dateTime = dateTime.trim();
  if (!dateTime) {
    return;
  }

  const dateFormat = options.dateFormat || "YYYY-MM-DD";
  const timezone = options.timezone || "UTC";

  // Compare against undefined rather than truthiness: 0 (the Unix epoch) is a valid result.
  const dateOnly = parseDateStrict(dateTime, dateFormat, undefined, timezone);
  if (dateOnly !== undefined) {
    return dateOnly;
  }

  // Peel off the timezone suffix first, then the time; what remains should be the date.
  const {tzOffset, remaining} = parseTimeZone(dateTime, timezone);
  const parsedTime = standardizeTime(remaining);
  if (!parsedTime) {
    return;
  }

  const date = parseDateStrict(parsedTime.remaining, dateFormat);
  if (date === undefined) {
    return;
  }

  // date is a timestamp of midnight in UTC, so to get a formatted representation (for parsing
  // together with time), take care to interpret it in UTC.
  const dateString = moment.unix(date).utc().format("YYYY-MM-DD");
  const fullText = dateString + ' ' + parsedTime.time + tzOffset;
  const fullFormat = "YYYY-MM-DD HH:mm:ss" + (tzOffset ? 'Z' : '');
  const m = moment.tz(fullText, fullFormat, true, timezone);
  // An out-of-range time such as 25:61 gives an invalid moment whose valueOf() is NaN;
  // report that as a failed parse instead of leaking NaN to the caller.
  return m.isValid() ? m.valueOf() / 1000 : undefined;
}

// Helper function to get the partial format string based on the input. Momentjs has a feature
// which allows defaulting to the current year, month and/or day if not accounted for in the
// parser. We remove any parts of the parser not given in the input to take advantage of this
// feature.
function _getPartialFormat(input: string, format: string): string {
  // Tokens of a format string: year, month and day specifiers (each possibly with an ordinal
  // "o"), or any other run of non-separator characters.
  const formatPartRe = /Y+|M+o?|D+o?|[a-zA-Z0-9]+/ig;
  // Parts of the input are plain runs of non-separators. The format tokenizer must not be used
  // here: it would split words starting with M, D or Y (e.g. "May" into "M" + "ay") and so
  // overcount the parts that were actually typed.
  const inputPartRe = /[a-zA-Z0-9]+/g;

  // Count the number of meaningful parts in the input.
  const numInputParts = input.match(inputPartRe)?.length || 0;

  // Count the number of parts in the format string.
  let numFormatParts = format.match(formatPartRe)?.length || 0;

  if (numFormatParts > numInputParts) {
    // Remove year from format first, to default to current year.
    if (/Y+/.test(format)) {
      format = format.replace(/Y+/, ' ').trim();
      numFormatParts -= 1;
    }
    if (numFormatParts > numInputParts) {
      // Remove month from format next.
      format = format.replace(/M+/, ' ').trim();
    }
  }
  return format;
}

// Moment non-strict mode is considered bad, as it's far too lax. But moment's strict mode is too
// strict. We want to allow YY|YYYY for either year specifier, as well as M for MMM or MMMM month
// specifiers. It's silly that we need to create multiple format variations to support this.
function _buildVariations(dateFormat: string, date: string): Set<string> {
  // Momentjs has an undesirable feature in strict mode where MM and DD
  // matches require two digit numbers. Change MM, DD to M, D.
  let format = dateFormat.replace(/MM+/g, m => (m === 'MM' ? 'M' : m))
    .replace(/DD+/g, m => (m === 'DD' ? 'D' : m))
    .replace(SEPARATORS, ' ')
    .trim();

  // Allow the input date to end with a 4-digit year even if the format doesn't mention the year
  if (format.includes("M") && format.includes("D") && !format.includes("Y")) {
    format += " YYYY";
  }

  // Drop trailing-priority parts (year, then month) that the input does not supply.
  format = _getPartialFormat(date, format);

  // Consider some alternatives to the preferred format.
  const variations = new Set<string>([format]);
  // Swap YY <-> YYYY; any other year width is left alone.
  const otherYear = format.replace(/Y{2,4}/, (m) => (m === 'YY' ? 'YYYY' : (m === 'YYYY' ? 'YY' : m)));
  variations.add(otherYear);
  // Also accept a numeric month where the format asks for a month name.
  variations.add(format.replace(/MMM+/, 'M'));
  if (otherYear !== format) {
    variations.add(otherYear.replace(/MMM+/, 'M'));
  }
  return variations;
}

// Based on private calculateOffset in moment source code.
// Takes a regex match whose groups 1 and 2 are the signed hours and the optional minutes, and
// returns the offset normalized to the form +HH:MM or -HH:MM.
function calculateOffset(tzMatch: string[]): string {
  const [, hhOffset, mmOffset] = tzMatch;
  const sign = hhOffset.slice(0, 1);
  return sign + hhOffset.slice(1).padStart(2, '0') + ':' + (mmOffset || '0').padStart(2, '0');
}

// Splits a trailing timezone designation off the given string. Returns the text before it as
// `remaining`, and `tzOffset` as a +HH:MM / -HH:MM string, or '' when no offset is known.
// Recognizes, in order: UTC/UT/GMT/Z, a numeric offset, and (if `timezone` is non-empty) any
// abbreviation of that timezone.
function parseTimeZone(str: string, timezone: string): { remaining: string, tzOffset: string } {
  str = str.trim();

  let tzMatch = UTC_REGEX.exec(str);
  let matchStart = 0;
  let tzOffset = '';
  if (tzMatch) {
    tzOffset = '+00:00';
    matchStart = tzMatch.index + 1;  // skip [^a-zA-Z] at regex start
  } else {
    tzMatch = NUMERIC_TZ_REGEX.exec(str);
    if (tzMatch) {
      tzOffset = calculateOffset(tzMatch);
      matchStart = tzMatch.index;
    } else if (timezone) {
      // Abbreviations are simply stripped and ignored, so tzOffset is not set in this case
      tzMatch = tzAbbreviations(timezone).exec(str);
      if (tzMatch) {
        matchStart = tzMatch.index + 1;  // skip [^a-zA-Z] at regex start
      }
    }
  }

  if (tzMatch) {
    str = str.slice(0, matchStart).trim();
  }

  return {remaining: str, tzOffset};
}

// Parses time of the form, roughly, HH[:MM[:SS]][am|pm]. Returns the time in the
// standardized HH:mm:ss format.
// This turns out easier than coaxing moment to parse time sensibly and flexibly.
function standardizeTime(timeString: string): { remaining: string, time: string } | undefined {
  // The time must sit at the very end of the string; whatever precedes it is handed back
  // to the caller as `remaining`. Returns undefined if no time is found.
  const match = TIME_REGEX.exec(timeString);
  if (!match) {
    return;
  }
  // Groups 1-3 hold the colon-separated form, groups 4-5 the compact HMM/HHMM form.
  let hours = parseInt(match[1] || match[4], 10);
  const mm = (match[2] || match[5] || '0').padStart(2, '0');
  const ss = (match[3] || '0').padStart(2, '0');
  const ampm = (match[6] || '').toLowerCase();
  if (hours < 12 && hours > 0 && ampm.startsWith('p')) {
    hours += 12;
  } else if (hours === 12 && ampm.startsWith('a')) {
    // 12am is midnight.
    hours = 0;
  }
  const hh = String(hours).padStart(2, '0');
  return {remaining: timeString.slice(0, match.index).trim(), time: `${hh}:${mm}:${ss}`};
}

/**
 * Guesses a full date[time] format that best matches the given strings.
 * If several formats match equally well, picks the last one lexicographically to match the old date guessing.
 * This means formats with an early Y and/or M are favoured.
 * If no formats match, returns the default YYYY-MM-DD.
 */
export function guessDateFormat(values: Array<string | null>, timezone: string = 'UTC'): string {
  const formats = guessDateFormats(values, timezone);
  if (!formats) {
    return "YYYY-MM-DD";
  }
  return last(formats)!;
}

/**
 * Returns all full date[time] formats that best match the given strings.
 * If several formats match equally well, returns them all.
 * May return null if there are no matching formats or choosing one is too expensive
 * (more than 10 candidate formats).
 */
export function guessDateFormats(values: Array<string | null>, timezone: string = 'UTC'): string[] | null {
  const dateStrings: string[] = values.filter(isNonNullish);
  // Collect candidate formats from a sample only (getDistinctValues is called with a limit of 100).
  const sample = getDistinctValues(dateStrings, 100);
  const formats: Record<string, number> = {};
  for (const dateString of sample) {
    let guessed: string | string[];
    try {
      guessed = guessFormat(dateString);
    } catch {
      // Strings for which no format can be guessed contribute no candidates.
      continue;
    }
    if (typeof guessed === "string") {
      guessed = [guessed];
    }
    for (const guess of guessed) {
      formats[guess] = 0;
    }
  }

  const formatKeys = Object.keys(formats);
  if (!formatKeys.length || formatKeys.length > 10) {
    return null;
  }

  // Score every candidate by how many of the non-null values it parses strictly.
  for (const format of formatKeys) {
    for (const dateString of dateStrings) {
      const m = moment.tz(dateString, format, true, timezone);
      if (m.isValid()) {
        formats[format] += 1;
      }
    }
  }

  const maxCount = Math.max(...Object.values(formats));

  // Return all formats that tied for first place.
  // Sort lexicographically for consistency in tests and with the old dateguess.py.
  return formatKeys.filter(format => formats[format] === maxCount).sort();
}

export const dateFormatOptions = [
  'YYYY-MM-DD',
  'MM-DD-YYYY',
  'MM/DD/YYYY',
  'MM-DD-YY',
  'MM/DD/YY',
  'DD MMM YYYY',
  'MMMM Do, YYYY',
  'DD-MM-YYYY',
];

export const timeFormatOptions = [
  'h:mma',
  'h:mma z',
  'HH:mm',
  'HH:mm z',
  'HH:mm:ss',
  'HH:mm:ss z',
];

/**
 * Construct widget options for a Date or DateTime column based on a single moment string
 * which may or may not contain both date and time parts.
 * If defaultTimeFormat is true, fallback to a non-empty default time format when none is found in fullFormat.
 */
export function dateTimeWidgetOptions(fullFormat: string, defaultTimeFormat: boolean) {
  // The time part starts at the first time-related format character. The `|$` alternative
  // makes the regex always match, so with no time part the split point is the end of the string.
  const index = fullFormat.match(/[hHkaAmsSzZT]|$/)!.index!;
  const dateFormat = fullFormat.slice(0, index).trim();
  const timeFormat = fullFormat.slice(index).trim() || (defaultTimeFormat ? timeFormatOptions[0] : "");
  return {
    dateFormat,
    timeFormat,
    isCustomDateFormat: !dateFormatOptions.includes(dateFormat),
    isCustomTimeFormat: !timeFormatOptions.includes(timeFormat),
  };
}

/**
 * Attempts to parse a timestamp string. Returns the timestamp in seconds
 * since epoch, or returns null on failure. Accepts only strings of 9 or 10 digits
 * that do not start with 0.
 * Lowest 11 digit timestamp is 2286-11-20, so we don't consider them valid.
 */
export function parseTimeStamp(date: string): number | null {
  // If this looks like a timestamp (number with 9 or 10 digits), just return it.
  // This covers most of the cases leaving some time around the unix epoch not covered.
  // So time before 100 000 000 (1973-03-03) is not covered. Negative values
  // are also not supported, as they overlap with the YYYYYY date format.
  if (/^[1-9]\d{8,9}$/.test(date)) {
    const parsedDate = moment(date, 'X');
    if (parsedDate.isValid()) {
      return parsedDate.unix();
    }
  }
  return null;
}