gristlabs_grist-core/app/client/lib/ACIndex.ts
Dmitry S 8d68c1c567 (core) Replace time zone selector with one based on the newer autocomplete.
Summary:
Flaky Dates test failures related to the use of JQuery autocomplete for time
zones, which wasn't working well.

This diff replaces that autocomplete (as well as a similar select box in
DocumentSettings) with our newer autocomplete, adding some select-box like
behavior.

Most of the behavior is factored out into ACSelect, which could be more
generally useful.

Adds an option to autocomplete to keep options ordered according to their
initial order.

Unrelated: fix up usage of MultiHolder in Drafts to avoid 'already disposed'
warnings.

Test Plan: Fixed several affected tests.

Reviewers: jarek

Reviewed By: jarek

Differential Revision: https://phab.getgrist.com/D2919
2021-07-23 08:02:05 -04:00

269 lines
11 KiB
TypeScript

/**
* A search index for auto-complete suggestions.
*
* This implementation indexes words, and suggests items based on a best-match score, including
* amount of overlap and position of words. It searches case-insensitively and only at the start
* of words. E.g. searching for "Blue" would match "Blu" in "Lavender Blush", but searching for
* "lush" would only match the "L" in "Lavender".
*/
import {localeCompare, nativeCompare, sortedIndex} from 'app/common/gutil';
import {DomContents} from 'grainjs';
import escapeRegExp = require("lodash/escapeRegExp");
export interface ACItem {
// This should be a trimmed lowercase version of the item's text. It may be an accessor.
// Note that items with empty cleanText are never suggested.
cleanText: string;
}
// Regexp used to split text into words; includes nearly all punctuation. This means that
// "foo-bar" may be searched by "bar", but it's impossible to search for punctuation itself (e.g.
// "a-b" and "a+b" are not distinguished). (It's easy to exclude unicode punctuation too if the
// need arises, see https://stackoverflow.com/a/25575009/328565).
const wordSepRegexp = /[\s!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]+/;
/**
* An auto-complete index, which simply allows searching for a string.
*/
export interface ACIndex<Item extends ACItem> {
search(searchText: string): ACResults<Item>;
}
// Splits text into an array of pieces, with odd-indexed pieces being the ones to highlight.
export type HighlightFunc = (text: string) => string[];
export const highlightNone: HighlightFunc = (text) => [text];
/**
* AutoComplete results include the suggested items, which one to highlight, and a function for
* highlighting the matched portion of each item.
*/
export interface ACResults<Item extends ACItem> {
// Matching items in order from best match to worst.
items: Item[];
// May be used to highlight matches using buildHighlightedDom().
highlightFunc: HighlightFunc;
// index of a good match (normally 0), or -1 if no great match
selectIndex: number;
}
interface Word {
word: string; // The indexed word
index: number; // Index into _allItems for the item containing this word.
pos: number; // Position of the word within the item where it occurred.
}
/**
* Implements a search index. It doesn't currently support updates; when any values change, the
* index needs to be rebuilt from scratch.
*/
export class ACIndexImpl<Item extends ACItem> implements ACIndex<Item> {
private _allItems: Item[];
// All words from _allItems, sorted.
private _words: Word[];
// Creates an index for the given list of items.
// The max number of items to suggest may be set using _maxResults (default is 50).
// If _keepOrder is true, best matches will be suggested in the order they occur in items,
// rather than order by best score.
constructor(items: Item[], private _maxResults: number = 50, private _keepOrder = false) {
this._allItems = items.slice(0);
// Collects [word, occurrence, position] tuples for all words in _allItems.
const allWords: Word[] = [];
for (let index = 0; index < this._allItems.length; index++) {
const item = this._allItems[index];
const words = item.cleanText.split(wordSepRegexp).filter(w => w);
for (let pos = 0; pos < words.length; pos++) {
allWords.push({word: words[pos], index, pos});
}
}
allWords.sort((a, b) => localeCompare(a.word, b.word));
this._words = allWords;
}
// The main search function. SearchText will be cleaned (trimmed and lowercased) at the start.
// Empty search text returns the first N items in the search universe.
public search(searchText: string): ACResults<Item> {
const cleanedSearchText = searchText.trim().toLowerCase();
const searchWords = cleanedSearchText.split(wordSepRegexp).filter(w => w);
// Maps item index in _allItems to its score.
const myMatches = new Map<number, number>();
if (searchWords.length > 0) {
// For each of searchWords, go through items with an overlap, and update their scores.
for (let k = 0; k < searchWords.length; k++) {
const searchWord = searchWords[k];
for (const [itemIndex, score] of this._findOverlaps(searchWord, k)) {
myMatches.set(itemIndex, (myMatches.get(itemIndex) || 0) + score);
}
}
// Give an extra point to items that start with the searchText.
for (const [itemIndex, score] of myMatches) {
if (this._allItems[itemIndex].cleanText.startsWith(cleanedSearchText)) {
myMatches.set(itemIndex, score + 1);
}
}
}
// Array of pairs [itemIndex, score], sorted by score (desc) and itemIndex.
const sortedMatches = Array.from(myMatches)
.sort((a, b) => nativeCompare(b[1], a[1]) || nativeCompare(a[0], b[0]))
.slice(0, this._maxResults);
const itemIndices: number[] = sortedMatches.map(([index, score]) => index);
// Append enough non-matching indices to reach maxResults.
for (let i = 0; i < this._allItems.length && itemIndices.length < this._maxResults; i++) {
if (this._allItems[i].cleanText && !myMatches.has(i)) {
itemIndices.push(i);
}
}
if (this._keepOrder) {
itemIndices.sort(nativeCompare);
}
const items = itemIndices.map(index => this._allItems[index]);
if (!cleanedSearchText) {
// In this case we are just returning the first few items.
return {items, highlightFunc: highlightNone, selectIndex: -1};
}
const highlightFunc = highlightMatches.bind(null, searchWords);
// If we have a best match, and any word in it actually starts with the search text, report it
// as a default selection for highlighting. Otherwise, no item will be auto-selected.
let selectIndex = sortedMatches.length > 0 ? itemIndices.indexOf(sortedMatches[0][0]) : -1;
if (selectIndex >= 0 && !startsWithText(items[selectIndex], cleanedSearchText, searchWords)) {
selectIndex = -1;
}
return {items, highlightFunc, selectIndex};
}
/**
* Given one of the search words, looks it up in the indexed list of words and searches up and
* down the list for all words that share a prefix with it. Each such word contributes something
* to the score of the index entry it is a part of.
*
* Returns a Map from the index entry (index into _allItems) to the score which this searchWord
* contributes to it.
*
* The searchWordPos argument is the position of searchWord in the overall search text (e.g. 0
* if it's the first word). It is used for the position bonus, to give higher scores to entries
* whose words occur in the same order as in the search text.
*/
private _findOverlaps(searchWord: string, searchWordPos: number): Map<number, number> {
const insertIndex = sortedIndex<{word: string}>(this._words, {word: searchWord},
(a, b) => nativeCompare(a.word, b.word));
// Maps index of item to its score.
const scored = new Map<number, number>();
// Search up and down the list, accepting smaller and smaller overlap.
for (const step of [1, -1]) {
let prefix = searchWord;
let index = insertIndex + (step > 0 ? 0 : -1);
while (prefix && index >= 0 && index < this._words.length) {
for ( ; index >= 0 && index < this._words.length; index += step) {
const wordEntry = this._words[index];
// Once we reach a word that doesn't start with our prefix, break this loop, so we can
// reduce the length of the prefix and keep scanning.
if (!wordEntry.word.startsWith(prefix)) { break; }
// The contribution of this word's to the score consists primarily of the length of
// overlap (i.e. length for the current prefix).
const baseScore = prefix.length;
// To this we add 1 if the word matches exactly.
const fullWordBonus = (wordEntry.word === searchWord ? 1 : 0);
// To prefer matches where words occur in the same order as searched (e.g. searching for
// "Foo B" should prefer "Foo Bar" over "Bar Foo"), we give a bonus based on the
// position of the word in the search text and the entry text. (If positions match as
// 0:0 and 1:1, the total position bonus is 2^0+2^(-2)=1.25; while the bonus from 0:1
// and 1:0 would be 2^(-1) + 2^(-1)=1.0.)
const positionBonus = Math.pow(2, -(searchWordPos + wordEntry.pos));
const itemScore = baseScore + fullWordBonus + positionBonus;
// Each search word contributes only one score (e.g. a search for "Foo" will partially
// match both words in "forty five", but only the higher of the matches will count).
if (itemScore >= (scored.get(wordEntry.index) || 0)) {
scored.set(wordEntry.index, itemScore);
}
}
prefix = prefix.slice(0, -1);
}
}
return scored;
}
}
export type BuildHighlightFunc = (match: string) => DomContents;
/**
* Converts text to DOM with matching bits of text rendered using highlight(match) function.
*/
export function buildHighlightedDom(
text: string, highlightFunc: HighlightFunc, highlight: BuildHighlightFunc
): DomContents {
if (!text) { return text; }
const parts = highlightFunc(text);
return parts.map((part, k) => k % 2 ? highlight(part) : part);
}
// Same as wordSepRegexp, but with capturing parentheses.
const wordSepRegexpParen = new RegExp(`(${wordSepRegexp.source})`);
/**
* Splits text into pieces, with odd-numbered pieces the ones matching a prefix of some
* searchWord, i.e. the ones to highlight.
*/
function highlightMatches(searchWords: string[], text: string): string[] {
const textParts = text.split(wordSepRegexpParen);
const outputs = [''];
for (let i = 0; i < textParts.length; i += 2) {
const word = textParts[i];
const separator = textParts[i + 1] || '';
const prefixLen = findLongestPrefixLen(word.toLowerCase(), searchWords);
if (prefixLen === 0) {
outputs[outputs.length - 1] += word + separator;
} else {
outputs.push(word.slice(0, prefixLen), word.slice(prefixLen) + separator);
}
}
return outputs;
}
function findLongestPrefixLen(text: string, choices: string[]): number {
return choices.reduce((max, choice) => Math.max(max, findCommonPrefixLength(text, choice)), 0);
}
function findCommonPrefixLength(text1: string, text2: string): number {
let i = 0;
while (i < text1.length && text1[i] === text2[i]) { ++i; }
return i;
}
/**
* Checks whether `item` starts with `text`, or whether all words of text are prefixes of the
* words of `item`. (E.g. it would return true if item is "New York", and text is "ne yo".)
*/
function startsWithText(item: ACItem, text: string, searchWords: string[]): boolean {
if (item.cleanText.startsWith(text)) { return true; }
const regexp = new RegExp(searchWords.map(w => `\\b` + escapeRegExp(w)).join('.*'));
const cleanText = item.cleanText.split(wordSepRegexp).join(' ');
return regexp.test(cleanText);
}