gristlabs_grist-core/app/client/lib/ACIndex.ts
George Gevoian 5b2666a88a (core) Enhance autocomplete and choice colors
Summary:
Choice columns can now add new choices directly
from the autocomplete menu. The autocomplete will
now highlight the first matching item, even if there are equally
ranked alternatives. No changes have been made to how the
autocomplete index is created, or how it scores items.

For choice and choice list columns, the filter menu will
now display values using their configured colors, similar to the
rest of the UI. Choice tokens throughout the UI now do a better
job of handling text overflow by showing an ellipsis whenever
there isn't enough space to show the full text of a choice.

Test Plan: Browser tests.

Reviewers: cyprien

Reviewed By: cyprien

Differential Revision: https://phab.getgrist.com/D2904
2021-07-16 09:10:51 -07:00

259 lines
10 KiB
TypeScript

/**
* A search index for auto-complete suggestions.
*
* This implementation indexes words, and suggests items based on a best-match score, including
* amount of overlap and position of words. It searches case-insensitively and only at the start
* of words. E.g. searching for "Blue" would match "Blu" in "Lavender Blush", but searching for
* "lush" would only match the "L" in "Lavender".
*/
import {localeCompare, nativeCompare, sortedIndex} from 'app/common/gutil';
import {DomContents} from 'grainjs';
export interface ACItem {
// This should be a trimmed lowercase version of the item's text. It may be an accessor.
// Note that items with empty cleanText are never suggested.
cleanText: string;
}
// Regexp used to split text into words; includes nearly all punctuation. This means that
// "foo-bar" may be searched by "bar", but it's impossible to search for punctuation itself (e.g.
// "a-b" and "a+b" are not distinguished). (It's easy to exclude unicode punctuation too if the
// need arises, see https://stackoverflow.com/a/25575009/328565).
const wordSepRegexp = /[\s!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]+/;
/**
* An auto-complete index, which simply allows searching for a string.
*/
export interface ACIndex<Item extends ACItem> {
search(searchText: string): ACResults<Item>;
}
// Splits text into an array of pieces, with odd-indexed pieces being the ones to highlight.
export type HighlightFunc = (text: string) => string[];
export const highlightNone: HighlightFunc = (text) => [text];
/**
* AutoComplete results include the suggested items, which one to highlight, and a function for
* highlighting the matched portion of each item.
*/
export interface ACResults<Item extends ACItem> {
// Matching items in order from best match to worst.
items: Item[];
// May be used to highlight matches using buildHighlightedDom().
highlightFunc: HighlightFunc;
// index of a good match (normally 0), or -1 if no great match
selectIndex: number;
}
interface Word {
word: string; // The indexed word
index: number; // Index into _allItems for the item containing this word.
pos: number; // Position of the word within the item where it occurred.
}
/**
* Implements a search index. It doesn't currently support updates; when any values change, the
* index needs to be rebuilt from scratch.
*/
export class ACIndexImpl<Item extends ACItem> implements ACIndex<Item> {
private _allItems: Item[];
// All words from _allItems, sorted.
private _words: Word[];
// Creates an index for the given list of items.
// The max number of items to suggest may be set using _maxResults (default is 50).
constructor(items: Item[], private _maxResults: number = 50) {
this._allItems = items.slice(0);
// Collects [word, occurrence, position] tuples for all words in _allItems.
const allWords: Word[] = [];
for (let index = 0; index < this._allItems.length; index++) {
const item = this._allItems[index];
const words = item.cleanText.split(wordSepRegexp).filter(w => w);
for (let pos = 0; pos < words.length; pos++) {
allWords.push({word: words[pos], index, pos});
}
}
allWords.sort((a, b) => localeCompare(a.word, b.word));
this._words = allWords;
}
// The main search function. SearchText will be cleaned (trimmed and lowercased) at the start.
// Empty search text returns the first N items in the search universe.
public search(searchText: string): ACResults<Item> {
const cleanedSearchText = searchText.trim().toLowerCase();
const searchWords = cleanedSearchText.split(wordSepRegexp).filter(w => w);
// Maps item index in _allItems to its score.
const myMatches = new Map<number, number>();
if (searchWords.length > 0) {
// For each of searchWords, go through items with an overlap, and update their scores.
for (let k = 0; k < searchWords.length; k++) {
const searchWord = searchWords[k];
for (const [itemIndex, score] of this._findOverlaps(searchWord, k)) {
myMatches.set(itemIndex, (myMatches.get(itemIndex) || 0) + score);
}
}
// Give an extra point to items that start with the searchText.
for (const [itemIndex, score] of myMatches) {
if (this._allItems[itemIndex].cleanText.startsWith(cleanedSearchText)) {
myMatches.set(itemIndex, score + 1);
}
}
}
// Array of pairs [itemIndex, score], sorted by score (desc) and itemIndex.
const sortedMatches = Array.from(myMatches)
.sort((a, b) => nativeCompare(b[1], a[1]) || nativeCompare(a[0], b[0]))
.slice(0, this._maxResults);
const items: Item[] = sortedMatches.map(([index, score]) => this._allItems[index]);
// Append enough non-matching items to reach maxResults.
for (let i = 0; i < this._allItems.length && items.length < this._maxResults; i++) {
if (this._allItems[i].cleanText && !myMatches.has(i)) {
items.push(this._allItems[i]);
}
}
if (!cleanedSearchText) {
// In this case we are just returning the first few items.
return {items, highlightFunc: highlightNone, selectIndex: -1};
}
const highlightFunc = highlightMatches.bind(null, searchWords);
// The best match is the first item. If any word in the item actually starts with the search
// text, highlight it as a default selection. Otherwise, no item will be auto-selected.
let selectIndex = -1;
if (items.length > 0 && sortedMatches.length > 0 && startsWithText(items[0], cleanedSearchText)) {
selectIndex = 0;
}
return {items, highlightFunc, selectIndex};
}
/**
* Given one of the search words, looks it up in the indexed list of words and searches up and
* down the list for all words that share a prefix with it. Each such word contributes something
* to the score of the index entry it is a part of.
*
* Returns a Map from the index entry (index into _allItems) to the score which this searchWord
* contributes to it.
*
* The searchWordPos argument is the position of searchWord in the overall search text (e.g. 0
* if it's the first word). It is used for the position bonus, to give higher scores to entries
* whose words occur in the same order as in the search text.
*/
private _findOverlaps(searchWord: string, searchWordPos: number): Map<number, number> {
const insertIndex = sortedIndex<{word: string}>(this._words, {word: searchWord},
(a, b) => nativeCompare(a.word, b.word));
// Maps index of item to its score.
const scored = new Map<number, number>();
// Search up and down the list, accepting smaller and smaller overlap.
for (const step of [1, -1]) {
let prefix = searchWord;
let index = insertIndex + (step > 0 ? 0 : -1);
while (prefix && index >= 0 && index < this._words.length) {
for ( ; index >= 0 && index < this._words.length; index += step) {
const wordEntry = this._words[index];
// Once we reach a word that doesn't start with our prefix, break this loop, so we can
// reduce the length of the prefix and keep scanning.
if (!wordEntry.word.startsWith(prefix)) { break; }
// The contribution of this word's to the score consists primarily of the length of
// overlap (i.e. length for the current prefix).
const baseScore = prefix.length;
// To this we add 1 if the word matches exactly.
const fullWordBonus = (wordEntry.word === searchWord ? 1 : 0);
// To prefer matches where words occur in the same order as searched (e.g. searching for
// "Foo B" should prefer "Foo Bar" over "Bar Foo"), we give a bonus based on the
// position of the word in the search text and the entry text. (If positions match as
// 0:0 and 1:1, the total position bonus is 2^0+2^(-2)=1.25; while the bonus from 0:1
// and 1:0 would be 2^(-1) + 2^(-1)=1.0.)
const positionBonus = Math.pow(2, -(searchWordPos + wordEntry.pos));
const itemScore = baseScore + fullWordBonus + positionBonus;
// Each search word contributes only one score (e.g. a search for "Foo" will partially
// match both words in "forty five", but only the higher of the matches will count).
if (itemScore >= (scored.get(wordEntry.index) || 0)) {
scored.set(wordEntry.index, itemScore);
}
}
prefix = prefix.slice(0, -1);
}
}
return scored;
}
}
export type BuildHighlightFunc = (match: string) => DomContents;
/**
* Converts text to DOM with matching bits of text rendered using highlight(match) function.
*/
export function buildHighlightedDom(
text: string, highlightFunc: HighlightFunc, highlight: BuildHighlightFunc
): DomContents {
if (!text) { return text; }
const parts = highlightFunc(text);
return parts.map((part, k) => k % 2 ? highlight(part) : part);
}
// Same as wordSepRegexp, but with capturing parentheses.
const wordSepRegexpParen = new RegExp(`(${wordSepRegexp.source})`);
/**
* Splits text into pieces, with odd-numbered pieces the ones matching a prefix of some
* searchWord, i.e. the ones to highlight.
*/
function highlightMatches(searchWords: string[], text: string): string[] {
const textParts = text.split(wordSepRegexpParen);
const outputs = [''];
for (let i = 0; i < textParts.length; i += 2) {
const word = textParts[i];
const separator = textParts[i + 1] || '';
const prefixLen = findLongestPrefixLen(word.toLowerCase(), searchWords);
if (prefixLen === 0) {
outputs[outputs.length - 1] += word + separator;
} else {
outputs.push(word.slice(0, prefixLen), word.slice(prefixLen) + separator);
}
}
return outputs;
}
function findLongestPrefixLen(text: string, choices: string[]): number {
return choices.reduce((max, choice) => Math.max(max, findCommonPrefixLength(text, choice)), 0);
}
function findCommonPrefixLength(text1: string, text2: string): number {
let i = 0;
while (i < text1.length && text1[i] === text2[i]) { ++i; }
return i;
}
/**
* Checks whether `item` starts with `text`, or has any words that start with `text`.
*/
function startsWithText(item: ACItem, text: string): boolean {
if (item.cleanText.startsWith(text)) { return true; }
const words = item.cleanText.split(wordSepRegexp);
return words.some(w => w.startsWith(text));
}