mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
419 lines
17 KiB
TypeScript
419 lines
17 KiB
TypeScript
|
import {ACIndex, ACIndexImpl, ACItem, ACResults, highlightNone} from 'app/client/lib/ACIndex';
|
||
|
import {nativeCompare} from 'app/common/gutil';
|
||
|
import {assert} from 'chai';
|
||
|
import * as fse from 'fs-extra';
|
||
|
import * as path from 'path';
|
||
|
import {fixturesRoot} from 'test/server/testUtils';
|
||
|
|
||
|
/**
|
||
|
* Set env ENABLE_TIMING_TESTS=1 to run the timing "tests". These don't assert anything but let
|
||
|
* you compare the performance of different implementations.
|
||
|
*/
|
||
|
const ENABLE_TIMING_TESTS = Boolean(process.env.ENABLE_TIMING_TESTS);
|
||
|
|
||
|
|
||
|
interface TestACItem extends ACItem {
|
||
|
text: string;
|
||
|
}
|
||
|
|
||
|
function makeItem(text: string): TestACItem {
|
||
|
return {text, cleanText: text.trim().toLowerCase()};
|
||
|
}
|
||
|
|
||
|
const colors: TestACItem[] = [
|
||
|
"Blue", "Dark Red", "Reddish", "Red", "Orange", "Yellow", "Radical Deep Green", "Bright Red"
|
||
|
].map(makeItem);
|
||
|
|
||
|
const rounds: TestACItem[] = [
|
||
|
"Round 1", "Round 2", "Round 3", "Round 4"
|
||
|
].map(makeItem);
|
||
|
|
||
|
const messy: TestACItem[] = [
|
||
|
"", " \t", " RED ", "123", "-5.6", "red", "read ", "Bread", "#red", "\nred\n#red\nred", "\n\n", "REDIS/1"
|
||
|
].map(makeItem);
|
||
|
|
||
|
|
||
|
describe('ACIndex', function() {
|
||
|
it('should find items with matching words', function() {
|
||
|
const items: ACItem[] = ["blue", "dark red", "reddish", "red", "orange", "yellow", "radical green"].map(
|
||
|
c => ({cleanText: c}));
|
||
|
const acIndex = new ACIndexImpl(items, 5);
|
||
|
assert.deepEqual(acIndex.search("red").items.map((item) => item.cleanText),
|
||
|
["red", "reddish", "dark red", "radical green", "blue"]);
|
||
|
});
|
||
|
|
||
|
|
||
|
it('should return first few items when search text is empty', function() {
|
||
|
let acResult = new ACIndexImpl(colors).search("");
|
||
|
assert.deepEqual(acResult.items, colors);
|
||
|
assert.deepEqual(acResult.selectIndex, -1);
|
||
|
|
||
|
acResult = new ACIndexImpl(colors, 3).search("");
|
||
|
assert.deepEqual(acResult.items, colors.slice(0, 3));
|
||
|
assert.deepEqual(acResult.selectIndex, -1);
|
||
|
|
||
|
acResult = new ACIndexImpl(rounds).search("");
|
||
|
assert.deepEqual(acResult.items, rounds);
|
||
|
assert.deepEqual(acResult.selectIndex, -1);
|
||
|
});
|
||
|
|
||
|
it('should ignore items with empty text', function() {
|
||
|
const acIndex = new ACIndexImpl(messy);
|
||
|
let acResult = acIndex.search("");
|
||
|
|
||
|
assert.deepEqual(acResult.items, messy.filter(t => t.cleanText));
|
||
|
assert.lengthOf(acResult.items, 9);
|
||
|
assert.deepEqual(acResult.selectIndex, -1);
|
||
|
|
||
|
acResult = acIndex.search("bread");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Bread", " RED ", "123", "-5.6", "red", "read ", "#red", "\nred\n#red\nred", "REDIS/1"]);
|
||
|
assert.deepEqual(acResult.selectIndex, 0);
|
||
|
});
|
||
|
|
||
|
it('should find items with the most matching words, and order by best match', function() {
|
||
|
const acIndex = new ACIndexImpl(colors);
|
||
|
let acResult: ACResults<TestACItem>;
|
||
|
|
||
|
// Try a few cases with a single word.
|
||
|
acResult = acIndex.search("red");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Red", "Reddish", "Dark Red", "Bright Red", "Radical Deep Green", "Blue", "Orange", "Yellow"]);
|
||
|
assert.deepEqual(acResult.selectIndex, 0);
|
||
|
|
||
|
acResult = acIndex.search("rex");
|
||
|
// In this case "Reddish" is as good as "Red", so comes first according to original order.
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Reddish", "Red", "Dark Red", "Bright Red", "Radical Deep Green", "Blue", "Orange", "Yellow"]);
|
||
|
assert.deepEqual(acResult.selectIndex, -1); // No great match.
|
||
|
|
||
|
acResult = acIndex.search("REDD");
|
||
|
// In this case "Reddish" is strictly better than "Red".
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Reddish", "Red", "Dark Red", "Bright Red", "Radical Deep Green", "Blue", "Orange", "Yellow"]);
|
||
|
assert.deepEqual(acResult.selectIndex, 0); // It's a good match.
|
||
|
|
||
|
// Try a few cases with multiple words.
|
||
|
acResult = acIndex.search("dark red");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Dark Red", "Red", "Bright Red", "Reddish", "Radical Deep Green", "Blue", "Orange", "Yellow"]);
|
||
|
assert.deepEqual(acResult.selectIndex, 0);
|
||
|
|
||
|
acResult = acIndex.search("da re");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Dark Red", "Radical Deep Green", "Reddish", "Red", "Bright Red", "Blue", "Orange", "Yellow"]);
|
||
|
assert.deepEqual(acResult.selectIndex, 0);
|
||
|
|
||
|
acResult = acIndex.search("red d");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Dark Red", "Red", "Bright Red", "Reddish", "Radical Deep Green", "Blue", "Orange", "Yellow"]);
|
||
|
assert.deepEqual(acResult.selectIndex, -1);
|
||
|
|
||
|
acResult = acIndex.search("EXTRA DARK RED WORDS DON'T HURT");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Dark Red", "Red", "Bright Red", "Radical Deep Green", "Reddish", "Blue", "Orange", "Yellow"]);
|
||
|
assert.deepEqual(acResult.selectIndex, -1);
|
||
|
|
||
|
// Try a few poor matches.
|
||
|
acResult = acIndex.search("a");
|
||
|
assert.deepEqual(acResult.items, colors);
|
||
|
acResult = acIndex.search("z");
|
||
|
assert.deepEqual(acResult.items, colors);
|
||
|
acResult = acIndex.search("RA");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Radical Deep Green", "Reddish", "Red", "Dark Red", "Bright Red", "Blue", "Orange", "Yellow"]);
|
||
|
acResult = acIndex.search("RZ");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text),
|
||
|
["Reddish", "Red", "Radical Deep Green", "Dark Red", "Bright Red", "Blue", "Orange", "Yellow"]);
|
||
|
|
||
|
});
|
||
|
|
||
|
it('should maintain order of equally good matches', function() {
|
||
|
const acIndex = new ACIndexImpl(rounds);
|
||
|
let acResult: ACResults<TestACItem>;
|
||
|
|
||
|
// Try a few cases with a single word.
|
||
|
acResult = acIndex.search("r");
|
||
|
assert.deepEqual(acResult.items, rounds);
|
||
|
|
||
|
acResult = acIndex.search("round 1");
|
||
|
assert.deepEqual(acResult.items, rounds);
|
||
|
|
||
|
acResult = acIndex.search("round 3");
|
||
|
// Round 3 is moved to the front; the rest are unchanged.
|
||
|
assert.deepEqual(acResult.items.map(i => i.text), ["Round 3", "Round 1", "Round 2", "Round 4"]);
|
||
|
});
|
||
|
|
||
|
it('should prefer items with words in a similar order to search text', function() {
|
||
|
const acIndex = new ACIndexImpl(colors);
|
||
|
let acResult: ACResults<TestACItem>;
|
||
|
|
||
|
// "r d" and "d r" prefer choices whose words are in the entered order.
|
||
|
acResult = acIndex.search("r d");
|
||
|
assert.deepEqual(acResult.items.slice(0, 2).map(i => i.text), ["Radical Deep Green", "Dark Red"]);
|
||
|
|
||
|
acResult = acIndex.search("d r");
|
||
|
assert.deepEqual(acResult.items.slice(0, 2).map(i => i.text), ["Dark Red", "Radical Deep Green"]);
|
||
|
|
||
|
// But a better match wins.
|
||
|
acResult = acIndex.search("de r");
|
||
|
assert.deepEqual(acResult.items.slice(0, 2).map(i => i.text), ["Radical Deep Green", "Dark Red"]);
|
||
|
});
|
||
|
|
||
|
it('should limit results to maxResults', function() {
|
||
|
const acIndex = new ACIndexImpl(colors, 3);
|
||
|
let acResult: ACResults<TestACItem>;
|
||
|
|
||
|
acResult = acIndex.search("red");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text), ["Red", "Reddish", "Dark Red"]);
|
||
|
assert.deepEqual(acResult.selectIndex, 0);
|
||
|
|
||
|
acResult = acIndex.search("red d");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text), ["Dark Red", "Red", "Bright Red"]);
|
||
|
assert.deepEqual(acResult.selectIndex, -1);
|
||
|
|
||
|
acResult = acIndex.search("g");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text), ["Radical Deep Green", "Blue", "Dark Red"]);
|
||
|
assert.deepEqual(acResult.selectIndex, 0);
|
||
|
});
|
||
|
|
||
|
it('should split words on punctuation', function() {
|
||
|
// Same as `colors` but with extra punctuation
|
||
|
const punctColors: TestACItem[] = [
|
||
|
"$Blue$", "--Dark@#$%^&Red--", "(Reddish)", "]Red{", "**Orange", "-Yellow?!",
|
||
|
"_Radical ``Deep'' !!Green!!", "<Bright>=\"Red\""
|
||
|
].map(makeItem);
|
||
|
|
||
|
const acIndex = new ACIndexImpl(punctColors);
|
||
|
let acResult: ACResults<TestACItem>;
|
||
|
|
||
|
// Try a few cases with a single word.
|
||
|
acResult = acIndex.search("~red-");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text), [
|
||
|
"]Red{", "--Dark@#$%^&Red--", "<Bright>=\"Red\"", "(Reddish)", "_Radical ``Deep'' !!Green!!",
|
||
|
"$Blue$", "**Orange", "-Yellow?!"]);
|
||
|
assert.deepEqual(acResult.selectIndex, 0);
|
||
|
|
||
|
acResult = acIndex.search("rex");
|
||
|
// In this case "Reddish" is as good as "Red", so comes first according to original order.
|
||
|
assert.deepEqual(acResult.items.map(i => i.text), [
|
||
|
"(Reddish)", "]Red{", "--Dark@#$%^&Red--", "<Bright>=\"Red\"", "_Radical ``Deep'' !!Green!!",
|
||
|
"$Blue$", "**Orange", "-Yellow?!"]);
|
||
|
assert.deepEqual(acResult.selectIndex, -1); // No great match.
|
||
|
|
||
|
acResult = acIndex.search("da-re");
|
||
|
assert.deepEqual(acResult.items.map(i => i.text), [
|
||
|
"--Dark@#$%^&Red--", "_Radical ``Deep'' !!Green!!", "(Reddish)", "]Red{", "<Bright>=\"Red\"",
|
||
|
"$Blue$", "**Orange", "-Yellow?!",
|
||
|
]);
|
||
|
assert.deepEqual(acResult.selectIndex, 0);
|
||
|
|
||
|
// Try a few poor matches.
|
||
|
acResult = acIndex.search("a");
|
||
|
assert.deepEqual(acResult.items, punctColors);
|
||
|
acResult = acIndex.search("z");
|
||
|
assert.deepEqual(acResult.items, punctColors);
|
||
|
});
|
||
|
|
||
|
it('should return an item to select when the match is good', function() {
|
||
|
const acIndex = new ACIndexImpl(rounds);
|
||
|
let acResult: ACResults<TestACItem>;
|
||
|
|
||
|
// Try a few cases with a single word.
|
||
|
acResult = acIndex.search("r");
|
||
|
assert.equal(acResult.selectIndex, 0);
|
||
|
assert.equal(acResult.items[0].text, "Round 1");
|
||
|
|
||
|
acResult = acIndex.search("round 2");
|
||
|
assert.equal(acResult.selectIndex, 0);
|
||
|
assert.equal(acResult.items[0].text, "Round 2");
|
||
|
|
||
|
acResult = acIndex.search("round X");
|
||
|
assert.equal(acResult.selectIndex, -1);
|
||
|
|
||
|
// We only suggest a selection when an item (or one of its words) starts with the search text.
|
||
|
acResult = acIndex.search("1");
|
||
|
assert.equal(acResult.selectIndex, 0);
|
||
|
|
||
|
const acIndex2 = new ACIndexImpl(messy);
|
||
|
acResult = acIndex2.search("#r");
|
||
|
assert.equal(acResult.selectIndex, 0);
|
||
|
assert.equal(acResult.items[0].text, "#red");
|
||
|
|
||
|
// Whitespace and case don't matter.
|
||
|
acResult = acIndex2.search("Red");
|
||
|
assert.equal(acResult.selectIndex, 0);
|
||
|
assert.equal(acResult.items[0].text, " RED ");
|
||
|
});
|
||
|
|
||
|
it('should return a useful highlight function', function() {
|
||
|
const acIndex = new ACIndexImpl(colors, 3);
|
||
|
let acResult: ACResults<TestACItem>;
|
||
|
|
||
|
// Here we split the items' (uncleaned) text with the returned highlightFunc. The values at
|
||
|
// odd-numbered indices should be the matching parts.
|
||
|
acResult = acIndex.search("red");
|
||
|
assert.deepEqual(acResult.items.map(i => acResult.highlightFunc(i.text)),
|
||
|
[["", "Red", ""], ["", "Red", "dish"], ["Dark ", "Red", ""]]);
|
||
|
|
||
|
// Partial matches are highlighted too.
|
||
|
acResult = acIndex.search("darn");
|
||
|
assert.deepEqual(acResult.items.map(i => acResult.highlightFunc(i.text)),
|
||
|
[["", "Dar", "k Red"], ["Radical ", "D", "eep Green"], ["Blue"]]);
|
||
|
|
||
|
// Empty search highlights nothing.
|
||
|
acResult = acIndex.search("");
|
||
|
assert.deepEqual(acResult.items.map(i => acResult.highlightFunc(i.text)),
|
||
|
[["Blue"], ["Dark Red"], ["Reddish"]]);
|
||
|
|
||
|
// Try some messier cases.
|
||
|
const acIndex2 = new ACIndexImpl(messy, 6);
|
||
|
acResult = acIndex2.search("#r");
|
||
|
assert.deepEqual(acResult.items.map(i => acResult.highlightFunc(i.text)),
|
||
|
[["#", "r", "ed"], [" ", "R", "ED "], ["", "r", "ed"], ["", "r", "ead "],
|
||
|
["\n", "r", "ed\n#", "r", "ed\n", "r", "ed"], ["", "R", "EDIS/1"]]);
|
||
|
|
||
|
acResult = acIndex2.search("read");
|
||
|
assert.deepEqual(acResult.items.map(i => acResult.highlightFunc(i.text)), [
|
||
|
["", "read", " "], [" ", "RE", "D "], ["", "re", "d"], ["#", "re", "d"],
|
||
|
["\n", "re", "d\n#", "re", "d\n", "re", "d"], ["", "RE", "DIS/1"]]);
|
||
|
});
|
||
|
|
||
|
it('should highlight multi-byte unicode', function() {
|
||
|
const acIndex = new ACIndexImpl(['Lorem ipsum 𝌆 dolor sit ameͨ͆t.', "mañana", "Москва"].map(makeItem), 3);
|
||
|
const acResult: ACResults<TestACItem> = acIndex.search("mañ моск am");
|
||
|
assert.deepEqual(acResult.items.map(i => acResult.highlightFunc(i.text)),
|
||
|
[["", "Моск", "ва"], ["", "mañ", "ana"], ["Lorem ipsum 𝌆 dolor sit ", "am", "eͨ͆t."]]);
|
||
|
});
|
||
|
|
||
|
it('should match a brute-force scoring implementation', function() {
|
||
|
const acIndex1 = new ACIndexImpl(colors);
|
||
|
const acIndex2 = new BruteForceACIndexImpl(colors);
|
||
|
for (const text of ["RED", "blue", "a", "Z", "rea", "RZ", "da re", "re da", ""]) {
|
||
|
assert.deepEqual(acIndex1.search(text).items, acIndex2.search(text).items,
|
||
|
`different results for "${text}"`);
|
||
|
}
|
||
|
});
|
||
|
|
||
|
// See ENABLE_TIMING_TESTS flag on top of this file.
|
||
|
if (ENABLE_TIMING_TESTS) {
|
||
|
// Returns a list of many items, for checking performance.
|
||
|
async function getCities(): Promise<TestACItem[]> {
|
||
|
// Pick a file we have with 4k+ rows. First two columns are city,country.
|
||
|
// To create more items, we'll return "city N, country" combinations for N in [0, 25).
|
||
|
const filePath = path.resolve(fixturesRoot, 'export-csv/many-rows.csv');
|
||
|
const data = await fse.readFile(filePath, {encoding: 'utf8'});
|
||
|
const result: TestACItem[] = [];
|
||
|
for (const line of data.split("\n")) {
|
||
|
const [city, country] = line.split(",");
|
||
|
for (let i = 0; i < 25; i++) {
|
||
|
result.push(makeItem(`${city} ${i}, ${country}`));
|
||
|
}
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
// Repeat `func()` call `count` times, returning [msec per call, last return value].
|
||
|
function repeat<T>(count: number, func: () => T): [number, T] {
|
||
|
const start = Date.now();
|
||
|
let ret: T;
|
||
|
for (let i = 0; i < count; i++) {
|
||
|
ret = func();
|
||
|
}
|
||
|
const msecTaken = Date.now() - start;
|
||
|
return [msecTaken / count, ret!];
|
||
|
}
|
||
|
|
||
|
describe("timing", function() {
|
||
|
this.timeout(20000);
|
||
|
|
||
|
let items: TestACItem[];
|
||
|
|
||
|
before(async function() {
|
||
|
items = await getCities();
|
||
|
});
|
||
|
|
||
|
// tslint:disable:no-console
|
||
|
|
||
|
it('main algorithm', function() {
|
||
|
const [buildTime, acIndex] = repeat(10, () => new ACIndexImpl(items, 100));
|
||
|
console.log(`Time to build index (${items.length} items): ${buildTime} ms`);
|
||
|
|
||
|
const [searchTime, result] = repeat(10, () => acIndex.search("YORK"));
|
||
|
console.log(`Time to search index (${items.length} items): ${searchTime} ms`);
|
||
|
assert.equal(result.items[0].text, "York 0, United Kingdom");
|
||
|
assert.equal(result.items[75].text, "New York 0, United States");
|
||
|
});
|
||
|
|
||
|
it('brute-force algorithm', function() {
|
||
|
const [buildTime, acIndex] = repeat(10, () => new BruteForceACIndexImpl(items, 100));
|
||
|
console.log(`Time to build index (${items.length} items): ${buildTime} ms`);
|
||
|
|
||
|
const [searchTime, result] = repeat(10, () => acIndex.search("YORK"));
|
||
|
console.log(`Time to search index (${items.length} items): ${searchTime} ms`);
|
||
|
assert.equal(result.items[0].text, "York 0, United Kingdom");
|
||
|
assert.equal(result.items[75].text, "New York 0, United States");
|
||
|
});
|
||
|
});
|
||
|
}
|
||
|
});
|
||
|
|
||
|
|
||
|
// This is a brute force implementation of the same score-based search. It makes scoring logic
|
||
|
// easier to understand.
|
||
|
class BruteForceACIndexImpl<Item extends ACItem> implements ACIndex<Item> {
|
||
|
constructor(private _allItems: Item[], private _maxResults: number = 50) {}
|
||
|
|
||
|
public search(searchText: string): ACResults<Item> {
|
||
|
const cleanedSearchText = searchText.trim().toLowerCase();
|
||
|
if (!cleanedSearchText) {
|
||
|
return {items: this._allItems.slice(0, this._maxResults), highlightFunc: highlightNone, selectIndex: -1};
|
||
|
}
|
||
|
|
||
|
const searchWords = cleanedSearchText.split(/\s+/);
|
||
|
|
||
|
// Each item consists of the item's score, item's index, and the item itself.
|
||
|
const matches: Array<[number, number, Item]> = [];
|
||
|
|
||
|
// Get a score for each item based on the amount of overlap with text.
|
||
|
for (let i = 0; i < this._allItems.length; i++) {
|
||
|
const item = this._allItems[i];
|
||
|
const score: number = getScore(item.cleanText, searchWords);
|
||
|
matches.push([score, i, item]);
|
||
|
}
|
||
|
|
||
|
// Sort the matches by score first, and then by item (searchText).
|
||
|
matches.sort((a, b) => nativeCompare(b[0], a[0]) || nativeCompare(a[1], b[1]));
|
||
|
const items = matches.slice(0, this._maxResults).map((m) => m[2]);
|
||
|
|
||
|
return {items, highlightFunc: highlightNone, selectIndex: -1};
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Scores text against an array of search words by adding the lengths of common prefixes between
|
||
|
// the search words and words in the text.
|
||
|
function getScore(text: string, searchWords: string[]) {
|
||
|
const textWords = text.split(/\s+/);
|
||
|
let score = 0;
|
||
|
for (let k = 0; k < searchWords.length; k++) {
|
||
|
const w = searchWords[k];
|
||
|
// Power term for bonus disambiguates scores that are otherwise identical, to prioritize
|
||
|
// earlier words appearing in earlier positions.
|
||
|
const wordScore = Math.max(...textWords.map((sw, i) => getWordScore(sw, w, Math.pow(2, -(i + k)))));
|
||
|
score += wordScore;
|
||
|
}
|
||
|
if (text.startsWith(searchWords.join(' '))) {
|
||
|
score += 1;
|
||
|
}
|
||
|
return score;
|
||
|
}
|
||
|
|
||
|
function getWordScore(searchedWord: string, word: string, bonus: number) {
|
||
|
if (searchedWord === word) { return word.length + 1 + bonus; }
|
||
|
while (word) {
|
||
|
if (searchedWord.startsWith(word)) { return word.length + bonus; }
|
||
|
word = word.slice(0, -1);
|
||
|
}
|
||
|
return 0;
|
||
|
}
|