(core) Porting the AI evaluation script

Summary:
Ports the script that runs an evaluation against our formula dataset.

To test, you need an OpenAI API key (see https://platform.openai.com/)
or a Hugging Face API key (it should work as well); then check out the branch and run:

`OPENAI_API_KEY=<my_openai_api_key> node core/test/formula-dataset/runCompletion.js`

Test Plan:
Needs manual testing: so far there is no plan to make it part of CI.

The current score is somewhere around 34 successful prompts over a total of 47.

Reviewers: paulfitz

Reviewed By: paulfitz

Subscribers: jarek

Differential Revision: https://phab.getgrist.com/D3816
This commit is contained in:
Cyprien P 2023-03-15 09:52:17 +01:00
parent 2b2e19c5b5
commit 1ff93f89c2
8 changed files with 377 additions and 37 deletions

View File

@ -951,7 +951,7 @@ export function assertIsDefined<T>(name: string, value: T): asserts value is Non
* Calls function `fn`, passes any thrown errors to function `recover`, and finally calls `fn` * Calls function `fn`, passes any thrown errors to function `recover`, and finally calls `fn`
* once more if `recover` doesn't throw. * once more if `recover` doesn't throw.
*/ */
export async function retryOnce<T>(fn: () => Promise<T>, recover: (e: unknown) => Promise<void>): Promise<T> { export async function retryOnce<T>(fn: () => Promise<T>, recover: (e: unknown) => Promise<void>): Promise<T> {
try { try {
return await fn(); return await fn();
} catch (e) { } catch (e) {
@ -964,7 +964,7 @@ export function assertIsDefined<T>(name: string, value: T): asserts value is Non
* Checks if value is 'empty' (like null, undefined, empty string, empty array/set/map, empty object). * Checks if value is 'empty' (like null, undefined, empty string, empty array/set/map, empty object).
* Values like 0, true, false are not empty. * Values like 0, true, false are not empty.
*/ */
export function notSet(value: any) { export function notSet(value: any) {
return value === undefined || value === null || value === '' return value === undefined || value === null || value === ''
|| (Array.isArray(value) && !value.length) || (Array.isArray(value) && !value.length)
|| (typeof value === 'object' && !Object.keys(value).length) || (typeof value === 'object' && !Object.keys(value).length)

View File

@ -34,7 +34,7 @@ import {
TransformRule TransformRule
} from 'app/common/ActiveDocAPI'; } from 'app/common/ActiveDocAPI';
import {ApiError} from 'app/common/ApiError'; import {ApiError} from 'app/common/ApiError';
import {mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate'; import {asyncOnce, mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate';
import {AttachmentColumns, gatherAttachmentIds, getAttachmentColumns} from 'app/common/AttachmentColumns'; import {AttachmentColumns, gatherAttachmentIds, getAttachmentColumns} from 'app/common/AttachmentColumns';
import { import {
BulkAddRecord, BulkAddRecord,
@ -230,6 +230,11 @@ export class ActiveDoc extends EventEmitter {
private _inactivityTimer = new InactivityTimer(() => this.shutdown(), Deps.ACTIVEDOC_TIMEOUT * 1000); private _inactivityTimer = new InactivityTimer(() => this.shutdown(), Deps.ACTIVEDOC_TIMEOUT * 1000);
private _recoveryMode: boolean = false; private _recoveryMode: boolean = false;
private _shuttingDown: boolean = false; private _shuttingDown: boolean = false;
private _afterShutdownCallback?: () => Promise<void>;
// catch & report error so that asyncOnce does not get cleared.
private _doShutdown = asyncOnce(
() => this._doShutdownImpl().catch((e) => log.error('Uncaught shutdown error', e))
);
/** /**
* In cases where large numbers of documents are restarted simultaneously * In cases where large numbers of documents are restarted simultaneously
@ -493,6 +498,14 @@ export class ActiveDoc extends EventEmitter {
public async shutdown(options: { public async shutdown(options: {
afterShutdown?: () => Promise<void> afterShutdown?: () => Promise<void>
} = {}): Promise<void> { } = {}): Promise<void> {
if (options.afterShutdown) {
this._afterShutdownCallback = options.afterShutdown;
}
await this._doShutdown();
}
private async _doShutdownImpl(): Promise<void> {
const docSession = makeExceptionalDocSession('system'); const docSession = makeExceptionalDocSession('system');
this._log.debug(docSession, "shutdown starting"); this._log.debug(docSession, "shutdown starting");
try { try {
@ -576,7 +589,7 @@ export class ActiveDoc extends EventEmitter {
} catch (err) { } catch (err) {
this._log.error(docSession, "failed to shutdown some resources", err); this._log.error(docSession, "failed to shutdown some resources", err);
} }
await options.afterShutdown?.(); await this._afterShutdownCallback?.();
} finally { } finally {
this._docManager.removeActiveDoc(this); this._docManager.removeActiveDoc(this);
} }

View File

@ -4,16 +4,25 @@
import {delay} from 'app/common/delay'; import {delay} from 'app/common/delay';
import log from 'app/server/lib/log'; import log from 'app/server/lib/log';
import fetch, { Response as FetchResponse} from 'node-fetch'; import fetch from 'node-fetch';
export const DEPS = { fetch };
export async function sendForCompletion(prompt: string): Promise<string> { export async function sendForCompletion(prompt: string): Promise<string> {
let completion: string|null = null; let completion: string|null = null;
if (process.env.OPENAI_API_KEY) { let retries: number = 0;
completion = await sendForCompletionOpenAI(prompt); while(retries++ < 3) {
} try {
if (process.env.HUGGINGFACE_API_KEY) { if (process.env.OPENAI_API_KEY) {
completion = await sendForCompletionHuggingFace(prompt); completion = await sendForCompletionOpenAI(prompt);
}
if (process.env.HUGGINGFACE_API_KEY) {
completion = await sendForCompletionHuggingFace(prompt);
}
break;
} catch(e) {
await delay(1000);
}
} }
if (completion === null) { if (completion === null) {
throw new Error("Please set OPENAI_API_KEY or HUGGINGFACE_API_KEY (and optionally COMPLETION_MODEL)"); throw new Error("Please set OPENAI_API_KEY or HUGGINGFACE_API_KEY (and optionally COMPLETION_MODEL)");
@ -29,7 +38,7 @@ async function sendForCompletionOpenAI(prompt: string) {
if (!apiKey) { if (!apiKey) {
throw new Error("OPENAI_API_KEY not set"); throw new Error("OPENAI_API_KEY not set");
} }
const response = await fetch( const response = await DEPS.fetch(
"https://api.openai.com/v1/completions", "https://api.openai.com/v1/completions",
{ {
method: "POST", method: "POST",
@ -73,31 +82,27 @@ async function sendForCompletionHuggingFace(prompt: string) {
completionUrl = 'https://api-inference.huggingface.co/models/NovelAI/genji-python-6B'; completionUrl = 'https://api-inference.huggingface.co/models/NovelAI/genji-python-6B';
} }
} }
let retries: number = 0;
let response!: FetchResponse; const response = await DEPS.fetch(
while (retries++ < 3) { completionUrl,
response = await fetch( {
completionUrl, method: "POST",
{ headers: {
method: "POST", "Authorization": `Bearer ${apiKey}`,
headers: { "Content-Type": "application/json",
"Authorization": `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
inputs: prompt,
parameters: {
return_full_text: false,
max_new_tokens: 50,
},
}),
}, },
); body: JSON.stringify({
if (response.status === 503) { inputs: prompt,
log.error(`Sleeping for 10s - HuggingFace API returned ${response.status}: ${await response.text()}`); parameters: {
await delay(10000); return_full_text: false,
continue; max_new_tokens: 50,
} },
}),
},
);
if (response.status === 503) {
log.error(`Sleeping for 10s - HuggingFace API returned ${response.status}: ${await response.text()}`);
await delay(10000);
} }
if (response.status !== 200) { if (response.status !== 200) {
const text = await response.text(); const text = await response.text();

2
test/formula-dataset/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
data/templates
data/cache

View File

@ -0,0 +1,55 @@
table_id,col_id,doc_id,Description
Contacts,Send_Email,hQHXqAQXceeQBPvRw5sSs1,"Link to compose an email, if there is one"
Tasks,Today,hQHXqAQXceeQBPvRw5sSs1,Needs to be done today (or every day)
Tasks,Week_Day,hQHXqAQXceeQBPvRw5sSs1,Full name of deadline weekday
Expenses,Month,55Q2EtTbFvB1N6iizLh4Rk,e.g. 2022-01
Payroll,Date_Range,5pHLanQNThxkEaEJHKJUf5,"The start date, followed by a dash (no spaces) and the end date if there is one. Dates are month/day with no leading zeroes."
Payroll,Payment,5pHLanQNThxkEaEJHKJUf5,"Total payment amount for hours worked, rounded to the nearest cent."
Payroll_summary_Pay_Period_Person,Dates,5pHLanQNThxkEaEJHKJUf5,"All date ranges in the group, separated by a comma and a space"
People,Full_Name,5pHLanQNThxkEaEJHKJUf5,"e.g. Doe, John"
General_Ledger,Quarter,2YwYBWpREY2a1N2NV7cb55,e.g. 2020 Q4
General_Ledger,Year,2YwYBWpREY2a1N2NV7cb55,"Just the year of the date, as a string"
Time_Calculator,Time_Worked,np7TVHmuvFcHmo1K8h7Ur4,Formatted as hours:minutes. No leading zeroes for hours.
Time_Calculator,Seconds_Worked,np7TVHmuvFcHmo1K8h7Ur4,"Number of seconds between start/end times, if they're both there"
Funding_Source_summary,Debt_to_Equity,qprycQa2TVwajAe6Hb3bUZ,Ratio of the total amounts in the group where the type is Debt vs Equity
Invoices,Client,bReAxyLmzmEQfHF5L5Sc1e,Client's name followed by their address on the next line
Invoices,Due,bReAxyLmzmEQfHF5L5Sc1e,30 days after the invoice date
Invoices,Invoice_ID,bReAxyLmzmEQfHF5L5Sc1e,Invoice date followed by the client's name in brackets
Projects,Project_Name,bReAxyLmzmEQfHF5L5Sc1e,"Client name and project name, e.g. John Doe: Big project"
Time_Log,Date,bReAxyLmzmEQfHF5L5Sc1e,Start date if there is one
Time_Log,Duration_hrs_,bReAxyLmzmEQfHF5L5Sc1e,Duration (if there is one) in hours rounded to two decimal places
Time_Log,Duration_min_,bReAxyLmzmEQfHF5L5Sc1e,"Number of minutes between start and end time. If either time is missing, leave blank. If end is before start, give 0."
Filtered_By_Formula,LabelCount,9nNr9uQwoXWAvxcWQDygh6,"1 if the state is CA, otherwise 0"
Objects,Address,pyMHqncEspfZN5zfShCwT8,"City and state, separated by comma space"
Books,search_terms,hdXy57qLiyNf35oNLzzgBG,"Title and author name, with a space in between"
BOM_Items,Cost,e4gEm7dt4cgBMkouVBNMeY,Total cost if both quantity and cost are given
Bill_Of_Materials,Cost,e4gEm7dt4cgBMkouVBNMeY,Total cost
All_Responses,Entry,qvND7WUcuNb2fU4n1vBJ7f,"Name and submitted date in the format ""Name - month-day"""
All_Responses,Month,qvND7WUcuNb2fU4n1vBJ7f,Submitted month (full name) and year
Cap_Table,Common_Stock,iXggjrCPHut9u2BuhJxJkk,"If the class is Options, RSUs, or Option Pool, return 0, otherwise return the fully diluted value."
Cap_Table,Fully_Diluted,iXggjrCPHut9u2BuhJxJkk,"The granted amount, minus the total pool used if the class is Option Pool"
Cap_Table,Fully_Diluted_,iXggjrCPHut9u2BuhJxJkk,Fully diluted as a fraction of the total
Classes,Spots_Left,swLvb3Fic22gVzrdczcAoZ,or Full
All_Survey_Responses,Product_Experience_Score,4ktYzGV1mUipSiQFtkLGqm,"A number based on the experience:
Very Dissatisfied: 1
Somewhat Dissatisfied: 2
Neutral: 3
Somewhat Satisfied: 4
Very Satisfied: 5"
Time_Sheet_Entries_summary_Account_Employee_Month,Total_Spend,oGxD8EnzeVs6vSQK3QBrUv,Total hours worked times hourly rate
Time_Sheets,Title,oGxD8EnzeVs6vSQK3QBrUv,Month number and employee full name separated by a space
All_Products,SKU,sXsBGDTKau1F3fvxkCyoaJ,"Brand code, color code, and size, separated by dashes without spaces"
All_Products,Stock_Alert,sXsBGDTKau1F3fvxkCyoaJ,"If the amount in stock and on order is more than 5: In Stock
If it's 0: OUT OF STOCK
Otherwise: Low Stock"
Incoming_Order_Line_Items,Received_Qty,sXsBGDTKau1F3fvxkCyoaJ,"The quantity, but only if the order is received"
Theaters,Latitude2,dKztiPYamcCpttT1LT1FnU,Coordinate before the comma
Theaters,Longitude,dKztiPYamcCpttT1LT1FnU,Coordinate after the comma and space
Families,Amount_Due,cJcSKdUC3nLNAv4wTjAxA6,"Total charged minus total paid, capped at 0"
Gifts_summary_Occasion_Who_Year,Over_Budget_,dr6epxpXUcy9rsFVUoXTEe,Did we spend more than the budget for this person?
Apartments,Have_Picture,5iMYwmESm33JpEECSqdZk2,Yes or No depending on if there's a picture
Leases,Lease_End_Date,5iMYwmESm33JpEECSqdZk2,Start date plus the lease term in years minus one day
Tenancies,Minor,5iMYwmESm33JpEECSqdZk2,"1 if the age is less than 18, otherwise 0"
Game_Schedule,Loser,1xJAp2uxM7tFCVUbEofKoF,The team that won fewer sets
Standings,Win_Rate,1xJAp2uxM7tFCVUbEofKoF,Ratio of wins to total games
Prepare_Invoices,Due,9NH6D58FmxwPP43nw7uzQK,One month after the issued date if there is one
1 table_id col_id doc_id Description
2 Contacts Send_Email hQHXqAQXceeQBPvRw5sSs1 Link to compose an email, if there is one
3 Tasks Today hQHXqAQXceeQBPvRw5sSs1 Needs to be done today (or every day)
4 Tasks Week_Day hQHXqAQXceeQBPvRw5sSs1 Full name of deadline weekday
5 Expenses Month 55Q2EtTbFvB1N6iizLh4Rk e.g. 2022-01
6 Payroll Date_Range 5pHLanQNThxkEaEJHKJUf5 The start date, followed by a dash (no spaces) and the end date if there is one. Dates are month/day with no leading zeroes.
7 Payroll Payment 5pHLanQNThxkEaEJHKJUf5 Total payment amount for hours worked, rounded to the nearest cent.
8 Payroll_summary_Pay_Period_Person Dates 5pHLanQNThxkEaEJHKJUf5 All date ranges in the group, separated by a comma and a space
9 People Full_Name 5pHLanQNThxkEaEJHKJUf5 e.g. Doe, John
10 General_Ledger Quarter 2YwYBWpREY2a1N2NV7cb55 e.g. 2020 Q4
11 General_Ledger Year 2YwYBWpREY2a1N2NV7cb55 Just the year of the date, as a string
12 Time_Calculator Time_Worked np7TVHmuvFcHmo1K8h7Ur4 Formatted as hours:minutes. No leading zeroes for hours.
13 Time_Calculator Seconds_Worked np7TVHmuvFcHmo1K8h7Ur4 Number of seconds between start/end times, if they're both there
14 Funding_Source_summary Debt_to_Equity qprycQa2TVwajAe6Hb3bUZ Ratio of the total amounts in the group where the type is Debt vs Equity
15 Invoices Client bReAxyLmzmEQfHF5L5Sc1e Client's name followed by their address on the next line
16 Invoices Due bReAxyLmzmEQfHF5L5Sc1e 30 days after the invoice date
17 Invoices Invoice_ID bReAxyLmzmEQfHF5L5Sc1e Invoice date followed by the client's name in brackets
18 Projects Project_Name bReAxyLmzmEQfHF5L5Sc1e Client name and project name, e.g. John Doe: Big project
19 Time_Log Date bReAxyLmzmEQfHF5L5Sc1e Start date if there is one
20 Time_Log Duration_hrs_ bReAxyLmzmEQfHF5L5Sc1e Duration (if there is one) in hours rounded to two decimal places
21 Time_Log Duration_min_ bReAxyLmzmEQfHF5L5Sc1e Number of minutes between start and end time. If either time is missing, leave blank. If end is before start, give 0.
22 Filtered_By_Formula LabelCount 9nNr9uQwoXWAvxcWQDygh6 1 if the state is CA, otherwise 0
23 Objects Address pyMHqncEspfZN5zfShCwT8 City and state, separated by comma space
24 Books search_terms hdXy57qLiyNf35oNLzzgBG Title and author name, with a space in between
25 BOM_Items Cost e4gEm7dt4cgBMkouVBNMeY Total cost if both quantity and cost are given
26 Bill_Of_Materials Cost e4gEm7dt4cgBMkouVBNMeY Total cost
27 All_Responses Entry qvND7WUcuNb2fU4n1vBJ7f Name and submitted date in the format "Name - month-day"
28 All_Responses Month qvND7WUcuNb2fU4n1vBJ7f Submitted month (full name) and year
29 Cap_Table Common_Stock iXggjrCPHut9u2BuhJxJkk If the class is Options, RSUs, or Option Pool, return 0, otherwise return the fully diluted value.
30 Cap_Table Fully_Diluted iXggjrCPHut9u2BuhJxJkk The granted amount, minus the total pool used if the class is Option Pool
31 Cap_Table Fully_Diluted_ iXggjrCPHut9u2BuhJxJkk Fully diluted as a fraction of the total
32 Classes Spots_Left swLvb3Fic22gVzrdczcAoZ or Full
33 All_Survey_Responses Product_Experience_Score 4ktYzGV1mUipSiQFtkLGqm A number based on the experience: Very Dissatisfied: 1 Somewhat Dissatisfied: 2 Neutral: 3 Somewhat Satisfied: 4 Very Satisfied: 5
34 Time_Sheet_Entries_summary_Account_Employee_Month Total_Spend oGxD8EnzeVs6vSQK3QBrUv Total hours worked times hourly rate
35 Time_Sheets Title oGxD8EnzeVs6vSQK3QBrUv Month number and employee full name separated by a space
36 All_Products SKU sXsBGDTKau1F3fvxkCyoaJ Brand code, color code, and size, separated by dashes without spaces
37 All_Products Stock_Alert sXsBGDTKau1F3fvxkCyoaJ If the amount in stock and on order is more than 5: In Stock If it's 0: OUT OF STOCK Otherwise: Low Stock
38 Incoming_Order_Line_Items Received_Qty sXsBGDTKau1F3fvxkCyoaJ The quantity, but only if the order is received
39 Theaters Latitude2 dKztiPYamcCpttT1LT1FnU Coordinate before the comma
40 Theaters Longitude dKztiPYamcCpttT1LT1FnU Coordinate after the comma and space
41 Families Amount_Due cJcSKdUC3nLNAv4wTjAxA6 Total charged minus total paid, capped at 0
42 Gifts_summary_Occasion_Who_Year Over_Budget_ dr6epxpXUcy9rsFVUoXTEe Did we spend more than the budget for this person?
43 Apartments Have_Picture 5iMYwmESm33JpEECSqdZk2 Yes or No depending on if there's a picture
44 Leases Lease_End_Date 5iMYwmESm33JpEECSqdZk2 Start date plus the lease term in years minus one day
45 Tenancies Minor 5iMYwmESm33JpEECSqdZk2 1 if the age is less than 18, otherwise 0
46 Game_Schedule Loser 1xJAp2uxM7tFCVUbEofKoF The team that won fewer sets
47 Standings Win_Rate 1xJAp2uxM7tFCVUbEofKoF Ratio of wins to total games
48 Prepare_Invoices Due 9NH6D58FmxwPP43nw7uzQK One month after the issued date if there is one

View File

@ -0,0 +1,12 @@
#!/usr/bin/env node
"use strict";

// Launcher for the AI assistance evaluation: points DATA_PATH at the dataset directory next to
// this file, registers the compiled output directories as module roots, then starts the run.
const path = require('path');
const appModulePath = require('app-module-path');

process.env.DATA_PATH = path.join(__dirname, 'data');

// Walk three directory levels up from this file's directory to reach the code root.
const parentDir = path.dirname(__dirname);
const grandParentDir = path.dirname(parentDir);
const codeRoot = path.dirname(grandParentDir);

// Same registration order as before: _build, _build/core, _build/ext.
for (const subPath of [[], ['core'], ['ext']]) {
  appModulePath.addPath(path.join(codeRoot, '_build', ...subPath));
}

require('test/formula-dataset/runCompletion_impl').runCompletion().catch(console.error);

View File

@ -0,0 +1,252 @@
/**
* This module holds an evaluation script for AI assistance. It tests AI assistance on the formula
* dataset. The formula dataset is made of an index file (formula-dataset-index.csv) and a list of
* Grist documents hosted on S3. Each row in the index file references one column (doc_id, table_id,
* col_id) among these documents, along with a free-text description.
*
* For each entry of the dataset, the script loads the document, requests assistance based on the
* description, and applies the suggested actions to the document. Then it compares the column
* values before and after. Finally it reverts the modification.
*
* The list of Grist documents for the formula dataset is a snapshot of all template documents
* taken somewhere in the beginning of Feb 2023.
*
* The script maintains a simple cache of all requests to the AI, to save on AI requests.
*
* USAGE:
* OPENAI_API_KEY=<my_openai_api_key> node core/test/formula-dataset/runCompletion.js
*
* # WITH VERBOSE:
* VERBOSE=1 OPENAI_API_KEY=<my_openai_api_key> node core/test/formula-dataset/runCompletion.js
*
* # to reset cache (the cache is a directory holding one JSON file per request)
* rm -rf core/test/formula-dataset/data/cache
*/
import { ActiveDoc } from "app/server/lib/ActiveDoc";
import { DEPS } from "app/server/lib/Assistance";
import log from 'app/server/lib/log';
import crypto from 'crypto';
import parse from 'csv-parse/lib/sync';
import fetch, {RequestInfo, RequestInit, Response} from 'node-fetch';
import * as fs from "fs";
import JSZip from "jszip";
import { isEqual, MapCache } from "lodash";
import path from 'path';
import * as os from 'os';
import { pipeline } from 'stream';
import { createDocTools } from "test/server/docTools";
import { promisify } from 'util';
// Promise-returning version of stream.pipeline, used to write the downloaded archive to disk.
const streamPipeline = promisify(pipeline);
// Root directory of the dataset files; can be overridden with the DATA_PATH environment variable.
const DATA_PATH = process.env.DATA_PATH || path.join(__dirname, 'data');
// Directory holding the extracted template documents (<doc_id>.grist files).
const PATH_TO_DOC = path.join(DATA_PATH, 'templates');
// Index file listing the columns to evaluate, with their descriptions.
const PATH_TO_CSV = path.join(DATA_PATH, 'formula-dataset-index.csv');
// Directory where cached AI responses are stored (one JSON file per request).
const PATH_TO_CACHE = path.join(DATA_PATH, 'cache');
// Zip archive containing the template documents of the dataset.
const TEMPLATE_URL = "https://grist-static.com/datasets/grist_dataset_formulai_2023_02_20.zip";
// Original fetch implementation used by Assistance; restored once the evaluation is over.
const oldFetch = DEPS.fetch;
/**
 * One row of the dataset index file (formula-dataset-index.csv). Property names match the CSV
 * column headers.
 */
interface FormulaRec {
// Id of the table holding the column under test.
table_id: string;
// Id of the column whose formula the assistant is asked to produce.
col_id: string;
// Id of the document; the document is loaded from `<doc_id>.grist` in the templates directory.
doc_id: string;
// Free-text description of the column, sent to the assistant.
Description: string;
}
// Counters reported by printStats() at the end of a run.
const _stats = {
// Incremented by fetchWithCache each time a request is not answered from the cache.
callCount: 0,
};
/**
 * Downloads the template archive from TEMPLATE_URL and extracts its top-level files into
 * PATH_TO_DOC.
 *
 * Files are first extracted into a staging directory next to PATH_TO_DOC, which is renamed to
 * PATH_TO_DOC only once everything succeeded. Since the existence of PATH_TO_DOC is what tells
 * runCompletion() that templates are available, a failed download or extraction must not leave
 * that directory behind; otherwise later runs would skip the download and fail on missing
 * documents.
 *
 * Throws if the download does not return a successful response.
 */
async function _downloadTemplates(): Promise<void> {
  // create tempdir for the archive
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'grist-templates-'));
  const destPath = path.join(dir, 'template.zip');

  // start downloading
  console.log(
    `source url: ${TEMPLATE_URL}\n` +
    `destination: ${destPath}\n` +
    `download...`
  );
  const response = await fetch(TEMPLATE_URL);
  if (!response.ok) { throw new Error(`unexpected response ${response.statusText}`); }
  await streamPipeline(response.body, fs.createWriteStream(destPath));
  console.log('done!\n\n' +
    'start extraction...');

  // unzip to a staging directory; it only becomes PATH_TO_DOC when extraction is complete
  const data = fs.readFileSync(destPath);
  const zip = await JSZip.loadAsync(data);
  const stagingDir = `${PATH_TO_DOC}.partial`;
  fs.mkdirSync(stagingDir, {recursive: true});
  let count = 0;
  for (const filename of Object.keys(zip.files)) {
    // only top-level entries of the archive are extracted
    if (filename.includes('/')) { continue; }
    const fileBuffer = await zip.files[filename].async('nodebuffer');
    fs.writeFileSync(path.join(stagingDir, filename), fileBuffer);
    count++;
  }
  fs.renameSync(stagingDir, PATH_TO_DOC);
  console.log(
    `Successfully extracted ${count} template files to ${PATH_TO_DOC}`
  );
}

/**
 * Runs the AI assistance evaluation against the formula dataset.
 *
 * Downloads the template documents if they are not available locally, then, for each record of
 * the index file: loads the document, asks for assistance using the record's description,
 * applies the suggested actions, compares the column values with the values from before the
 * change, and reverts the change. Progress and the final score are printed to the console.
 *
 * While the evaluation is running, DEPS.fetch is replaced with a caching fetch; the original is
 * restored at the end, whether the run succeeded or not.
 */
export async function runCompletion(): Promise<void> {
  // if template directory not exists, download and extract the templates
  if (!fs.existsSync(PATH_TO_DOC)) {
    await _downloadTemplates();
  }

  const content = fs.readFileSync(PATH_TO_CSV, {encoding: 'utf8'});
  const records = parse(content, {columns: true}) as FormulaRec[];

  // let's group by doc id to save on document loading time
  records.sort((a, b) => a.doc_id.localeCompare(b.doc_id));

  if (!process.env.VERBOSE) {
    log.transports.file.level = 'error'; // Suppress most of log output.
  }

  let activeDoc: ActiveDoc|undefined;
  const docTools = createDocTools();
  const session = docTools.createFakeSession('owners');
  await docTools.before();
  let successCount = 0;

  console.log('Testing AI assistance: ');

  try {
    DEPS.fetch = fetchWithCache;

    for (const rec of records) {
      // load new document
      if (!activeDoc || activeDoc.docName !== rec.doc_id) {
        const docPath = path.join(PATH_TO_DOC, rec.doc_id + '.grist');
        activeDoc = await docTools.loadLocalDoc(docPath);
        await activeDoc.waitForInitialization();
      }

      // get values
      await activeDoc.docData!.fetchTable(rec.table_id);
      const expected = activeDoc.docData!.getTable(rec.table_id)!.getColValues(rec.col_id)!.slice();

      // send prompt
      const tableId = rec.table_id;
      const colId = rec.col_id;
      const description = rec.Description;
      const {suggestedActions} = await activeDoc.getAssistance(session, {tableId, colId, description});

      // apply modification
      const {actionNum} = await activeDoc.applyUserActions(session, suggestedActions);

      // get new values
      const newValues = activeDoc.docData!.getTable(rec.table_id)!.getColValues(rec.col_id)!.slice();

      // revert modification
      const [bundle] = await activeDoc.getActions([actionNum]);
      await activeDoc.applyUserActionsById(session, [bundle!.actionNum], [bundle!.actionHash!], true);

      // compare values
      const success = isEqual(expected, newValues);

      console.log(` ${success ? 'Successfully' : 'Failed to'} complete formula ` +
        `for column ${rec.table_id}.${rec.col_id} (doc=${rec.doc_id})`);

      if (success) {
        successCount++;
      } else {
        // TODO: log the difference between expected and actual, similar to what mocha does on
        // failure.
        // console.log('expected=', expected);
        // console.log('actual=', newValues);
      }
    }
  } finally {
    try {
      await docTools.after();
    } finally {
      // Nested so that a failure in docTools.after() cannot prevent restoring the log level and
      // the original fetch, nor printing the results gathered so far.
      log.transports.file.level = 'debug';
      printStats();
      DEPS.fetch = oldFetch;
      console.log(
        `AI Assistance completed ${successCount} successful prompt on a total of ${records.length};`
      );
    }
  }
}
/**
 * Starts the evaluation without waiting for it; a rejection of the run is reported on the
 * console instead of being propagated to the caller.
 */
export function main() {
  runCompletion().catch((err) => console.error(err));
}
/** Prints the number of AI requests counted in `_stats` during the run. */
function printStats() {
  const {callCount} = _stats;
  console.log(`Ai assistance requests stats: ${callCount} calls`);
}
/**
 * Minimal file-backed cache. Each entry lives in its own JSON file under PATH_TO_CACHE, named
 * after the md5 hash of the key, and stores both the key (as `requestBody`) and the parsed value
 * (as `responseBody`). Values passed to set() must be JSON text; get() returns JSON text.
 * Removing entries (clear/delete) is not supported.
 */
class JsonCache implements MapCache {
  constructor() {
    // Make sure the cache directory is there before any read or write.
    const cacheDirExists = fs.existsSync(PATH_TO_CACHE);
    if (!cacheDirExists) {
      fs.mkdirSync(path.join(PATH_TO_CACHE), {recursive: true});
    }
  }

  /** Returns the cached value for `key` re-serialized as JSON text, or undefined if absent. */
  public get(key: string): any {
    if (this.has(key)) {
      const raw = fs.readFileSync(this._path(key), 'utf8');
      const entry = JSON.parse(raw);
      return JSON.stringify(entry.responseBody);
    }
    return undefined;
  }

  /** Tells whether a cache file exists for `key`. */
  public has(key: string): boolean {
    const entryPath = this._path(key);
    return fs.existsSync(entryPath);
  }

  /** Stores `value` (JSON text) for `key`; throws if `value` is not valid JSON. */
  public set(key: string, value: any): JsonCache {
    const serialized = JSON.stringify({
      requestBody: key,
      responseBody: JSON.parse(value),
    });
    fs.writeFileSync(this._path(key), serialized);
    return this;
  }

  public clear(): void {
    throw new Error('not implemented');
  }

  public delete(_key: string): boolean {
    throw new Error('not implemented');
  }

  /** Location of the file backing `key`. */
  private _path(key: string) {
    const fileName = `${this._hash(key)}.json`;
    return path.join(PATH_TO_CACHE, fileName);
  }

  /** Hex-encoded md5 digest of `key`. */
  private _hash(key: string) {
    const md5 = crypto.createHash('md5');
    md5.update(key);
    return md5.digest('hex');
  }
}
// On-disk cache of successful responses.
const _cache = new JsonCache();
// Requests currently in flight, by cache key. An entry is removed as soon as its request settles.
const _queue = new Map<string, Promise<Response>>();

/**
 * Calls fetch and uses caching.
 *
 * The cache key is made of the request url and body. Responses with status 200 are stored in
 * the on-disk cache and served from there on later calls, as a fresh Response with status 200.
 * Other responses are returned as is and never cached, so that callers retrying a failed
 * request actually hit the network again.
 *
 * If an identical request is already in flight, no second request is sent: the caller receives
 * a clone of the in-flight response (a response body can only be read once).
 *
 * `_stats.callCount` is incremented once per request actually sent that received a response.
 */
async function fetchWithCache(rinfo: RequestInfo, init?: RequestInit): Promise<Response>
async function fetchWithCache(rinfo: any, init?: RequestInit): Promise<Response> {
  // rinfo may be a Request-like object (.url), a URL-like object (.href) or a plain string.
  const url: string = rinfo.url || rinfo.href || rinfo;
  const hash = JSON.stringify({url, body: init?.body});

  if (_cache.has(hash)) { return new Response(_cache.get(hash), {status: 200}); }

  const inFlight = _queue.get(hash);
  if (inFlight) {
    // Share the pending request. Each caller needs its own readable body, hence clone.
    return (await inFlight).clone();
  }

  const request = fetch(url, init);
  _queue.set(hash, request);
  try {
    const response = await request;
    _stats.callCount++;
    if (response.status === 200) {
      _cache.set(hash, await response.clone().text()); // response cannot be read twice, hence clone
    }
    return response;
  } finally {
    // Always forget the settled request: keeping it would make every later identical call
    // (e.g. a retry after an error) reuse a failed or already-consumed response.
    _queue.delete(hash);
  }
}

// ts expect this function
fetchWithCache.isRedirect = fetch.isRedirect;

View File

@ -1,3 +1,4 @@
import {Role} from 'app/common/roles';
import {getDocWorkerMap} from 'app/gen-server/lib/DocWorkerMap'; import {getDocWorkerMap} from 'app/gen-server/lib/DocWorkerMap';
import {ActiveDoc} from 'app/server/lib/ActiveDoc'; import {ActiveDoc} from 'app/server/lib/ActiveDoc';
import {DummyAuthorizer} from 'app/server/lib/Authorizer'; import {DummyAuthorizer} from 'app/server/lib/Authorizer';
@ -82,8 +83,8 @@ export function createDocTools(options: {persistAcrossCases?: boolean,
const systemSession = makeExceptionalDocSession('system'); const systemSession = makeExceptionalDocSession('system');
return { return {
/** create a fake session for use when applying user actions to a document */ /** create a fake session for use when applying user actions to a document */
createFakeSession(): DocSession { createFakeSession(role: Role = 'editors'): DocSession {
return {client: null, authorizer: new DummyAuthorizer('editors', 'doc')} as any as DocSession; return {client: null, authorizer: new DummyAuthorizer(role, 'doc')} as any as DocSession;
}, },
/** create a throw-away, empty document for testing purposes */ /** create a throw-away, empty document for testing purposes */