(core) Porting the AI evaluation script

Summary:
Porting the script that runs an evaluation against our formula dataset.

To test, you need an OpenAI API key (see https://platform.openai.com/)
or a Hugging Face API key (it should work as well); then check out the branch and run:

`OPENAI_API_KEY=<my_openai_api_key> node core/test/formula-dataset/runCompletion.js`

Test Plan:
Needs manual testing: so far there is no plan to make it part of CI.

The current score is somewhere around 34 successful prompts out of a total of 47.

Reviewers: paulfitz

Reviewed By: paulfitz

Subscribers: jarek

Differential Revision: https://phab.getgrist.com/D3816
This commit is contained in:
Cyprien P
2023-03-15 09:52:17 +01:00
parent 2b2e19c5b5
commit 1ff93f89c2
8 changed files with 377 additions and 37 deletions

View File

@@ -951,7 +951,7 @@ export function assertIsDefined<T>(name: string, value: T): asserts value is Non
* Calls function `fn`, passes any thrown errors to function `recover`, and finally calls `fn`
* once more if `recover` doesn't throw.
*/
export async function retryOnce<T>(fn: () => Promise<T>, recover: (e: unknown) => Promise<void>): Promise<T> {
export async function retryOnce<T>(fn: () => Promise<T>, recover: (e: unknown) => Promise<void>): Promise<T> {
try {
return await fn();
} catch (e) {
@@ -964,7 +964,7 @@ export function assertIsDefined<T>(name: string, value: T): asserts value is Non
* Checks if value is 'empty' (like null, undefined, empty string, empty array/set/map, empty object).
* Values like 0, true, false are not empty.
*/
export function notSet(value: any) {
export function notSet(value: any) {
return value === undefined || value === null || value === ''
|| (Array.isArray(value) && !value.length)
|| (typeof value === 'object' && !Object.keys(value).length)

View File

@@ -34,7 +34,7 @@ import {
TransformRule
} from 'app/common/ActiveDocAPI';
import {ApiError} from 'app/common/ApiError';
import {mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate';
import {asyncOnce, mapGetOrSet, MapWithTTL} from 'app/common/AsyncCreate';
import {AttachmentColumns, gatherAttachmentIds, getAttachmentColumns} from 'app/common/AttachmentColumns';
import {
BulkAddRecord,
@@ -230,6 +230,11 @@ export class ActiveDoc extends EventEmitter {
private _inactivityTimer = new InactivityTimer(() => this.shutdown(), Deps.ACTIVEDOC_TIMEOUT * 1000);
private _recoveryMode: boolean = false;
private _shuttingDown: boolean = false;
private _afterShutdownCallback?: () => Promise<void>;
// catch & report error so that asyncOnce does not get cleared.
private _doShutdown = asyncOnce(
() => this._doShutdownImpl().catch((e) => log.error('Uncaught shutdown error', e))
);
/**
* In cases where large numbers of documents are restarted simultaneously
@@ -493,6 +498,14 @@ export class ActiveDoc extends EventEmitter {
public async shutdown(options: {
afterShutdown?: () => Promise<void>
} = {}): Promise<void> {
if (options.afterShutdown) {
this._afterShutdownCallback = options.afterShutdown;
}
await this._doShutdown();
}
private async _doShutdownImpl(): Promise<void> {
const docSession = makeExceptionalDocSession('system');
this._log.debug(docSession, "shutdown starting");
try {
@@ -576,7 +589,7 @@ export class ActiveDoc extends EventEmitter {
} catch (err) {
this._log.error(docSession, "failed to shutdown some resources", err);
}
await options.afterShutdown?.();
await this._afterShutdownCallback?.();
} finally {
this._docManager.removeActiveDoc(this);
}

View File

@@ -4,16 +4,25 @@
import {delay} from 'app/common/delay';
import log from 'app/server/lib/log';
import fetch, { Response as FetchResponse} from 'node-fetch';
import fetch from 'node-fetch';
export const DEPS = { fetch };
export async function sendForCompletion(prompt: string): Promise<string> {
let completion: string|null = null;
if (process.env.OPENAI_API_KEY) {
completion = await sendForCompletionOpenAI(prompt);
}
if (process.env.HUGGINGFACE_API_KEY) {
completion = await sendForCompletionHuggingFace(prompt);
let retries: number = 0;
while(retries++ < 3) {
try {
if (process.env.OPENAI_API_KEY) {
completion = await sendForCompletionOpenAI(prompt);
}
if (process.env.HUGGINGFACE_API_KEY) {
completion = await sendForCompletionHuggingFace(prompt);
}
break;
} catch(e) {
await delay(1000);
}
}
if (completion === null) {
throw new Error("Please set OPENAI_API_KEY or HUGGINGFACE_API_KEY (and optionally COMPLETION_MODEL)");
@@ -29,7 +38,7 @@ async function sendForCompletionOpenAI(prompt: string) {
if (!apiKey) {
throw new Error("OPENAI_API_KEY not set");
}
const response = await fetch(
const response = await DEPS.fetch(
"https://api.openai.com/v1/completions",
{
method: "POST",
@@ -73,31 +82,27 @@ async function sendForCompletionHuggingFace(prompt: string) {
completionUrl = 'https://api-inference.huggingface.co/models/NovelAI/genji-python-6B';
}
}
let retries: number = 0;
let response!: FetchResponse;
while (retries++ < 3) {
response = await fetch(
completionUrl,
{
method: "POST",
headers: {
"Authorization": `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
inputs: prompt,
parameters: {
return_full_text: false,
max_new_tokens: 50,
},
}),
const response = await DEPS.fetch(
completionUrl,
{
method: "POST",
headers: {
"Authorization": `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
);
if (response.status === 503) {
log.error(`Sleeping for 10s - HuggingFace API returned ${response.status}: ${await response.text()}`);
await delay(10000);
continue;
}
body: JSON.stringify({
inputs: prompt,
parameters: {
return_full_text: false,
max_new_tokens: 50,
},
}),
},
);
if (response.status === 503) {
log.error(`Sleeping for 10s - HuggingFace API returned ${response.status}: ${await response.text()}`);
await delay(10000);
}
if (response.status !== 200) {
const text = await response.text();