2023-02-08 15:46:34 +00:00
|
|
|
/**
|
|
|
|
* Module with functions used for AI formula assistance.
|
|
|
|
*/
|
|
|
|
|
2023-07-24 18:56:38 +00:00
|
|
|
import {
|
|
|
|
AssistanceContext,
|
|
|
|
AssistanceMessage,
|
|
|
|
AssistanceRequest,
|
|
|
|
AssistanceResponse
|
|
|
|
} from 'app/common/AssistancePrompts';
|
2023-02-08 15:46:34 +00:00
|
|
|
import {delay} from 'app/common/delay';
|
2023-05-08 18:15:22 +00:00
|
|
|
import {DocAction} from 'app/common/DocActions';
|
2023-07-20 14:25:26 +00:00
|
|
|
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
|
2023-07-20 17:34:25 +00:00
|
|
|
import {getDocSessionUser, OptDocSession} from 'app/server/lib/DocSession';
|
2023-02-08 15:46:34 +00:00
|
|
|
import log from 'app/server/lib/log';
|
2023-03-15 08:52:17 +00:00
|
|
|
import fetch from 'node-fetch';
|
2023-07-20 17:34:25 +00:00
|
|
|
import {createHash} from "crypto";
|
|
|
|
import {getLogMetaFromDocSession} from "./serverUtils";
|
2023-02-08 15:46:34 +00:00
|
|
|
|
(core) Improved error messages, retries, and handling of token limits in AI assistant
Summary:
In a nutshell:
- More specific and helpful error messages are shown to the user
- API requests are only retried when needed
- The system deals with reaching the maximum token limit better, especially by switching to a model with a bigger limit
In more detail:
- `COMPLETION_MODEL` configuration has been removed. By default `gpt-3.5-turbo-0613` is used which accepts 4k tokens. If that's not enough, `gpt-3.5-turbo-16k-0613` is used instead.
- Switching to the bigger model happens when either the prompt is too long by itself (the API immediately returns an error code) or the model reaches the 4k limit itself in the process of generating a response and thus returns an incomplete response. The latter case is made possible by removing the `max_tokens: 1500` in the request, which was very generous and would have led to switching to the more expensive model more often than needed. The downside is that the user has to wait a bit longer for the response.
- If the bigger 16k token limit is also exceeded, the assistant immediately responds (instead of retrying as before) with an error message including suggestions. The suggestions include restarting the conversation if and only if the user has sent multiple messages.
- If a request fails because Grist has reached its OpenAI monthly billing quota, the assistant immediately responds (instead of retrying as before) with an error message suggesting that the user try again tomorrow.
- If a request fails for some other reason, the assistant retries, and if all attempts fail then the user is told to try again in a few minutes and is shown the exact error message, including the API response if there is one.
- Retrying only happens when an API request fails, whereas previously the system also retried errors from a much bigger scope which included calls to the sandbox. The downside is that the hugging face assistant no longer retries, although that code is currently disabled anyway.
- The assistant no longer waits an additional second after the final retry attempt fails.
Test Plan: Added a new server test file with several unit tests using faked OpenAI responses, including the happy path which wasn't really tested before.
Reviewers: dsagal
Reviewed By: dsagal
Subscribers: dsagal
Differential Revision: https://phab.getgrist.com/D3955
2023-07-18 10:03:31 +00:00
|
|
|
// Dependencies collected in one object so that tests can mock or replace them.
// The runCompletion script also replaces `fetch` in order to add caching.
export const DEPS = {
  fetch,
  delayTime: 1000,
};
|
2023-02-08 15:46:34 +00:00
|
|
|
|
2023-05-08 18:15:22 +00:00
|
|
|
/**
 * Contract for an assistant: something that helps a user do things with
 * their document by talking to an external LLM endpoint.
 */
interface Assistant {
  /**
   * Handle `request` for `doc` on behalf of `session`, resolving with the
   * assistant's response.
   */
  apply(
    session: OptDocSession,
    doc: AssistanceDoc,
    request: AssistanceRequest,
  ): Promise<AssistanceResponse>;
}
|
2023-03-23 18:22:28 +00:00
|
|
|
|
2023-05-08 18:15:22 +00:00
|
|
|
/**
 * The document-side operations that assistant implementations rely on.
 * Currently a somewhat ad-hoc collection.
 */
interface AssistanceDoc extends ActiveDoc {
  /**
   * Produce one particular prompt that is coded in the data engine.
   * The prompt consists of python code for some tables, followed by the
   * start of a function body carrying the given docstring.
   * The "V1" suffix flags that this is just one specific prompt; trying
   * out variants would be worthwhile.
   */
  assistanceSchemaPromptV1(
    session: OptDocSession,
    options: AssistanceSchemaPromptV1Context,
  ): Promise<string>;

  /**
   * Apply some tweaks to a formula once it has been generated.
   */
  assistanceFormulaTweak(txt: string): Promise<string>;

  /**
   * Evaluate the existing formula, returning its result together with the
   * recorded values of (possibly nested) attributes of `rec`.
   * AI assistance uses this when fixing an incorrect formula.
   */
  assistanceEvaluateFormula(
    options: AssistanceContext,
  ): Promise<AssistanceFormulaEvaluationResult>;
}
|
|
|
|
|
|
|
|
/**
 * Outcome of evaluating an existing formula, as returned by
 * `assistanceEvaluateFormula`.
 */
export interface AssistanceFormulaEvaluationResult {
  /** True if an exception was raised. */
  error: boolean;

  /** Repr of the return value, OR the exception message. */
  result: string;

  /**
   * Attributes of `rec` as recorded at evaluation time.
   * For nested attributes a key may look like "rec.foo.bar".
   */
  attributes: Record<string, string>;

  /** The code that was evaluated, without special grist syntax. */
  formula: string;
}
|
|
|
|
|
|
|
|
/**
 * Options accepted by `assistanceSchemaPromptV1`: the table and column
 * identifiers plus the docstring to use.
 */
export interface AssistanceSchemaPromptV1Context {
  tableId: string;
  colId: string;
  docString: string;
}
|
|
|
|
|
(core) Improved error messages, retries, and handling of token limits in AI assistant
Summary:
In a nutshell:
- More specific and helpful error messages are shown to the user
- API requests are only retried when needed
- The system deals with reaching the maximum token limit better, especially by switching to a model with a bigger limit
In more detail:
- `COMPLETION_MODEL` configuration has been removed. By default `gpt-3.5-turbo-0613` is used which accepts 4k tokens. If that's not enough, `gpt-3.5-turbo-16k-0613` is used instead.
- Switching to the bigger model happens when either the prompt is too long by itself (the API immediately returns an error code) or the model reaches the 4k limit itself in the process of generating a response and thus returns an incomplete response. The latter case is made possible by removing the `max_tokens: 1500` in the request, which was very generous and would have led to switching to the more expensive model more often than needed. The downside is that the user has to wait a bit longer for the response.
- If the bigger 16k token limit is also exceeded, the assistant immediately responds (instead of retrying as before) with an error message including suggestions. The suggestions include restarting the conversation if and only if the user has sent multiple messages.
- If a request fails because Grist has reached its OpenAI monthly billing quota, the assistant immediately responds (instead of retrying as before) with an error message suggesting that the user try again tomorrow.
- If a request fails for some other reason, the assistant retries, and if all attempts fail then the user is told to try again in a few minutes and is shown the exact error message, including the API response if there is one.
- Retrying only happens when an API request fails, whereas previously the system also retried errors from a much bigger scope which included calls to the sandbox. The downside is that the hugging face assistant no longer retries, although that code is currently disabled anyway.
- The assistant no longer waits an additional second after the final retry attempt fails.
Test Plan: Added a new server test file with several unit tests using faked OpenAI responses, including the happy path which wasn't really tested before.
Reviewers: dsagal
Reviewed By: dsagal
Subscribers: dsagal
Differential Revision: https://phab.getgrist.com/D3955
2023-07-18 10:03:31 +00:00
|
|
|
/**
 * Thrown by `_fetchCompletion` when the context length was exceeded while
 * the regular model was in use and a longer context model is configured.
 */
class SwitchToLongerContext extends Error {}
|
|
|
|
|
|
|
|
/**
 * Common base class of the token-limit and quota errors below.
 * NOTE(review): presumably marks failures for which the request should not
 * be retried - confirm against the retry logic.
 */
class NonRetryableError extends Error {}
|
|
|
|
|
|
|
|
/**
 * Token limit exceeded when the conversation holds at most two messages
 * and no switch to a longer context model is possible. The message does
 * not suggest restarting the conversation.
 */
class TokensExceededFirstMessage extends NonRetryableError {
  constructor() {
    const text = [
      "Sorry, there's too much information for the AI to process. ",
      "You'll need to either shorten your message or delete some columns.",
    ].join("");
    super(text);
  }
}
|
|
|
|
|
|
|
|
/**
 * Token limit exceeded when the conversation already holds more than two
 * messages and no switch to a longer context model is possible. The
 * message additionally suggests restarting the conversation.
 */
class TokensExceededLaterMessage extends NonRetryableError {
  constructor() {
    const text = [
      "Sorry, there's too much information for the AI to process. ",
      "You'll need to either shorten your message, restart the conversation, or delete some columns.",
    ].join("");
    super(text);
  }
}
|
|
|
|
|
|
|
|
/**
 * Thrown by `_fetchCompletion` when the API reports the
 * "insufficient_quota" error code.
 */
class QuotaExceededError extends NonRetryableError {
  constructor() {
    const text = [
      "Sorry, the assistant is facing some long term capacity issues. ",
      "Maybe try again tomorrow.",
    ].join("");
    super(text);
  }
}
|
|
|
|
|
|
|
|
/**
 * Error whose text tells the user that the assistant is temporarily
 * unavailable, with the underlying `message` appended in parentheses on
 * a second line.
 */
class RetryableError extends Error {
  constructor(message: string) {
    super(`Sorry, the assistant is unavailable right now. Try again in a few minutes. \n(${message})`);
  }
}
|
|
|
|
|
2023-05-08 18:15:22 +00:00
|
|
|
/**
|
2023-08-18 20:14:42 +00:00
|
|
|
* A flavor of assistant for use with the OpenAI chat completion endpoint
|
|
|
|
* and tools with a compatible endpoint (e.g. llama-cpp-python).
|
2023-06-27 11:39:15 +00:00
|
|
|
* Tested primarily with gpt-3.5-turbo.
|
2023-08-18 20:14:42 +00:00
|
|
|
*
|
|
|
|
* Uses the ASSISTANT_CHAT_COMPLETION_ENDPOINT endpoint if set, else
|
|
|
|
* an OpenAI endpoint. Passes ASSISTANT_API_KEY or OPENAI_API_KEY in
|
|
|
|
* a header if set. An api key is required for the default OpenAI
|
|
|
|
* endpoint.
|
|
|
|
*
|
|
|
|
* If a model string is set in ASSISTANT_MODEL, this will be passed
|
|
|
|
* along. For the default OpenAI endpoint, a gpt-3.5-turbo variant
|
|
|
|
* will be set by default.
|
|
|
|
*
|
|
|
|
* If a request fails because of context length limitation, and the
|
|
|
|
* default OpenAI endpoint is in use, the request will be retried
|
|
|
|
* with ASSISTANT_LONGER_CONTEXT_MODEL (another gpt-3.5
|
|
|
|
* variant by default). Set this variable to "" if this behavior is
|
|
|
|
* not desired for the default OpenAI endpoint. If a custom endpoint was
|
|
|
|
* provided, this behavior will only happen if
|
|
|
|
* ASSISTANT_LONGER_CONTEXT_MODEL is explicitly set.
|
|
|
|
*
|
|
|
|
* An optional ASSISTANT_MAX_TOKENS can be specified.
|
2023-05-08 18:15:22 +00:00
|
|
|
*/
|
|
|
|
export class OpenAIAssistant implements Assistant {
|
(core) Improved error messages, retries, and handling of token limits in AI assistant
Summary:
In a nutshell:
- More specific and helpful error messages are shown to the user
- API requests are only retried when needed
- The system deals with reaching the maximum token limit better, especially by switching to a model with a bigger limit
In more detail:
- `COMPLETION_MODEL` configuration has been removed. By default `gpt-3.5-turbo-0613` is used which accepts 4k tokens. If that's not enough, `gpt-3.5-turbo-16k-0613` is used instead.
- Switching to the bigger model happens when either the prompt is too long by itself (the API immediately returns an error code) or the model reaches the 4k limit itself in the process of generating a response and thus returns an incomplete response. The latter case is made possible by removing the `max_tokens: 1500` in the request, which was very generous and would have led to switching to the more expensive model more often than needed. The downside is that the user has to wait a bit longer for the response.
- If the bigger 16k token limit is also exceeded, the assistant immediately responds (instead of retrying as before) with an error message including suggestions. The suggestions include restarting the conversation if and only if the user has sent multiple messages.
- If a request fails because Grist has reached its OpenAI monthly billing quota, the assistant immediately responds (instead of retrying as before) with an error message suggesting that the user try again tomorrow.
- If a request fails for some other reason, the assistant retries, and if all attempts fail then the user is told to try again in a few minutes and is shown the exact error message, including the API response if there is one.
- Retrying only happens when an API request fails, whereas previously the system also retried errors from a much bigger scope which included calls to the sandbox. The downside is that the hugging face assistant no longer retries, although that code is currently disabled anyway.
- The assistant no longer waits an additional second after the final retry attempt fails.
Test Plan: Added a new server test file with several unit tests using faked OpenAI responses, including the happy path which wasn't really tested before.
Reviewers: dsagal
Reviewed By: dsagal
Subscribers: dsagal
Differential Revision: https://phab.getgrist.com/D3955
2023-07-18 10:03:31 +00:00
|
|
|
/** Model used when no custom endpoint is configured and ASSISTANT_MODEL is not set. */
public static DEFAULT_MODEL = 'gpt-3.5-turbo-0613';

/**
 * Longer context model used when no custom endpoint is configured and
 * ASSISTANT_LONGER_CONTEXT_MODEL is not set.
 */
public static DEFAULT_LONGER_CONTEXT_MODEL = 'gpt-3.5-turbo-16k-0613';
|
(core) Improved error messages, retries, and handling of token limits in AI assistant
Summary:
In a nutshell:
- More specific and helpful error messages are shown to the user
- API requests are only retried when needed
- The system deals with reaching the maximum token limit better, especially by switching to a model with a bigger limit
In more detail:
- `COMPLETION_MODEL` configuration has been removed. By default `gpt-3.5-turbo-0613` is used which accepts 4k tokens. If that's not enough, `gpt-3.5-turbo-16k-0613` is used instead.
- Switching to the bigger model happens when either the prompt is too long by itself (the API immediately returns an error code) or the model reaches the 4k limit itself in the process of generating a response and thus returns an incomplete response. The latter case is made possible by removing the `max_tokens: 1500` in the request, which was very generous and would have led to switching to the more expensive model more often than needed. The downside is that the user has to wait a bit longer for the response.
- If the bigger 16k token limit is also exceeded, the assistant immediately responds (instead of retrying as before) with an error message including suggestions. The suggestions include restarting the conversation if and only if the user has sent multiple messages.
- If a request fails because Grist has reached its OpenAI monthly billing quota, the assistant immediately responds (instead of retrying as before) with an error message suggesting that the user try again tomorrow.
- If a request fails for some other reason, the assistant retries, and if all attempts fail then the user is told to try again in a few minutes and is shown the exact error message, including the API response if there is one.
- Retrying only happens when an API request fails, whereas previously the system also retried errors from a much bigger scope which included calls to the sandbox. The downside is that the hugging face assistant no longer retries, although that code is currently disabled anyway.
- The assistant no longer waits an additional second after the final retry attempt fails.
Test Plan: Added a new server test file with several unit tests using faked OpenAI responses, including the happy path which wasn't really tested before.
Reviewers: dsagal
Reviewed By: dsagal
Subscribers: dsagal
Differential Revision: https://phab.getgrist.com/D3955
2023-07-18 10:03:31 +00:00
|
|
|
|
2023-08-18 20:14:42 +00:00
|
|
|
// Connection settings; all of these are assigned by the constructor from environment variables.
private _apiKey?: string;
private _model?: string;
private _longerContextModel?: string;
private _endpoint: string;

// Parsed from ASSISTANT_MAX_TOKENS (base 10); undefined when that variable is unset or empty.
private _maxTokens = process.env.ASSISTANT_MAX_TOKENS
  ? parseInt(process.env.ASSISTANT_MAX_TOKENS, 10)
  : undefined;
|
2023-05-08 18:15:22 +00:00
|
|
|
|
|
|
|
/**
 * Read the assistant configuration from the environment.
 *
 * Throws if neither an api key (ASSISTANT_API_KEY or OPENAI_API_KEY) nor
 * ASSISTANT_CHAT_COMPLETION_ENDPOINT is set. With a custom endpoint the
 * model settings are taken as-is from the environment; without one, the
 * OpenAI endpoint is used and unset models fall back to the class defaults.
 */
public constructor() {
  const {env} = process;
  const endpoint = env.ASSISTANT_CHAT_COMPLETION_ENDPOINT;
  const apiKey = env.ASSISTANT_API_KEY || env.OPENAI_API_KEY;
  if (!(apiKey || endpoint)) {
    throw new Error('Please set either OPENAI_API_KEY or ASSISTANT_CHAT_COMPLETION_ENDPOINT');
  }
  this._apiKey = apiKey;

  if (endpoint) {
    // Custom endpoint: no default models are imposed.
    this._model = env.ASSISTANT_MODEL;
    this._longerContextModel = env.ASSISTANT_LONGER_CONTEXT_MODEL;
    this._endpoint = endpoint;
  } else {
    // Default OpenAI endpoint. `??` only replaces unset values, so a variable
    // explicitly set to "" is kept as "".
    this._model = env.ASSISTANT_MODEL ?? OpenAIAssistant.DEFAULT_MODEL;
    this._longerContextModel =
      env.ASSISTANT_LONGER_CONTEXT_MODEL ?? OpenAIAssistant.DEFAULT_LONGER_CONTEXT_MODEL;
    this._endpoint = `https://api.openai.com/v1/chat/completions`;
  }
}
|
|
|
|
|
2023-07-05 15:36:45 +00:00
|
|
|
/**
 * Answer an assistance request.
 *
 * Builds the messages that are new for this turn (an initial system prompt
 * when the conversation is empty, an optional system message describing the
 * evaluation of the current formula, and the user's text), appends them to
 * the conversation, logs an 'assistantSend' telemetry event for each, asks
 * for a completion, converts it into a response, and logs an
 * 'assistantReceive' telemetry event.
 *
 * Note that when `request.state.messages` is present, that same array is
 * appended to in place and is returned as `response.state.messages`.
 */
public async apply(
  optSession: OptDocSession,
  doc: AssistanceDoc,
  request: AssistanceRequest,
): Promise<AssistanceResponse> {
  const conversation = request.state?.messages || [];
  const newMessages = [];

  // An empty conversation starts with the system prompt, which embeds the schema prompt.
  if (conversation.length === 0) {
    const schemaPrompt = await makeSchemaPromptV1(optSession, doc, request);
    const instructions =
      'You are a helpful assistant for a user of software called Grist. ' +
      "Below are one or more fake Python classes representing the structure of the user's data. " +
      'The function at the end needs completing. ' +
      "The user will probably give a description of what they want the function (a 'formula') to return. " +
      'If so, your response should include the function BODY as Python code in a markdown block. ' +
      "Your response will be automatically concatenated to the code below, so you mustn't repeat any of it. " +
      'You cannot change the function signature or define additional functions or classes. ' +
      'It should be a pure function that performs some computation and returns a result. ' +
      'It CANNOT perform any side effects such as adding/removing/modifying rows/columns/cells/tables/etc. ' +
      'It CANNOT interact with files/databases/networks/etc. ' +
      'It CANNOT display images/charts/graphs/maps/etc. ' +
      'If the user asks for these things, tell them that you cannot help. ';
    newMessages.push({
      role: 'system',
      content: instructions + "\n\n" + '```python\n' + schemaPrompt + '\n```',
    });
  }

  // Optionally describe what the current formula evaluates to.
  if (request.context.evaluateCurrentFormula) {
    const evaluation = await doc.assistanceEvaluateFormula(request.context);
    const recorded = Object.entries(evaluation.attributes);
    const whereClause = recorded.length > 0
      ? `where:\n\n${recorded.map(([name, value]) => `${name} = ${value}`).join('\n')}\n\n`
      : '';
    const outcome = evaluation.error ? 'raises an exception' : 'returns';
    newMessages.push({
      role: 'system',
      content:
        "Evaluating this code:\n\n```python\n" + evaluation.formula + "\n```\n\n" +
        whereClause +
        `${outcome}: ${evaluation.result}`,
    });
  }

  newMessages.push({role: 'user', content: request.text});

  // Position in the conversation at which this turn's messages begin.
  const firstNewIndex = conversation.length;
  conversation.push(...newMessages);

  newMessages.forEach(({role, content}, offset) => {
    doc.logTelemetryEvent(optSession, 'assistantSend', {
      full: {
        conversationId: request.conversationId,
        context: request.context,
        prompt: {
          index: firstNewIndex + offset,
          role,
          content,
        },
      },
    });
  });

  const userIdHash = getUserHash(optSession);
  const completion: string = await this._getCompletion(conversation, userIdHash);

  // Handy to uncomment when debugging:
  // console.log(completion);

  const response = await completionToResponse(doc, request, completion);
  // When a formula was suggested, show the user its tweaked version (i.e. the one
  // that's copied when the Apply button is clicked); otherwise reply with the raw completion.
  response.reply = response.suggestedFormula
    ? replaceMarkdownCode(completion, response.suggestedFormula)
    : completion;
  response.state = {messages: conversation};

  doc.logTelemetryEvent(optSession, 'assistantReceive', {
    full: {
      conversationId: request.conversationId,
      context: request.context,
      message: {
        // Index of the last entry of the conversation at this point.
        index: conversation.length - 1,
        content: completion,
      },
      suggestedFormula: response.suggestedFormula,
    },
  });
  return response;
}
|
2023-05-08 18:15:22 +00:00
|
|
|
|
2023-07-20 17:34:25 +00:00
|
|
|
private async _fetchCompletion(messages: AssistanceMessage[], userIdHash: string, longerContext: boolean) {
|
2023-08-18 20:14:42 +00:00
|
|
|
const model = longerContext ? this._longerContextModel : this._model;
|
2023-05-08 18:15:22 +00:00
|
|
|
const apiResponse = await DEPS.fetch(
|
|
|
|
this._endpoint,
|
|
|
|
{
|
|
|
|
method: "POST",
|
|
|
|
headers: {
|
2023-08-18 20:14:42 +00:00
|
|
|
...(this._apiKey ? {
|
|
|
|
"Authorization": `Bearer ${this._apiKey}`,
|
|
|
|
} : undefined),
|
2023-05-08 18:15:22 +00:00
|
|
|
"Content-Type": "application/json",
|
|
|
|
},
|
|
|
|
body: JSON.stringify({
|
2023-07-24 18:56:38 +00:00
|
|
|
messages,
|
2023-05-08 18:15:22 +00:00
|
|
|
temperature: 0,
|
2023-08-18 20:14:42 +00:00
|
|
|
...(model ? { model } : undefined),
|
2023-07-20 17:34:25 +00:00
|
|
|
user: userIdHash,
|
2023-08-18 20:14:42 +00:00
|
|
|
...(this._maxTokens ? {
|
|
|
|
max_tokens: this._maxTokens,
|
|
|
|
} : undefined),
|
2023-05-08 18:15:22 +00:00
|
|
|
}),
|
|
|
|
},
|
|
|
|
);
|
(core) Improved error messages, retries, and handling of token limits in AI assistant
Summary:
In a nutshell:
- More specific and helpful error messages are shown to the user
- API requests are only retried when needed
- The system deals with reaching the maximum token limit better, especially by switching to a model with a bigger limit
In more detail:
- `COMPLETION_MODEL` configuration has been removed. By default `gpt-3.5-turbo-0613` is used which accepts 4k tokens. If that's not enough, `gpt-3.5-turbo-16k-0613` is used instead.
- Switching to the bigger model happens when either the prompt is too long by itself (the API immediately returns an error code) or the model reaches the 4k limit itself in the process of generating a response and thus returns an incomplete response. The latter case is made possible by removing the `max_tokens: 1500` in the request, which was very generous and would have led to switching to the more expensive model more often than needed. The downside is that the user has to wait a bit longer for the response.
- If the bigger 16k token limit is also exceeded, the assistant immediately responds (instead of retrying as before) with an error message including suggestions. The suggestions include restarting the conversation if and only if the user has sent multiple messages.
- If a request fails because Grist has reached its OpenAI monthly billing quota, the assistant immediately responds (instead of retrying as before) with an error message suggesting that the user try again tomorrow.
- If a request fails for some other reason, the assistant retries, and if all attempts fail then the user is told to try again in a few minutes and is shown the exact error message, including the API response if there is one.
- Retrying only happens when an API request fails, whereas previously the system also retried errors from a much bigger scope which included calls to the sandbox. The downside is that the hugging face assistant no longer retries, although that code is currently disabled anyway.
- The assistant no longer waits an additional second after the final retry attempt fails.
Test Plan: Added a new server test file with several unit tests using faked OpenAI responses, including the happy path which wasn't really tested before.
Reviewers: dsagal
Reviewed By: dsagal
Subscribers: dsagal
Differential Revision: https://phab.getgrist.com/D3955
2023-07-18 10:03:31 +00:00
|
|
|
const resultText = await apiResponse.text();
|
|
|
|
const result = JSON.parse(resultText);
|
|
|
|
const errorCode = result.error?.code;
|
|
|
|
if (errorCode === "context_length_exceeded" || result.choices?.[0].finish_reason === "length") {
|
2023-08-18 20:14:42 +00:00
|
|
|
if (!longerContext && this._longerContextModel) {
|
(core) Improved error messages, retries, and handling of token limits in AI assistant
Summary:
In a nutshell:
- More specific and helpful error messages are shown to the user
- API requests are only retried when needed
- The system deals with reaching the maximum token limit better, especially by switching to a model with a bigger limit
In more detail:
- `COMPLETION_MODEL` configuration has been removed. By default `gpt-3.5-turbo-0613` is used which accepts 4k tokens. If that's not enough, `gpt-3.5-turbo-16k-0613` is used instead.
- Switching to the bigger model happens when either the prompt is too long by itself (the API immediately returns an error code) or the model reaches the 4k limit itself in the process of generating a response and thus returns an incomplete response. The latter case is made possible by removing the `max_tokens: 1500` in the request, which was very generous and would have led to switching to the more expensive model more often than needed. The downside is that the user has to wait a bit longer for the response.
- If the bigger 16k token limit is also exceeded, the assistant immediately responds (instead of retrying as before) with an error message including suggestions. The suggestions include restarting the conversation if and only if the user has sent multiple messages.
- If a request fails because Grist has reached its OpenAI monthly billing quota, the assistant immediately responds (instead of retrying as before) with an error message suggesting that the user try again tomorrow.
- If a request fails for some other reason, the assistant retries, and if all attempts fail then the user is told to try again in a few minutes and is shown the exact error message, including the API response if there is one.
- Retrying only happens when an API request fails, whereas previously the system also retried errors from a much bigger scope which included calls to the sandbox. The downside is that the hugging face assistant no longer retries, although that code is currently disabled anyway.
- The assistant no longer waits an additional second after the final retry attempt fails.
Test Plan: Added a new server test file with several unit tests using faked OpenAI responses, including the happy path which wasn't really tested before.
Reviewers: dsagal
Reviewed By: dsagal
Subscribers: dsagal
Differential Revision: https://phab.getgrist.com/D3955
2023-07-18 10:03:31 +00:00
|
|
|
log.info("Switching to longer context model...");
|
|
|
|
throw new SwitchToLongerContext();
|
|
|
|
} else if (messages.length <= 2) {
|
|
|
|
throw new TokensExceededFirstMessage();
|
|
|
|
} else {
|
|
|
|
throw new TokensExceededLaterMessage();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (errorCode === "insufficient_quota") {
|
|
|
|
log.error("OpenAI billing quota exceeded!!!");
|
|
|
|
throw new QuotaExceededError();
|
|
|
|
}
|
2023-05-08 18:15:22 +00:00
|
|
|
if (apiResponse.status !== 200) {
|
(core) Improved error messages, retries, and handling of token limits in AI assistant
Summary:
In a nutshell:
- More specific and helpful error messages are shown to the user
- API requests are only retried when needed
- The system deals with reaching the maximum token limit better, especially by switching to a model with a bigger limit
In more detail:
- `COMPLETION_MODEL` configuration has been removed. By default `gpt-3.5-turbo-0613` is used which accepts 4k tokens. If that's not enough, `gpt-3.5-turbo-16k-0613` is used instead.
- Switching to the bigger model happens when either the prompt is too long by itself (the API immediately returns an error code) or the model reaches the 4k limit itself in the process of generating a response and thus returns an incomplete response. The latter case is made possible by removing the `max_tokens: 1500` in the request, which was very generous and would have led to switching to the more expensive model more often than needed. The downside is that the user has to wait a bit longer for the response.
- If the bigger 16k token limit is also exceeded, the assistant immediately responds (instead of retrying as before) with an error message including suggestions. The suggestions include restarting the conversation if and only if the user has sent multiple messages.
- If a request fails because Grist has reached its OpenAI monthly billing quota, the assistant immediately responds (instead of retrying as before) with an error message suggesting that the user try again tomorrow.
- If a request fails for some other reason, the assistant retries, and if all attempts fail then the user is told to try again in a few minutes and is shown the exact error message, including the API response if there is one.
- Retrying only happens when an API request fails, whereas previously the system also retried errors from a much bigger scope which included calls to the sandbox. The downside is that the hugging face assistant no longer retries, although that code is currently disabled anyway.
- The assistant no longer waits an additional second after the final retry attempt fails.
Test Plan: Added a new server test file with several unit tests using faked OpenAI responses, including the happy path which wasn't really tested before.
Reviewers: dsagal
Reviewed By: dsagal
Subscribers: dsagal
Differential Revision: https://phab.getgrist.com/D3955
2023-07-18 10:03:31 +00:00
|
|
|
throw new Error(`OpenAI API returned status ${apiResponse.status}: ${resultText}`);
|
2023-05-08 18:15:22 +00:00
|
|
|
}
|
(core) Improved error messages, retries, and handling of token limits in AI assistant
Summary:
In a nutshell:
- More specific and helpful error messages are shown to the user
- API requests are only retried when needed
- The system deals with reaching the maximum token limit better, especially by switching to a model with a bigger limit
In more detail:
- `COMPLETION_MODEL` configuration has been removed. By default `gpt-3.5-turbo-0613` is used which accepts 4k tokens. If that's not enough, `gpt-3.5-turbo-16k-0613` is used instead.
- Switching to the bigger model happens when either the prompt is too long by itself (the API immediately returns an error code) or the model reaches the 4k limit itself in the process of generating a response and thus returns an incomplete response. The latter case is made possible by removing the `max_tokens: 1500` in the request, which was very generous and would have led to switching to the more expensive model more often than needed. The downside is that the user has to wait a bit longer for the response.
- If the bigger 16k token limit is also exceeded, the assistant immediately responds (instead of retrying as before) with an error message including suggestions. The suggestions include restarting the conversation if and only if the user has sent multiple messages.
- If a request fails because Grist has reached its OpenAI monthly billing quota, the assistant immediately responds (instead of retrying as before) with an error message suggesting that the user try again tomorrow.
- If a request fails for some other reason, the assistant retries, and if all attempts fail then the user is told to try again in a few minutes and is shown the exact error message, including the API response if there is one.
- Retrying only happens when an API request fails, whereas previously the system also retried errors from a much bigger scope which included calls to the sandbox. The downside is that the hugging face assistant no longer retries, although that code is currently disabled anyway.
- The assistant no longer waits an additional second after the final retry attempt fails.
Test Plan: Added a new server test file with several unit tests using faked OpenAI responses, including the happy path which wasn't really tested before.
Reviewers: dsagal
Reviewed By: dsagal
Subscribers: dsagal
Differential Revision: https://phab.getgrist.com/D3955
2023-07-18 10:03:31 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2023-07-20 17:34:25 +00:00
|
|
|
/**
 * Calls `_fetchCompletion`, applying the retry policy for failed requests:
 *  - `SwitchToLongerContext`: start over with `longerContext` set to true
 *    (the recursive call gets its own fresh attempt counter).
 *  - `NonRetryableError`: rethrown untouched, no retry.
 *  - anything else: wait `DEPS.delayTime` and try again, for at most 3 attempts
 *    in total. The last failure is rethrown as a `RetryableError` carrying the
 *    stringified original error, without waiting first.
 */
private async _fetchCompletionWithRetries(
  messages: AssistanceMessage[], userIdHash: string, longerContext: boolean
): Promise<any> {
  const maxAttempts = 3;
  let attempt = 0;
  for (;;) {
    attempt += 1;
    try {
      return await this._fetchCompletion(messages, userIdHash, longerContext);
    } catch (e) {
      if (e instanceof SwitchToLongerContext) {
        return await this._fetchCompletionWithRetries(messages, userIdHash, true);
      }
      if (e instanceof NonRetryableError) {
        throw e;
      }
      if (attempt === maxAttempts) {
        // Out of attempts: give up immediately rather than sleeping once more.
        throw new RetryableError(e.toString());
      }
      log.warn(`Waiting and then retrying after error: ${e}`);
      await delay(DEPS.delayTime);
    }
  }
}
|
2023-05-08 18:15:22 +00:00
|
|
|
|
2023-07-20 17:34:25 +00:00
|
|
|
/**
 * Fetches a completion for `messages` (starting with the regular, non-longer-context
 * setting), appends the message of the first returned choice to `messages` in place,
 * and returns that message's content.
 */
private async _getCompletion(messages: AssistanceMessage[], userIdHash: string) {
  const completion = await this._fetchCompletionWithRetries(messages, userIdHash, false);
  const reply = completion.choices[0].message;
  // Extend the caller's array so the conversation includes the new reply.
  messages.push(reply);
  return reply.content;
}
|
|
|
|
}
|
|
|
|
|
2023-05-08 18:15:22 +00:00
|
|
|
/**
 * Assistant that sends a schema prompt to a HuggingFace text-generation endpoint
 * and turns the generated text into a formula suggestion. Conversation state is
 * not supported.
 */
export class HuggingFaceAssistant implements Assistant {
  private _apiKey: string;
  private _completionUrl: string;

  /**
   * Reads configuration from the environment:
   *  - HUGGINGFACE_API_KEY (required; an Error is thrown if it is not set),
   *  - COMPLETION_URL, or else a URL built from COMPLETION_MODEL, or else a
   *    default model URL.
   */
  public constructor() {
    const apiKey = process.env.HUGGINGFACE_API_KEY;
    if (!apiKey) {
      throw new Error('HUGGINGFACE_API_KEY not set');
    }
    this._apiKey = apiKey;
    // COMPLETION_MODEL values I've tried:
    //   - codeparrot/codeparrot
    //   - NinedayWang/PolyCoder-2.7B
    //   - NovelAI/genji-python-6B
    let completionUrl = process.env.COMPLETION_URL;
    if (!completionUrl) {
      if (process.env.COMPLETION_MODEL) {
        completionUrl = `https://api-inference.huggingface.co/models/${process.env.COMPLETION_MODEL}`;
      } else {
        completionUrl = 'https://api-inference.huggingface.co/models/NovelAI/genji-python-6B';
      }
    }
    this._completionUrl = completionUrl;
  }

  /**
   * Requests a completion for the formula described by `request` and converts it
   * into an AssistanceResponse.
   *
   * Throws if `request.state` is set, or if the API answers with a non-200 status
   * (the error message includes the status and the response body). No retry is
   * attempted; on a 503 the call sleeps for 10 seconds before failing.
   */
  public async apply(
    optSession: OptDocSession, doc: AssistanceDoc, request: AssistanceRequest): Promise<AssistanceResponse> {
    if (request.state) {
      throw new Error("HuggingFaceAssistant does not support state");
    }
    const prompt = await makeSchemaPromptV1(optSession, doc, request);
    const response = await DEPS.fetch(
      this._completionUrl,
      {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${this._apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          inputs: prompt,
          parameters: {
            return_full_text: false,
            max_new_tokens: 50,
          },
        }),
      },
    );
    if (response.status !== 200) {
      // Read the body exactly once and reuse it: a response body can only be consumed
      // a single time, so a second `response.text()` would throw and hide the real error.
      const text = await response.text();
      if (response.status === 503) {
        log.error(`Sleeping for 10s - HuggingFace API returned ${response.status}: ${text}`);
        await delay(10000);
      }
      log.error(`HuggingFace API returned ${response.status}: ${text}`);
      throw new Error(`HuggingFace API returned status ${response.status}: ${text}`);
    }
    const result = await response.json();
    let completion = result[0].generated_text;
    // Keep only the text before the first blank line.
    completion = completion.split('\n\n')[0];
    return completionToResponse(doc, request, completion);
  }
}
|
|
|
|
|
2023-06-02 11:25:14 +00:00
|
|
|
/**
 * Stand-in assistant for tests: plays the part of a chat model whose reply is
 * always exactly the text it was given.
 */
class EchoAssistant implements Assistant {
  public async apply(sess: OptDocSession, doc: AssistanceDoc, request: AssistanceRequest): Promise<AssistanceResponse> {
    // Sending the text "ERROR" makes the assistant fail on purpose.
    if (request.text === "ERROR") {
      throw new Error(`ERROR`);
    }
    // Continue the conversation carried in the request, or start a new one
    // with an empty system message.
    const messages = request.state?.messages || [];
    if (messages.length === 0) {
      messages.push({role: 'system', content: ''});
    }
    const completion = request.text;
    messages.push(
      {role: 'user', content: request.text},
      {role: 'assistant', content: completion},
    );
    const response = await completionToResponse(doc, request, completion, completion);
    response.state = {messages};
    return response;
  }
}
|
|
|
|
|
2023-05-08 18:15:22 +00:00
|
|
|
/**
 * Picks the assistant implementation from environment variables:
 *  - OPENAI_API_KEY equal to 'test' selects the EchoAssistant;
 *  - otherwise, OPENAI_API_KEY or ASSISTANT_CHAT_COMPLETION_ENDPOINT being set
 *    selects the OpenAIAssistant;
 *  - otherwise an Error is thrown.
 */
export function getAssistant() {
  const {OPENAI_API_KEY: apiKey, ASSISTANT_CHAT_COMPLETION_ENDPOINT: endpoint} = process.env;
  if (apiKey === 'test') {
    return new EchoAssistant();
  }
  if (!apiKey && !endpoint) {
    throw new Error('Please set OPENAI_API_KEY or ASSISTANT_CHAT_COMPLETION_ENDPOINT');
  }
  return new OpenAIAssistant();
}
|
|
|
|
|
|
|
|
/**
 * Handles an assistance request by handing it to the assistant chosen by
 * `getAssistant()` and returning that assistant's response.
 */
export async function sendForCompletion(
  optSession: OptDocSession,
  doc: AssistanceDoc,
  request: AssistanceRequest,
): Promise<AssistanceResponse> {
  const response = await getAssistant().apply(optSession, doc, request);
  return response;
}
|
2023-03-15 08:52:17 +00:00
|
|
|
|
2023-07-31 07:21:24 +00:00
|
|
|
/**
 * Returns a new Markdown string in which the first multi-line (fenced) code block
 * is replaced with a ```python block containing `replaceValue`.
 *
 * Only the first fenced block is touched: the match is lazy, so it stops at the
 * first closing fence and leaves any later code blocks in place. `replaceValue` is
 * inserted literally; sequences such as `$&`, `$1` or `$$` are not treated as
 * replacement patterns. If there is no fenced block, `markdown` is returned unchanged.
 */
export function replaceMarkdownCode(markdown: string, replaceValue: string) {
  // A replacer function (rather than a replacement string) keeps `$` sequences literal.
  return markdown.replace(/```\w*\n(.*?)```/s, () => '```python\n' + replaceValue + '\n```');
}
|
|
|
|
|
2023-07-05 15:36:45 +00:00
|
|
|
/**
 * Builds the V1 schema prompt for a formula request by calling
 * `doc.assistanceSchemaPromptV1` with the request's table, column and text
 * (the text is passed as `docString`).
 * Throws if the request context is not of type 'formula'.
 */
async function makeSchemaPromptV1(session: OptDocSession, doc: AssistanceDoc, request: AssistanceRequest) {
  const {context, text} = request;
  if (context.type !== 'formula') {
    throw new Error('makeSchemaPromptV1 only works for formulas');
  }
  const {tableId, colId} = context;
  return doc.assistanceSchemaPromptV1(session, {tableId, colId, docString: text});
}
|
|
|
|
|
2023-07-31 07:21:24 +00:00
|
|
|
/**
 * Converts a raw completion into an AssistanceResponse for a formula request.
 *
 * The completion is passed through `doc.assistanceFormulaTweak`; a falsy result
 * means there is no formula to suggest. When there is one, a single ModifyColumn
 * action setting that formula on the request's column is suggested. `reply` is
 * passed through unchanged.
 * Throws if the request context is not of type 'formula'.
 */
async function completionToResponse(
  doc: AssistanceDoc,
  request: AssistanceRequest,
  completion: string,
  reply?: string
): Promise<AssistanceResponse> {
  const {context} = request;
  if (context.type !== 'formula') {
    throw new Error('completionToResponse only works for formulas');
  }
  const tweaked = await doc.assistanceFormulaTweak(completion);
  const suggestedFormula = tweaked || undefined;
  // Only propose an action when the completion produced a non-empty formula
  // (that is, it actually looked like code).
  const suggestedActions: DocAction[] = [];
  if (suggestedFormula) {
    suggestedActions.push(["ModifyColumn", context.tableId, context.colId, {formula: suggestedFormula}]);
  }
  return {suggestedActions, suggestedFormula, reply};
}
|
2023-07-20 17:34:25 +00:00
|
|
|
|
|
|
|
/**
 * Returns a base64 SHA-256 digest derived from the session user's id and ref plus
 * a fixed salt, and logs the digest together with the user ref.
 */
function getUserHash(session: OptDocSession): string {
  const user = getDocSessionUser(session);
  // The salt makes it a bit harder to guess the user ID from the hash.
  const salt = "7a8sb6987asdb678asd687sad6boas7f8b6aso7fd";
  const hasher = createHash('sha256');
  hasher.update(`${user?.id} ${user?.ref} ${salt}`);
  const hash = hasher.digest('base64');
  // Record the hash in the logs so that feedback mentioning a user ID hash can be
  // traced back to the original user by searching for it.
  log.rawInfo("getUserHash", {...getLogMetaFromDocSession(session), userRef: user?.ref, hash});
  return hash;
}
|