switch default LLM from a model that is going away (#1202)

* switch default LLM from a model that is going away

If an API key is provided, Grist can use an LLM as an assistant
for writing formulas. The LLM can be self-hosted or an external
service; the default external service is OpenAI. This commit
updates the default model used when this feature is enabled,
since the existing one is being retired. Benchmarking suggests
the results are generally better, though not dramatically so.
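
A rough sketch of the configuration this implies (hypothetical helper, not
Grist's actual code; OPENAI_API_KEY appears in the tests below, while
ASSISTANT_MODEL is assumed here purely for illustration):

// Hypothetical sketch: choose the assistant model when the feature is enabled.
const DEFAULT_MODEL = "gpt-4o-2024-08-06";  // new default introduced by this commit

function pickAssistantModel(): string | undefined {
  // Without an API key, the assistant feature stays disabled.
  if (!process.env.OPENAI_API_KEY) { return undefined; }
  // Prefer an explicitly configured model, else fall back to the refreshed default.
  return process.env.ASSISTANT_MODEL || DEFAULT_MODEL;
}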

The feature of falling back on a longer-context model is less important now
that the default model already has a much larger context window, but it is
retained since it could be useful for self-hosters.
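
For context, a minimal sketch of what such a fallback could look like
(completeWithFallback and the complete callback are illustrative names, not
Grist's API; ASSISTANT_LONGER_CONTEXT_MODEL is the variable exercised by the
updated tests below):

// Illustrative only: retry on a longer-context model when one is configured.
async function completeWithFallback(
  complete: (model: string) => Promise<string>,
  defaultModel: string,
): Promise<string> {
  try {
    return await complete(defaultModel);
  } catch (err) {
    const longerContextModel = process.env.ASSISTANT_LONGER_CONTEXT_MODEL;
    // Without a configured fallback model, surface the original error.
    if (!longerContextModel) { throw err; }
    // Assumed behaviour: retrying is only useful for context-length failures;
    // the error classification is omitted in this sketch.
    return await complete(longerContextModel);
  }
}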

* update long context model tests
Paul Fitzpatrick 2024-09-13 14:56:42 -04:00 committed by GitHub
parent 43f7fbeff4
commit a45a7f9fda
2 changed files with 28 additions and 15 deletions


@@ -148,8 +148,8 @@ class RetryableError extends Error {
  * An optional ASSISTANT_MAX_TOKENS can be specified.
  */
 export class OpenAIAssistant implements Assistant {
-  public static DEFAULT_MODEL = "gpt-3.5-turbo-0613";
-  public static DEFAULT_LONGER_CONTEXT_MODEL = "gpt-3.5-turbo-16k-0613";
+  public static DEFAULT_MODEL = "gpt-4o-2024-08-06";
+  public static DEFAULT_LONGER_CONTEXT_MODEL = "";
   private _apiKey?: string;
   private _model?: string;


@@ -1,11 +1,12 @@
-import {createDocTools} from "test/server/docTools";
+import {AssistanceState} from 'app/common/AssistancePrompts';
 import {ActiveDoc} from "app/server/lib/ActiveDoc";
-import {DEPS, OpenAIAssistant, sendForCompletion} from "app/server/lib/Assistance";
+import {DEPS, OpenAIAssistant, sendForCompletion} from 'app/server/lib/Assistance';
+import {DocSession} from 'app/server/lib/DocSession';
 import {assert} from 'chai';
-import * as sinon from 'sinon';
 import {Response} from 'node-fetch';
-import {DocSession} from "app/server/lib/DocSession";
-import {AssistanceState} from "app/common/AssistancePrompts";
+import * as sinon from 'sinon';
+import {createDocTools} from 'test/server/docTools';
+import {EnvironmentSnapshot} from 'test/server/testUtils';
 
 // For some reason, assert.isRejected is not getting defined,
 // though test/chai-as-promised.js should be taking care of this.
@@ -14,6 +15,12 @@ const chai = require('chai');
 const chaiAsPromised = require('chai-as-promised');
 chai.use(chaiAsPromised);
 
+/**
+ * We no longer use a longer context model by default, but we still
+ * test this configuration.
+ */
+const LONGER_CONTEXT_MODEL_FOR_TEST = "fake";
+
 describe('Assistance', function () {
   this.timeout(10000);
@@ -22,8 +29,11 @@ describe('Assistance', function () {
   const table2Id = "Table2";
   let session: DocSession;
   let doc: ActiveDoc;
+  let oldEnv: EnvironmentSnapshot;
   before(async () => {
+    oldEnv = new EnvironmentSnapshot();
     process.env.OPENAI_API_KEY = "fake";
+    process.env.ASSISTANT_LONGER_CONTEXT_MODEL = LONGER_CONTEXT_MODEL_FOR_TEST;
     session = docTools.createFakeSession();
     doc = await docTools.createDoc('test.grist');
     await doc.applyUserActions(session, [
@@ -31,6 +41,9 @@ describe('Assistance', function () {
       ["AddTable", table2Id, [{id: "A"}, {id: "B"}, {id: "C"}]],
     ]);
   });
+  after(async function () {
+    oldEnv.restore();
+  });
 
   const colId = "C";
   const userMessageContent = "Sum of A and B";
@@ -204,8 +217,8 @@ describe('Assistance', function () {
     );
     checkModels([
       OpenAIAssistant.DEFAULT_MODEL,
-      OpenAIAssistant.DEFAULT_LONGER_CONTEXT_MODEL,
-      OpenAIAssistant.DEFAULT_LONGER_CONTEXT_MODEL,
+      LONGER_CONTEXT_MODEL_FOR_TEST,
+      LONGER_CONTEXT_MODEL_FOR_TEST,
     ]);
   });
@@ -254,8 +267,8 @@ describe('Assistance', function () {
     );
     checkModels([
       OpenAIAssistant.DEFAULT_MODEL,
-      OpenAIAssistant.DEFAULT_LONGER_CONTEXT_MODEL,
-      OpenAIAssistant.DEFAULT_LONGER_CONTEXT_MODEL,
+      LONGER_CONTEXT_MODEL_FOR_TEST,
+      LONGER_CONTEXT_MODEL_FOR_TEST,
     ]);
   });
@@ -278,8 +291,8 @@ describe('Assistance', function () {
     );
     checkModels([
       OpenAIAssistant.DEFAULT_MODEL,
-      OpenAIAssistant.DEFAULT_LONGER_CONTEXT_MODEL,
-      OpenAIAssistant.DEFAULT_LONGER_CONTEXT_MODEL,
+      LONGER_CONTEXT_MODEL_FOR_TEST,
+      LONGER_CONTEXT_MODEL_FOR_TEST,
     ]);
   });
@@ -310,8 +323,8 @@ describe('Assistance', function () {
     const result = await checkSendForCompletion();
     checkModels([
       OpenAIAssistant.DEFAULT_MODEL,
-      OpenAIAssistant.DEFAULT_LONGER_CONTEXT_MODEL,
-      OpenAIAssistant.DEFAULT_LONGER_CONTEXT_MODEL,
+      LONGER_CONTEXT_MODEL_FOR_TEST,
+      LONGER_CONTEXT_MODEL_FOR_TEST,
     ]);
     assert.deepEqual(result.suggestedActions, [
       ["ModifyColumn", table1Id, colId, {formula: "123"}]