adds gpt4-turbo model family and support for the gpt-4-1106-preview model

This commit is contained in:
nai-degen
2023-11-06 15:29:43 -06:00
parent b615ffa433
commit 0d5dfeccf8
11 changed files with 58 additions and 43 deletions
@@ -65,6 +65,7 @@ const OpenAIV1ChatCompletionSchema = z.object({
presence_penalty: z.number().optional().default(0),
logit_bias: z.any().optional(),
user: z.string().optional(),
seed: z.number().int().optional(),
});
const OpenAIV1TextCompletionSchema = z
@@ -42,6 +42,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let modelMax: number;
if (model.match(/gpt-3.5-turbo-16k/)) {
modelMax = 16384;
} else if (model.match(/gpt-4-1106(-preview)?/)) {
modelMax = 131072;
} else if (model.match(/gpt-3.5-turbo/)) {
modelMax = 4096;
} else if (model.match(/gpt-4-32k/)) {
@@ -60,8 +62,6 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
// Not sure if AWS Claude has the same context limit as Anthropic Claude.
modelMax = 100000;
} else {
// Don't really want to throw here because I don't want to have to update
// this ASAP every time a new model is released.
req.log.warn({ model }, "Unknown model, using 100k token limit.");
modelMax = 100000;
}
+1
View File
@@ -41,6 +41,7 @@ function getModelsResponse() {
// https://platform.openai.com/docs/models/overview
const knownModels = [
"gpt-4-1106-preview",
"gpt-4",
"gpt-4-0613",
"gpt-4-0314", // EOL 2024-06-13
+4
View File
@@ -211,6 +211,7 @@ function processQueue() {
// TODO: `getLockoutPeriod` uses model names instead of model families
// TODO: genericize this it's really ugly
const gpt4TurboLockout = keyPool.getLockoutPeriod("gpt-4-1106");
const gpt432kLockout = keyPool.getLockoutPeriod("gpt-4-32k");
const gpt4Lockout = keyPool.getLockoutPeriod("gpt-4");
const turboLockout = keyPool.getLockoutPeriod("gpt-3.5-turbo");
@@ -219,6 +220,9 @@ function processQueue() {
const awsClaudeLockout = keyPool.getLockoutPeriod("anthropic.claude-v2");
const reqs: (Request | undefined)[] = [];
if (gpt4TurboLockout === 0) {
reqs.push(dequeue("gpt4-turbo"));
}
if (gpt432kLockout === 0) {
reqs.push(dequeue("gpt4-32k"));
}