Adds the gpt4-turbo model family and support for the gpt-4-1106-preview model
This commit is contained in:
@@ -65,6 +65,7 @@ const OpenAIV1ChatCompletionSchema = z.object({
|
||||
presence_penalty: z.number().optional().default(0),
|
||||
logit_bias: z.any().optional(),
|
||||
user: z.string().optional(),
|
||||
seed: z.number().int().optional(),
|
||||
});
|
||||
|
||||
const OpenAIV1TextCompletionSchema = z
|
||||
|
||||
@@ -42,6 +42,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
|
||||
let modelMax: number;
|
||||
if (model.match(/gpt-3.5-turbo-16k/)) {
|
||||
modelMax = 16384;
|
||||
} else if (model.match(/gpt-4-1106(-preview)?/)) {
|
||||
modelMax = 131072;
|
||||
} else if (model.match(/gpt-3.5-turbo/)) {
|
||||
modelMax = 4096;
|
||||
} else if (model.match(/gpt-4-32k/)) {
|
||||
@@ -60,8 +62,6 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
|
||||
// Not sure if AWS Claude has the same context limit as Anthropic Claude.
|
||||
modelMax = 100000;
|
||||
} else {
|
||||
// Don't really want to throw here because I don't want to have to update
|
||||
// this ASAP every time a new model is released.
|
||||
req.log.warn({ model }, "Unknown model, using 100k token limit.");
|
||||
modelMax = 100000;
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ function getModelsResponse() {
|
||||
|
||||
// https://platform.openai.com/docs/models/overview
|
||||
const knownModels = [
|
||||
"gpt-4-1106-preview",
|
||||
"gpt-4",
|
||||
"gpt-4-0613",
|
||||
"gpt-4-0314", // EOL 2024-06-13
|
||||
|
||||
@@ -211,6 +211,7 @@ function processQueue() {
|
||||
|
||||
// TODO: `getLockoutPeriod` uses model names instead of model families
|
||||
// TODO: genericize this; it's really ugly
|
||||
const gpt4TurboLockout = keyPool.getLockoutPeriod("gpt-4-1106");
|
||||
const gpt432kLockout = keyPool.getLockoutPeriod("gpt-4-32k");
|
||||
const gpt4Lockout = keyPool.getLockoutPeriod("gpt-4");
|
||||
const turboLockout = keyPool.getLockoutPeriod("gpt-3.5-turbo");
|
||||
@@ -219,6 +220,9 @@ function processQueue() {
|
||||
const awsClaudeLockout = keyPool.getLockoutPeriod("anthropic.claude-v2");
|
||||
|
||||
const reqs: (Request | undefined)[] = [];
|
||||
if (gpt4TurboLockout === 0) {
|
||||
reqs.push(dequeue("gpt4-turbo"));
|
||||
}
|
||||
if (gpt432kLockout === 0) {
|
||||
reqs.push(dequeue("gpt4-32k"));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user