adds gpt4-turbo model family and support for the gpt-4-1106-preview model

This commit is contained in:
nai-degen
2023-11-06 15:29:43 -06:00
parent b615ffa433
commit 0d5dfeccf8
11 changed files with 58 additions and 43 deletions
@@ -65,6 +65,7 @@ const OpenAIV1ChatCompletionSchema = z.object({
presence_penalty: z.number().optional().default(0),
logit_bias: z.any().optional(),
user: z.string().optional(),
seed: z.number().int().optional(),
});
const OpenAIV1TextCompletionSchema = z
@@ -42,6 +42,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let modelMax: number;
if (model.match(/gpt-3.5-turbo-16k/)) {
modelMax = 16384;
} else if (model.match(/gpt-4-1106(-preview)?/)) {
modelMax = 131072;
} else if (model.match(/gpt-3.5-turbo/)) {
modelMax = 4096;
} else if (model.match(/gpt-4-32k/)) {
@@ -60,8 +62,6 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
// Not sure if AWS Claude has the same context limit as Anthropic Claude.
modelMax = 100000;
} else {
// Don't really want to throw here because I don't want to have to update
// this ASAP every time a new model is released.
req.log.warn({ model }, "Unknown model, using 100k token limit.");
modelMax = 100000;
}
+1
View File
@@ -41,6 +41,7 @@ function getModelsResponse() {
// https://platform.openai.com/docs/models/overview
const knownModels = [
"gpt-4-1106-preview",
"gpt-4",
"gpt-4-0613",
"gpt-4-0314", // EOL 2024-06-13
+4
View File
@@ -211,6 +211,7 @@ function processQueue() {
// TODO: `getLockoutPeriod` uses model names instead of model families
// TODO: genericize this it's really ugly
const gpt4TurboLockout = keyPool.getLockoutPeriod("gpt-4-1106");
const gpt432kLockout = keyPool.getLockoutPeriod("gpt-4-32k");
const gpt4Lockout = keyPool.getLockoutPeriod("gpt-4");
const turboLockout = keyPool.getLockoutPeriod("gpt-3.5-turbo");
@@ -219,6 +220,9 @@ function processQueue() {
const awsClaudeLockout = keyPool.getLockoutPeriod("anthropic.claude-v2");
const reqs: (Request | undefined)[] = [];
if (gpt4TurboLockout === 0) {
reqs.push(dequeue("gpt4-turbo"));
}
if (gpt432kLockout === 0) {
reqs.push(dequeue("gpt4-32k"));
}