Adds the gpt4-turbo model family and support for the gpt-4-1106-preview model

2023-11-06 15:29:43 -06:00
parent b615ffa433
commit 0d5dfeccf8
11 changed files with 58 additions and 43 deletions
@@ -181,14 +181,14 @@ export const config: Config = {
  firebaseRtdbUrl: getEnvWithDefault("FIREBASE_RTDB_URL", undefined),
  firebaseKey: getEnvWithDefault("FIREBASE_KEY", undefined),
  modelRateLimit: getEnvWithDefault("MODEL_RATE_LIMIT", 4),
-  maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 0),
+  maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 16384),
  maxContextTokensAnthropic: getEnvWithDefault(
    "MAX_CONTEXT_TOKENS_ANTHROPIC",
    0
  ),
  maxOutputTokensOpenAI: getEnvWithDefault(
    ["MAX_OUTPUT_TOKENS_OPENAI", "MAX_OUTPUT_TOKENS"],
-    300
+    400
  ),
  maxOutputTokensAnthropic: getEnvWithDefault(
    ["MAX_OUTPUT_TOKENS_ANTHROPIC", "MAX_OUTPUT_TOKENS"],
@@ -198,6 +198,7 @@ export const config: Config = {
    "turbo",
    "gpt4",
    "gpt4-32k",
+    "gpt4-turbo",
    "claude",
    "bison",
    "aws-claude",
@@ -228,6 +229,7 @@ export const config: Config = {
    turbo: getEnvWithDefault("TOKEN_QUOTA_TURBO", 0),
    gpt4: getEnvWithDefault("TOKEN_QUOTA_GPT4", 0),
    "gpt4-32k": getEnvWithDefault("TOKEN_QUOTA_GPT4_32K", 0),
+    "gpt4-turbo": getEnvWithDefault("TOKEN_QUOTA_GPT4_TURBO", 0),
    claude: getEnvWithDefault("TOKEN_QUOTA_CLAUDE", 0),
    bison: getEnvWithDefault("TOKEN_QUOTA_BISON", 0),
    "aws-claude": getEnvWithDefault("TOKEN_QUOTA_AWS_CLAUDE", 0),
@@ -250,15 +252,6 @@ function generateCookieSecret() {
 export const COOKIE_SECRET = generateCookieSecret();

 export async function assertConfigIsValid() {
-  if (process.env.TURBO_ONLY === "true") {
-    startupLogger.warn(
-      "TURBO_ONLY is deprecated. Use ALLOWED_MODEL_FAMILIES=turbo instead."
-    );
-    config.allowedModelFamilies = config.allowedModelFamilies.filter(
-      (f) => !f.includes("gpt4")
-    );
-  }
-
  if (!["none", "proxy_key", "user_token"].includes(config.gatekeeper)) {
    throw new Error(
      `Invalid gatekeeper mode: ${config.gatekeeper}. Must be one of: none, proxy_key, user_token.`
@@ -192,7 +192,9 @@ function addKeyToAggregates(k: KeyPoolKey) {
        increment(modelStats, `${f}__tokens`, tokens);
      });

-      if (families.includes("gpt4-32k")) {
+      if (families.includes("gpt4-turbo")) {
+        family = "gpt4-turbo";
+      } else if (families.includes("gpt4-32k")) {
        family = "gpt4-32k";
      } else if (families.includes("gpt4")) {
        family = "gpt4";
@@ -285,12 +287,18 @@ function getOpenAIInfo() {
      const tokens = modelStats.get(`${f}__tokens`) || 0;
      const cost = getTokenCostUsd(f, tokens);

+      const active = modelStats.get(`${f}__active`) || 0;
+      const trial = modelStats.get(`${f}__trial`) || 0;
+      const revoked = modelStats.get(`${f}__revoked`) || 0;
+      const overQuota = modelStats.get(`${f}__overQuota`) || 0;
+      if (active + trial + revoked + overQuota === 0) return;
+
      info[f] = {
        usage: `${prettyTokens(tokens)} tokens${getCostString(cost)}`,
-        activeKeys: modelStats.get(`${f}__active`) || 0,
-        trialKeys: modelStats.get(`${f}__trial`) || 0,
-        revokedKeys: modelStats.get(`${f}__revoked`) || 0,
-        overQuotaKeys: modelStats.get(`${f}__overQuota`) || 0,
+        activeKeys: active,
+        trialKeys: trial,
+        revokedKeys: revoked,
+        overQuotaKeys: overQuota,
      };
    });
  } else {
@@ -65,6 +65,7 @@ const OpenAIV1ChatCompletionSchema = z.object({
  presence_penalty: z.number().optional().default(0),
  logit_bias: z.any().optional(),
  user: z.string().optional(),
+  seed: z.number().int().optional(),
 });

 const OpenAIV1TextCompletionSchema = z
@@ -42,6 +42,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
  let modelMax: number;
  if (model.match(/gpt-3.5-turbo-16k/)) {
    modelMax = 16384;
+  } else if (model.match(/gpt-4-1106(-preview)?/)) {
+    modelMax = 131072;
  } else if (model.match(/gpt-3.5-turbo/)) {
    modelMax = 4096;
  } else if (model.match(/gpt-4-32k/)) {
@@ -60,8 +62,6 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
    // Not sure if AWS Claude has the same context limit as Anthropic Claude.
    modelMax = 100000;
  } else {
-    // Don't really want to throw here because I don't want to have to update
-    // this ASAP every time a new model is released.
    req.log.warn({ model }, "Unknown model, using 100k token limit.");
    modelMax = 100000;
  }
@@ -41,6 +41,7 @@ function getModelsResponse() {

  // https://platform.openai.com/docs/models/overview
  const knownModels = [
+    "gpt-4-1106-preview",
    "gpt-4",
    "gpt-4-0613",
    "gpt-4-0314", // EOL 2024-06-13
@@ -211,6 +211,7 @@ function processQueue() {

  // TODO: `getLockoutPeriod` uses model names instead of model families
  // TODO: genericize this it's really ugly
+  const gpt4TurboLockout = keyPool.getLockoutPeriod("gpt-4-1106");
  const gpt432kLockout = keyPool.getLockoutPeriod("gpt-4-32k");
  const gpt4Lockout = keyPool.getLockoutPeriod("gpt-4");
  const turboLockout = keyPool.getLockoutPeriod("gpt-3.5-turbo");
@@ -219,6 +220,9 @@ function processQueue() {
  const awsClaudeLockout = keyPool.getLockoutPeriod("anthropic.claude-v2");

  const reqs: (Request | undefined)[] = [];
+  if (gpt4TurboLockout === 0) {
+    reqs.push(dequeue("gpt4-turbo"));
+  }
  if (gpt432kLockout === 0) {
    reqs.push(dequeue("gpt4-32k"));
  }
@@ -2,6 +2,7 @@ import axios, { AxiosError } from "axios";
 import type { OpenAIModelFamily } from "../../models";
 import { KeyCheckerBase } from "../key-checker-base";
 import type { OpenAIKey, OpenAIKeyProvider } from "./provider";
+import { getOpenAIModelFamily } from "../../models";

 const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
 const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour
@@ -94,29 +95,21 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
    const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
    const models = data.data;

-    const families: OpenAIModelFamily[] = [];
-    if (models.some(({ id }) => id.startsWith("gpt-3.5-turbo"))) {
-      families.push("turbo");
-    }
-
-    if (models.some(({ id }) => id.startsWith("gpt-4"))) {
-      families.push("gpt4");
-    }
-
-    if (models.some(({ id }) => id.startsWith("gpt-4-32k"))) {
-      families.push("gpt4-32k");
-    }
+    // const families: OpenAIModelFamily[] = [];
+    const families = new Set<OpenAIModelFamily>();
+    models.forEach(({ id }) => families.add(getOpenAIModelFamily(id, "turbo")));

    // We want to update the key's model families here, but we don't want to
    // update its `lastChecked` timestamp because we need to let the liveness
    // check run before we can consider the key checked.

+    const familiesArray = [...families];
    const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
    this.updateKey(key.hash, {
-      modelFamilies: families,
+      modelFamilies: familiesArray,
      lastChecked: keyFromPool.lastChecked,
    });
-    return families;
+    return familiesArray;
  }

  private async maybeCreateOrganizationClones(key: OpenAIKey) {
@@ -14,6 +14,7 @@ export type OpenAIModel =
  | "gpt-3.5-turbo-instruct"
  | "gpt-4"
  | "gpt-4-32k"
+  | "gpt-4-1106"
  | "text-embedding-ada-002";
 export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
  "gpt-3.5-turbo",
@@ -98,7 +99,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
      const newKey: OpenAIKey = {
        key: k,
        service: "openai" as const,
-        modelFamilies: ["turbo" as const, "gpt4" as const],
+        modelFamilies: [
+          "turbo" as const,
+          "gpt4" as const,
+          "gpt4-turbo" as const,
+        ],
        isTrial: false,
        isDisabled: false,
        isRevoked: false,
@@ -117,6 +122,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
        turboTokens: 0,
        gpt4Tokens: 0,
        "gpt4-32kTokens": 0,
+        "gpt4-turboTokens": 0,
      };
      this.keys.push(newKey);
    }
@@ -383,10 +389,9 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
    const now = Date.now();
    const key = this.keys.find((k) => k.hash === hash)!;

-    const currentRateLimit = Math.max(
-      key.rateLimitRequestsReset,
-      key.rateLimitTokensReset
-    ) + key.rateLimitedAt;
+    const currentRateLimit =
+      Math.max(key.rateLimitRequestsReset, key.rateLimitTokensReset) +
+      key.rateLimitedAt;
    const nextRateLimit = now + KEY_REUSE_DELAY;

    // Don't throttle if the key is already naturally rate limited.
@@ -1,6 +1,6 @@
 import { logger } from "../logger";

-export type OpenAIModelFamily = "turbo" | "gpt4" | "gpt4-32k";
+export type OpenAIModelFamily = "turbo" | "gpt4" | "gpt4-32k" | "gpt4-turbo";
 export type AnthropicModelFamily = "claude";
 export type GooglePalmModelFamily = "bison";
 export type AwsBedrockModelFamily = "aws-claude";
@@ -16,12 +16,14 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
  "turbo",
  "gpt4",
  "gpt4-32k",
+  "gpt4-turbo",
  "claude",
  "bison",
  "aws-claude",
 ] as const);

 export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
+  "^gpt-4-1106(-preview)?$": "gpt4-turbo",
  "^gpt-4-32k-\\d{4}$": "gpt4-32k",
  "^gpt-4-32k$": "gpt4-32k",
  "^gpt-4-\\d{4}$": "gpt4",
@@ -30,13 +32,14 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
  "^text-embedding-ada-002$": "turbo",
 };

-export function getOpenAIModelFamily(model: string): OpenAIModelFamily {
+export function getOpenAIModelFamily(
+  model: string,
+  defaultFamily: OpenAIModelFamily = "gpt4"
+): OpenAIModelFamily {
  for (const [regex, family] of Object.entries(OPENAI_MODEL_FAMILY_MAP)) {
    if (model.match(regex)) return family;
  }
-  const stack = new Error().stack;
-  logger.warn({ model, stack }, "Unmapped model family");
-  return "gpt4";
+  return defaultFamily;
 }

 export function getClaudeModelFamily(_model: string): ModelFamily {
@@ -5,6 +5,9 @@ import { ModelFamily } from "./models";
 export function getTokenCostUsd(model: ModelFamily, tokens: number) {
  let cost = 0;
  switch (model) {
+    case "gpt4-turbo":
+      cost = 0.00001;
+      break;
    case "gpt4-32k":
      cost = 0.00006;
      break;
@@ -12,7 +15,7 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
      cost = 0.00003;
      break;
    case "turbo":
-      cost = 0.0000015;
+      cost = 0.000001;
      break;
    case "aws-claude":
    case "claude":
@@ -21,6 +21,7 @@ const INITIAL_TOKENS: Required<UserTokenCounts> = {
  turbo: 0,
  gpt4: 0,
  "gpt4-32k": 0,
+  "gpt4-turbo": 0,
  claude: 0,
  bison: 0,
  "aws-claude": 0,
@@ -362,6 +363,9 @@ async function flushUsers() {

 // TODO: use key-management/models.ts for family mapping
 function getModelFamilyForQuotaUsage(model: string): ModelFamily {
+  if (model.startsWith("gpt-4-1106")) {
+    return "gpt4-turbo";
+  }
  if (model.includes("32k")) {
    return "gpt4-32k";
  }