adds gpt4-turbo model family and support for gpt-4-1106-preview model
This commit is contained in:
+4
-11
@@ -181,14 +181,14 @@ export const config: Config = {
|
||||
firebaseRtdbUrl: getEnvWithDefault("FIREBASE_RTDB_URL", undefined),
|
||||
firebaseKey: getEnvWithDefault("FIREBASE_KEY", undefined),
|
||||
modelRateLimit: getEnvWithDefault("MODEL_RATE_LIMIT", 4),
|
||||
maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 0),
|
||||
maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 16384),
|
||||
maxContextTokensAnthropic: getEnvWithDefault(
|
||||
"MAX_CONTEXT_TOKENS_ANTHROPIC",
|
||||
0
|
||||
),
|
||||
maxOutputTokensOpenAI: getEnvWithDefault(
|
||||
["MAX_OUTPUT_TOKENS_OPENAI", "MAX_OUTPUT_TOKENS"],
|
||||
300
|
||||
400
|
||||
),
|
||||
maxOutputTokensAnthropic: getEnvWithDefault(
|
||||
["MAX_OUTPUT_TOKENS_ANTHROPIC", "MAX_OUTPUT_TOKENS"],
|
||||
@@ -198,6 +198,7 @@ export const config: Config = {
|
||||
"turbo",
|
||||
"gpt4",
|
||||
"gpt4-32k",
|
||||
"gpt4-turbo",
|
||||
"claude",
|
||||
"bison",
|
||||
"aws-claude",
|
||||
@@ -228,6 +229,7 @@ export const config: Config = {
|
||||
turbo: getEnvWithDefault("TOKEN_QUOTA_TURBO", 0),
|
||||
gpt4: getEnvWithDefault("TOKEN_QUOTA_GPT4", 0),
|
||||
"gpt4-32k": getEnvWithDefault("TOKEN_QUOTA_GPT4_32K", 0),
|
||||
"gpt4-turbo": getEnvWithDefault("TOKEN_QUOTA_GPT4_TURBO", 0),
|
||||
claude: getEnvWithDefault("TOKEN_QUOTA_CLAUDE", 0),
|
||||
bison: getEnvWithDefault("TOKEN_QUOTA_BISON", 0),
|
||||
"aws-claude": getEnvWithDefault("TOKEN_QUOTA_AWS_CLAUDE", 0),
|
||||
@@ -250,15 +252,6 @@ function generateCookieSecret() {
|
||||
export const COOKIE_SECRET = generateCookieSecret();
|
||||
|
||||
export async function assertConfigIsValid() {
|
||||
if (process.env.TURBO_ONLY === "true") {
|
||||
startupLogger.warn(
|
||||
"TURBO_ONLY is deprecated. Use ALLOWED_MODEL_FAMILIES=turbo instead."
|
||||
);
|
||||
config.allowedModelFamilies = config.allowedModelFamilies.filter(
|
||||
(f) => !f.includes("gpt4")
|
||||
);
|
||||
}
|
||||
|
||||
if (!["none", "proxy_key", "user_token"].includes(config.gatekeeper)) {
|
||||
throw new Error(
|
||||
`Invalid gatekeeper mode: ${config.gatekeeper}. Must be one of: none, proxy_key, user_token.`
|
||||
|
||||
+13
-5
@@ -192,7 +192,9 @@ function addKeyToAggregates(k: KeyPoolKey) {
|
||||
increment(modelStats, `${f}__tokens`, tokens);
|
||||
});
|
||||
|
||||
if (families.includes("gpt4-32k")) {
|
||||
if (families.includes("gpt4-turbo")) {
|
||||
family = "gpt4-turbo";
|
||||
} else if (families.includes("gpt4-32k")) {
|
||||
family = "gpt4-32k";
|
||||
} else if (families.includes("gpt4")) {
|
||||
family = "gpt4";
|
||||
@@ -285,12 +287,18 @@ function getOpenAIInfo() {
|
||||
const tokens = modelStats.get(`${f}__tokens`) || 0;
|
||||
const cost = getTokenCostUsd(f, tokens);
|
||||
|
||||
const active = modelStats.get(`${f}__active`) || 0;
|
||||
const trial = modelStats.get(`${f}__trial`) || 0;
|
||||
const revoked = modelStats.get(`${f}__revoked`) || 0;
|
||||
const overQuota = modelStats.get(`${f}__overQuota`) || 0;
|
||||
if (active + trial + revoked + overQuota === 0) return;
|
||||
|
||||
info[f] = {
|
||||
usage: `${prettyTokens(tokens)} tokens${getCostString(cost)}`,
|
||||
activeKeys: modelStats.get(`${f}__active`) || 0,
|
||||
trialKeys: modelStats.get(`${f}__trial`) || 0,
|
||||
revokedKeys: modelStats.get(`${f}__revoked`) || 0,
|
||||
overQuotaKeys: modelStats.get(`${f}__overQuota`) || 0,
|
||||
activeKeys: active,
|
||||
trialKeys: trial,
|
||||
revokedKeys: revoked,
|
||||
overQuotaKeys: overQuota,
|
||||
};
|
||||
});
|
||||
} else {
|
||||
|
||||
@@ -65,6 +65,7 @@ const OpenAIV1ChatCompletionSchema = z.object({
|
||||
presence_penalty: z.number().optional().default(0),
|
||||
logit_bias: z.any().optional(),
|
||||
user: z.string().optional(),
|
||||
seed: z.number().int().optional(),
|
||||
});
|
||||
|
||||
const OpenAIV1TextCompletionSchema = z
|
||||
|
||||
@@ -42,6 +42,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
|
||||
let modelMax: number;
|
||||
if (model.match(/gpt-3.5-turbo-16k/)) {
|
||||
modelMax = 16384;
|
||||
} else if (model.match(/gpt-4-1106(-preview)?/)) {
|
||||
modelMax = 131072;
|
||||
} else if (model.match(/gpt-3.5-turbo/)) {
|
||||
modelMax = 4096;
|
||||
} else if (model.match(/gpt-4-32k/)) {
|
||||
@@ -60,8 +62,6 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
|
||||
// Not sure if AWS Claude has the same context limit as Anthropic Claude.
|
||||
modelMax = 100000;
|
||||
} else {
|
||||
// Don't really want to throw here because I don't want to have to update
|
||||
// this ASAP every time a new model is released.
|
||||
req.log.warn({ model }, "Unknown model, using 100k token limit.");
|
||||
modelMax = 100000;
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ function getModelsResponse() {
|
||||
|
||||
// https://platform.openai.com/docs/models/overview
|
||||
const knownModels = [
|
||||
"gpt-4-1106-preview",
|
||||
"gpt-4",
|
||||
"gpt-4-0613",
|
||||
"gpt-4-0314", // EOL 2024-06-13
|
||||
|
||||
@@ -211,6 +211,7 @@ function processQueue() {
|
||||
|
||||
// TODO: `getLockoutPeriod` uses model names instead of model families
|
||||
// TODO: genericize this; it's really ugly
|
||||
const gpt4TurboLockout = keyPool.getLockoutPeriod("gpt-4-1106");
|
||||
const gpt432kLockout = keyPool.getLockoutPeriod("gpt-4-32k");
|
||||
const gpt4Lockout = keyPool.getLockoutPeriod("gpt-4");
|
||||
const turboLockout = keyPool.getLockoutPeriod("gpt-3.5-turbo");
|
||||
@@ -219,6 +220,9 @@ function processQueue() {
|
||||
const awsClaudeLockout = keyPool.getLockoutPeriod("anthropic.claude-v2");
|
||||
|
||||
const reqs: (Request | undefined)[] = [];
|
||||
if (gpt4TurboLockout === 0) {
|
||||
reqs.push(dequeue("gpt4-turbo"));
|
||||
}
|
||||
if (gpt432kLockout === 0) {
|
||||
reqs.push(dequeue("gpt4-32k"));
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import axios, { AxiosError } from "axios";
|
||||
import type { OpenAIModelFamily } from "../../models";
|
||||
import { KeyCheckerBase } from "../key-checker-base";
|
||||
import type { OpenAIKey, OpenAIKeyProvider } from "./provider";
|
||||
import { getOpenAIModelFamily } from "../../models";
|
||||
|
||||
const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
|
||||
const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour
|
||||
@@ -94,29 +95,21 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
|
||||
const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
|
||||
const models = data.data;
|
||||
|
||||
const families: OpenAIModelFamily[] = [];
|
||||
if (models.some(({ id }) => id.startsWith("gpt-3.5-turbo"))) {
|
||||
families.push("turbo");
|
||||
}
|
||||
|
||||
if (models.some(({ id }) => id.startsWith("gpt-4"))) {
|
||||
families.push("gpt4");
|
||||
}
|
||||
|
||||
if (models.some(({ id }) => id.startsWith("gpt-4-32k"))) {
|
||||
families.push("gpt4-32k");
|
||||
}
|
||||
// const families: OpenAIModelFamily[] = [];
|
||||
const families = new Set<OpenAIModelFamily>();
|
||||
models.forEach(({ id }) => families.add(getOpenAIModelFamily(id, "turbo")));
|
||||
|
||||
// We want to update the key's model families here, but we don't want to
|
||||
// update its `lastChecked` timestamp because we need to let the liveness
|
||||
// check run before we can consider the key checked.
|
||||
|
||||
const familiesArray = [...families];
|
||||
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
|
||||
this.updateKey(key.hash, {
|
||||
modelFamilies: families,
|
||||
modelFamilies: familiesArray,
|
||||
lastChecked: keyFromPool.lastChecked,
|
||||
});
|
||||
return families;
|
||||
return familiesArray;
|
||||
}
|
||||
|
||||
private async maybeCreateOrganizationClones(key: OpenAIKey) {
|
||||
|
||||
@@ -14,6 +14,7 @@ export type OpenAIModel =
|
||||
| "gpt-3.5-turbo-instruct"
|
||||
| "gpt-4"
|
||||
| "gpt-4-32k"
|
||||
| "gpt-4-1106"
|
||||
| "text-embedding-ada-002";
|
||||
export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
|
||||
"gpt-3.5-turbo",
|
||||
@@ -98,7 +99,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||
const newKey: OpenAIKey = {
|
||||
key: k,
|
||||
service: "openai" as const,
|
||||
modelFamilies: ["turbo" as const, "gpt4" as const],
|
||||
modelFamilies: [
|
||||
"turbo" as const,
|
||||
"gpt4" as const,
|
||||
"gpt4-turbo" as const,
|
||||
],
|
||||
isTrial: false,
|
||||
isDisabled: false,
|
||||
isRevoked: false,
|
||||
@@ -117,6 +122,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||
turboTokens: 0,
|
||||
gpt4Tokens: 0,
|
||||
"gpt4-32kTokens": 0,
|
||||
"gpt4-turboTokens": 0,
|
||||
};
|
||||
this.keys.push(newKey);
|
||||
}
|
||||
@@ -383,10 +389,9 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||
const now = Date.now();
|
||||
const key = this.keys.find((k) => k.hash === hash)!;
|
||||
|
||||
const currentRateLimit = Math.max(
|
||||
key.rateLimitRequestsReset,
|
||||
key.rateLimitTokensReset
|
||||
) + key.rateLimitedAt;
|
||||
const currentRateLimit =
|
||||
Math.max(key.rateLimitRequestsReset, key.rateLimitTokensReset) +
|
||||
key.rateLimitedAt;
|
||||
const nextRateLimit = now + KEY_REUSE_DELAY;
|
||||
|
||||
// Don't throttle if the key is already naturally rate limited.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { logger } from "../logger";
|
||||
|
||||
export type OpenAIModelFamily = "turbo" | "gpt4" | "gpt4-32k";
|
||||
export type OpenAIModelFamily = "turbo" | "gpt4" | "gpt4-32k" | "gpt4-turbo";
|
||||
export type AnthropicModelFamily = "claude";
|
||||
export type GooglePalmModelFamily = "bison";
|
||||
export type AwsBedrockModelFamily = "aws-claude";
|
||||
@@ -16,12 +16,14 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
|
||||
"turbo",
|
||||
"gpt4",
|
||||
"gpt4-32k",
|
||||
"gpt4-turbo",
|
||||
"claude",
|
||||
"bison",
|
||||
"aws-claude",
|
||||
] as const);
|
||||
|
||||
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
|
||||
"^gpt-4-1106(-preview)?$": "gpt4-turbo",
|
||||
"^gpt-4-32k-\\d{4}$": "gpt4-32k",
|
||||
"^gpt-4-32k$": "gpt4-32k",
|
||||
"^gpt-4-\\d{4}$": "gpt4",
|
||||
@@ -30,13 +32,14 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
|
||||
"^text-embedding-ada-002$": "turbo",
|
||||
};
|
||||
|
||||
export function getOpenAIModelFamily(model: string): OpenAIModelFamily {
|
||||
export function getOpenAIModelFamily(
|
||||
model: string,
|
||||
defaultFamily: OpenAIModelFamily = "gpt4"
|
||||
): OpenAIModelFamily {
|
||||
for (const [regex, family] of Object.entries(OPENAI_MODEL_FAMILY_MAP)) {
|
||||
if (model.match(regex)) return family;
|
||||
}
|
||||
const stack = new Error().stack;
|
||||
logger.warn({ model, stack }, "Unmapped model family");
|
||||
return "gpt4";
|
||||
return defaultFamily;
|
||||
}
|
||||
|
||||
export function getClaudeModelFamily(_model: string): ModelFamily {
|
||||
|
||||
+4
-1
@@ -5,6 +5,9 @@ import { ModelFamily } from "./models";
|
||||
export function getTokenCostUsd(model: ModelFamily, tokens: number) {
|
||||
let cost = 0;
|
||||
switch (model) {
|
||||
case "gpt4-turbo":
|
||||
cost = 0.00001;
|
||||
break;
|
||||
case "gpt4-32k":
|
||||
cost = 0.00006;
|
||||
break;
|
||||
@@ -12,7 +15,7 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
|
||||
cost = 0.00003;
|
||||
break;
|
||||
case "turbo":
|
||||
cost = 0.0000015;
|
||||
cost = 0.000001;
|
||||
break;
|
||||
case "aws-claude":
|
||||
case "claude":
|
||||
|
||||
@@ -21,6 +21,7 @@ const INITIAL_TOKENS: Required<UserTokenCounts> = {
|
||||
turbo: 0,
|
||||
gpt4: 0,
|
||||
"gpt4-32k": 0,
|
||||
"gpt4-turbo": 0,
|
||||
claude: 0,
|
||||
bison: 0,
|
||||
"aws-claude": 0,
|
||||
@@ -362,6 +363,9 @@ async function flushUsers() {
|
||||
|
||||
// TODO: use key-management/models.ts for family mapping
|
||||
function getModelFamilyForQuotaUsage(model: string): ModelFamily {
|
||||
if (model.startsWith("gpt-4-1106")) {
|
||||
return "gpt4-turbo";
|
||||
}
|
||||
if (model.includes("32k")) {
|
||||
return "gpt4-32k";
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user