refactors keypool and implements key health checks
This commit is contained in:
Generated
+57
@@ -9,10 +9,12 @@
|
||||
"version": "1.0.0",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.3.5",
|
||||
"cors": "^2.8.5",
|
||||
"dotenv": "^16.0.3",
|
||||
"express": "^4.18.2",
|
||||
"http-proxy-middleware": "^3.0.0-beta.1",
|
||||
"pino": "^8.11.0",
|
||||
"pino-http": "^8.3.3",
|
||||
"showdown": "^2.1.0",
|
||||
"zlib": "^1.0.5"
|
||||
@@ -24,6 +26,9 @@
|
||||
"nodemon": "^2.0.22",
|
||||
"ts-node": "^10.9.1",
|
||||
"typescript": "^5.0.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@cspotcode/source-map-support": {
|
||||
@@ -259,6 +264,11 @@
|
||||
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
|
||||
"integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg=="
|
||||
},
|
||||
"node_modules/asynckit": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||
},
|
||||
"node_modules/atomic-sleep": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/atomic-sleep/-/atomic-sleep-1.0.0.tgz",
|
||||
@@ -267,6 +277,16 @@
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.3.5",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.3.5.tgz",
|
||||
"integrity": "sha512-glL/PvG/E+xCWwV8S6nCHcrfg1exGx7vxyUIivIA1iL7BIh6bePylCfVHwp6k13ao7SATxB6imau2kqY+I67kw==",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.15.0",
|
||||
"form-data": "^4.0.0",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||
@@ -415,6 +435,17 @@
|
||||
"fsevents": "~2.3.2"
|
||||
}
|
||||
},
|
||||
"node_modules/combined-stream": {
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||
"dependencies": {
|
||||
"delayed-stream": "~1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/commander": {
|
||||
"version": "9.5.0",
|
||||
"resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz",
|
||||
@@ -487,6 +518,14 @@
|
||||
"ms": "2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/delayed-stream": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/depd": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
|
||||
@@ -664,6 +703,19 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/forwarded": {
|
||||
"version": "0.2.0",
|
||||
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
|
||||
@@ -1215,6 +1267,11 @@
|
||||
"node": ">= 0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||
},
|
||||
"node_modules/pstree.remy": {
|
||||
"version": "1.1.8",
|
||||
"resolved": "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz",
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.3.5",
|
||||
"cors": "^2.8.5",
|
||||
"dotenv": "^16.0.3",
|
||||
"express": "^4.18.2",
|
||||
|
||||
@@ -22,6 +22,8 @@ type Config = {
|
||||
logLevel?: "debug" | "info" | "warn" | "error";
|
||||
/** Whether prompts and responses should be logged. */
|
||||
logPrompts?: boolean; // TODO
|
||||
/** Whether to periodically check keys for usage and validity. */
|
||||
checkKeys?: boolean;
|
||||
};
|
||||
|
||||
export const config: Config = {
|
||||
@@ -38,6 +40,7 @@ export const config: Config = {
|
||||
),
|
||||
logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
|
||||
logPrompts: getEnvWithDefault("LOG_PROMPTS", false),
|
||||
checkKeys: getEnvWithDefault("CHECK_KEYS", true),
|
||||
} as const;
|
||||
|
||||
export const SENSITIVE_KEYS: (keyof Config)[] = ["proxyKey", "openaiKey"];
|
||||
|
||||
+36
-13
@@ -1,7 +1,7 @@
|
||||
import { Request, Response } from "express";
|
||||
import showdown from "showdown";
|
||||
import { config, listConfig } from "./config";
|
||||
import { keys } from "./keys/key-pool";
|
||||
import { keyPool } from "./key-management";
|
||||
import { getUniqueIps } from "./proxy/rate-limit";
|
||||
|
||||
export const handleInfoPage = (req: Request, res: Response) => {
|
||||
@@ -13,22 +13,45 @@ export const handleInfoPage = (req: Request, res: Response) => {
|
||||
};
|
||||
|
||||
function getInfoPageHtml(host: string) {
|
||||
const keylist = keys.list();
|
||||
const rateLimitInfo = { proomptersLastFiveMinutes: getUniqueIps() };
|
||||
const keys = keyPool.list();
|
||||
let keyInfo: Record<string, any> = {
|
||||
all: keys.length,
|
||||
active: keys.filter((k) => !k.isDisabled).length,
|
||||
};
|
||||
|
||||
if (keyPool.anyUnchecked()) {
|
||||
const uncheckedKeys = keys.filter((k) => !k.lastChecked);
|
||||
keyInfo = {
|
||||
...keyInfo,
|
||||
status: `Still checking ${uncheckedKeys.length} keys...`,
|
||||
};
|
||||
} else if (config.checkKeys) {
|
||||
keyInfo = {
|
||||
...keyInfo,
|
||||
trial: keys.filter((k) => k.isTrial).length,
|
||||
gpt4: keys.filter((k) => k.isGpt4).length,
|
||||
remainingQuota: `${Math.round(keyPool.calculateRemainingQuota() * 100)}%`,
|
||||
};
|
||||
}
|
||||
|
||||
const info = {
|
||||
uptime: process.uptime(),
|
||||
timestamp: Date.now(),
|
||||
baseUrl: host,
|
||||
kobold: host, // kobold doesn't need the suffix
|
||||
openai: host + "/proxy/openai",
|
||||
proompts: keylist.reduce((acc, k) => acc + k.promptCount, 0),
|
||||
...(config.modelRateLimit ? rateLimitInfo : {}),
|
||||
keys: {
|
||||
all: keylist.length,
|
||||
active: keylist.filter((k) => !k.isDisabled).length,
|
||||
trial: keylist.filter((k) => k.isTrial).length,
|
||||
gpt4: keylist.filter((k) => k.isGpt4).length,
|
||||
// Describes the URLs each client app and adapter should use to connect
|
||||
appUrls: {
|
||||
baseUrl: host,
|
||||
tavern: {
|
||||
kobold: host,
|
||||
openai: host + "/proxy/openai/v1",
|
||||
},
|
||||
agnaistic: {
|
||||
kobold: host,
|
||||
openai: host + "/proxy/openai",
|
||||
},
|
||||
},
|
||||
proompts: keys.reduce((acc, k) => acc + k.promptCount, 0),
|
||||
...(config.modelRateLimit ? { proomptingNow: getUniqueIps() } : {}),
|
||||
keyInfo,
|
||||
config: listConfig(),
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
import { KeyPool } from "./key-pool";
|
||||
|
||||
export type { Key } from "./key-pool";
|
||||
/**
 * Shared KeyPool instance for the application. It is constructed as soon as
 * this module is first imported; call `init()` on it to start key checking.
 */
export const keyPool = new KeyPool();
|
||||
@@ -0,0 +1,191 @@
|
||||
import axios, { AxiosError } from "axios";
|
||||
import { logger } from "../logger";
|
||||
import type { Key, KeyPool } from "./key-pool";
|
||||
|
||||
/** One minute, expressed in milliseconds. */
const ONE_MINUTE = 1000 * 60;
/** Five minutes, expressed in milliseconds. */
const FIVE_MINUTES = ONE_MINUTE * 5;

/** Endpoint queried to list the models available to a key. */
const GET_MODELS_URL = "https://api.openai.com/v1/models";
/** Endpoint queried for a key's subscription details and spending limits. */
const GET_SUBSCRIPTION_URL = "https://api.openai.com/dashboard/billing/subscription";
/** Endpoint queried for a key's usage; takes a start/end date query string. */
const GET_USAGE_URL = "https://api.openai.com/dashboard/billing/usage";
|
||||
|
||||
/**
 * Subset of the models-list response that the key checker reads.
 * `data` is an array of arbitrary length (previously mis-typed as a
 * one-element tuple).
 */
type GetModelsResponse = {
  /** One entry per model; only the model id is inspected. */
  data: { id: string }[];
};

/** Subset of the billing subscription response that the key checker reads. */
type GetSubscriptionResponse = {
  plan: { title: string };
  /** The checker treats a key without a payment method as a trial key. */
  has_payment_method: boolean;
  /** Stored on the key as `softLimit`. */
  soft_limit_usd: number;
  /** Stored on the key as `hardLimit`. */
  hard_limit_usd: number;
  /** Stored on the key as `systemHardLimit`. */
  system_hard_limit_usd: number;
};

/** Subset of the billing usage response that the key checker reads. */
type GetUsageResponse = {
  /** The checker divides this by 100 — presumably cents; confirm against the API. */
  total_usage: number;
};
|
||||
|
||||
/**
 * Background worker that checks each enabled key against the OpenAI API
 * (provisioned models, subscription limits and usage) and reports the result
 * through the `updateKey` callback supplied by the owning pool.
 *
 * Keys that have never been checked are checked first, one per second. After
 * that, the key with the oldest `lastChecked` timestamp is re-checked once
 * five minutes have passed since its previous check.
 */
export class KeyChecker {
  private readonly keys: Key[];
  private log = logger.child({ module: "KeyChecker" });
  private timeout?: NodeJS.Timeout;
  private updateKey: typeof KeyPool.prototype.update;
  /** True between start() and stop(); stops an in-flight check from re-arming the timer. */
  private running = false;

  /**
   * @param keys Keys to monitor. The array and its objects are read on every
   *   scheduling pass, and `isDisabled` is set directly on a key that gets a 401.
   * @param updateKey Callback used to store check results for a key hash.
   */
  constructor(keys: Key[], updateKey: typeof KeyPool.prototype.update) {
    this.keys = keys;
    this.updateKey = updateKey;
  }

  /** Starts the check loop. Any previously pending timer is replaced. */
  public start() {
    this.log.info("Starting key checker");
    if (this.timeout) {
      clearTimeout(this.timeout);
    }
    this.running = true;
    this.scheduleNextCheck();
  }

  /** Stops the check loop; a check that is already in flight will not reschedule. */
  public stop() {
    this.running = false;
    if (this.timeout) {
      clearTimeout(this.timeout);
      this.timeout = undefined;
    }
  }

  /**
   * Schedules the next check. If there are still keys yet to be checked, it
   * will schedule a check in one second for the next unchecked key. Otherwise,
   * it will schedule a check for the least recently checked key at the moment
   * that key's five-minute interval elapses.
   **/
  private scheduleNextCheck() {
    if (!this.running) return;

    const enabledKeys = this.keys.filter((key) => !key.isDisabled);

    if (enabledKeys.length === 0) {
      this.log.warn(
        "There are no enabled keys. Key checking will be disabled."
      );
      return;
    }

    const uncheckedKeys = enabledKeys.filter((key) => !key.lastChecked);
    if (uncheckedKeys.length > 0) {
      const nextKey = uncheckedKeys[0];
      this.log.info(
        { key: nextKey.hash, remaining: uncheckedKeys.length - 1 },
        "Scheduling initial check for key."
      );
      this.timeout = setTimeout(() => void this.checkKey(nextKey), 1000);
      return;
    }

    // No individual key is checked more than once every five minutes, so wait
    // exactly until the least recently checked key becomes due. The delay is
    // clamped at zero for keys that are already overdue.
    const oldestKey = enabledKeys.reduce((oldest, key) =>
      key.lastChecked < oldest.lastChecked ? key : oldest
    );
    const timeUntilNextCheck = Math.max(
      0,
      oldestKey.lastChecked + FIVE_MINUTES - Date.now()
    );
    this.log.info(
      { key: oldestKey.hash, seconds: timeUntilNextCheck / 1000 },
      "Scheduling next check for key."
    );
    this.timeout = setTimeout(
      () => void this.checkKey(oldestKey),
      timeUntilNextCheck
    );
  }

  /**
   * Runs the three API queries for one key in parallel, stores the result via
   * `updateKey`, and then schedules the next check. Failures are logged by
   * `handleAxiosError` and never propagate to the caller.
   */
  private async checkKey(key: Key) {
    this.log.info({ key: key.hash }, "Checking key...");
    try {
      const [provisionedModels, subscription, usage] = await Promise.all([
        this.getProvisionedModels(key),
        this.getSubscription(key),
        this.getUsage(key),
      ]);
      const updates = {
        isGpt4: provisionedModels.gpt4,
        isTrial: !subscription.has_payment_method,
        softLimit: subscription.soft_limit_usd,
        hardLimit: subscription.hard_limit_usd,
        systemHardLimit: subscription.system_hard_limit_usd,
        usage,
      };
      this.updateKey(key.hash, updates);
      this.log.info({ key: key.hash, updates }, "Key check complete.");
    } catch (error) {
      this.handleAxiosError(key, error as AxiosError);
      // Record the attempt even though it failed. Otherwise the key keeps a
      // stale (or zero) lastChecked and is retried every second forever,
      // which also starves every key queued behind it.
      this.updateKey(key.hash, {});
    }

    this.scheduleNextCheck();
  }

  /** Reports whether any model id visible to the key starts with "gpt-3.5" / "gpt-4". */
  private async getProvisionedModels(
    key: Key
  ): Promise<{ turbo: boolean; gpt4: boolean }> {
    const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, {
      headers: { Authorization: `Bearer ${key.key}` },
    });
    const turbo = data.data.some(({ id }) => id.startsWith("gpt-3.5"));
    const gpt4 = data.data.some(({ id }) => id.startsWith("gpt-4"));
    return { turbo, gpt4 };
  }

  /** Fetches the subscription record for the key. */
  private async getSubscription(key: Key) {
    const { data } = await axios.get<GetSubscriptionResponse>(
      GET_SUBSCRIPTION_URL,
      { headers: { Authorization: `Bearer ${key.key}` } }
    );
    return data;
  }

  /**
   * Fetches the key's usage for the window chosen by `getUsageQuerystring`
   * and returns `total_usage / 100` rounded to two decimals.
   */
  private async getUsage(key: Key) {
    const querystring = KeyChecker.getUsageQuerystring(key.isTrial);
    const url = `${GET_USAGE_URL}?${querystring}`;
    const { data } = await axios.get<GetUsageResponse>(url, {
      headers: { Authorization: `Bearer ${key.key}` },
    });
    return parseFloat((data.total_usage / 100).toFixed(2));
  }

  /**
   * Logs a failed check. A 401 response disables the key; any other HTTP
   * status, or an error without a response, is only logged.
   */
  private handleAxiosError(key: Key, error: AxiosError) {
    if (error.response) {
      const { status, data } = error.response;
      if (status === 401) {
        this.log.warn(
          { key: key.hash, error: data },
          "Key is invalid or revoked. Disabling key."
        );
        key.isDisabled = true;
      } else {
        this.log.error(
          { key: key.hash, status, error: data },
          "Encountered API error while checking key."
        );
      }
    } else {
      this.log.error(
        { key: key.hash, error },
        "Network error while checking key."
      );
    }
  }

  /**
   * Builds the `start_date=YYYY-MM-DD&end_date=YYYY-MM-DD` query string for
   * the usage endpoint. Both dates are UTC calendar dates.
   *
   * @param isTrial Whether the key is a trial key.
   */
  static getUsageQuerystring(isTrial: boolean) {
    // For paid keys, the limit resets every month, so we can use the current
    // month as the start date.
    // For trial keys, the limit does not reset, so we need to use the start
    // date of the trial. We don't know that but it can be at most 90 days ago.
    const today = new Date();
    // The month start is built in UTC because toISOString() formats in UTC.
    // A local-time midnight would land on the previous month's last day in
    // time zones ahead of UTC.
    const startDate = isTrial
      ? new Date(today.getTime() - 90 * 24 * 60 * 60 * 1000)
      : new Date(Date.UTC(today.getUTCFullYear(), today.getUTCMonth(), 1));
    const toIsoDate = (date: Date) => date.toISOString().slice(0, 10);
    return `start_date=${toIsoDate(startDate)}&end_date=${toIsoDate(today)}`;
  }
}
|
||||
@@ -0,0 +1,171 @@
|
||||
/* Manages OpenAI API keys. Tracks usage, disables expired keys, and provides
|
||||
round-robin access to keys. Keys are stored in the OPENAI_KEY environment
|
||||
variable as a comma-separated list of keys. */
|
||||
import crypto from "crypto";
|
||||
import { config } from "../config";
|
||||
import { logger } from "../logger";
|
||||
import { KeyChecker } from "./key-checker";
|
||||
|
||||
/** Everything the pool tracks about a single OpenAI API key. */
export type Key = {
  /** The raw OpenAI API key. */
  key: string;
  /**
   * True for free trial keys. When a trial key can fulfill a request it is
   * preferred over paid keys.
   */
  isTrial: boolean;
  /** True when the key has been provisioned for GPT-4. */
  isGpt4: boolean;
  /**
   * True while the key is disabled. Set when OpenAI answers with a 429 or
   * 401 response.
   */
  isDisabled: boolean;
  /** Threshold at which OpenAI sends a warning email. */
  softLimit: number;
  /**
   * Threshold at which the key is disabled because the user-defined limit
   * has been reached.
   */
  hardLimit: number;
  /** Maximum quota OpenAI has allocated to the key. */
  systemHardLimit: number;
  /** Current usage of the key. */
  usage: number;
  /** How many prompts have been sent with the key. */
  promptCount: number;
  /** When the key was last used. */
  lastUsed: number;
  /** When the key was last checked. */
  lastChecked: number;
  /** Hash of the key, used to display usage in the dashboard. */
  hash: string;
};
|
||||
|
||||
/**
 * Fields of a key that may be changed through `KeyPool.update`. All are
 * optional; the key itself, its hash, and the pool-managed bookkeeping
 * fields are excluded.
 */
export type KeyUpdate = Partial<
  Omit<
    Key,
    "key" | "hash" | "isDisabled" | "lastUsed" | "lastChecked" | "promptCount"
  >
>;
|
||||
|
||||
/**
 * Holds every OpenAI key configured through `config.openaiKey`, hands keys
 * out to requests, and tracks their usage and health.
 */
export class KeyPool {
  private keys: Key[] = [];
  private checker?: KeyChecker;
  private log = logger.child({ module: "KeyPool" });

  /**
   * Parses the comma-separated key list from `config.openaiKey`.
   *
   * @throws Error if the setting is missing, blank, or contains no keys.
   */
  constructor() {
    const keyString = config.openaiKey;
    if (!keyString?.trim()) {
      throw new Error("OPENAI_KEY environment variable is not set");
    }
    // Drop blank entries (e.g. from a trailing comma) and duplicates so the
    // pool never hands out an empty key or tracks the same key twice.
    const bareKeys = Array.from(
      new Set(
        keyString
          .split(",")
          .map((k) => k.trim())
          .filter((k) => k.length > 0)
      )
    );
    if (bareKeys.length === 0) {
      throw new Error("OPENAI_KEY environment variable does not contain any keys");
    }
    for (const k of bareKeys) {
      const newKey: Key = {
        key: k,
        isGpt4: false,
        isTrial: false,
        isDisabled: false,
        softLimit: 0,
        hardLimit: 0,
        systemHardLimit: 0,
        usage: 0,
        lastUsed: 0,
        lastChecked: 0,
        promptCount: 0,
        hash: crypto.createHash("sha256").update(k).digest("hex").slice(0, 6),
      };
      this.keys.push(newKey);

      this.log.info({ key: newKey.hash }, "Key added");
    }
  }

  /** Starts the background key checker when `config.checkKeys` is enabled. */
  public init() {
    if (config.checkKeys) {
      this.checker = new KeyChecker(this.keys, this.update.bind(this));
      this.checker.start();
    }
  }

  /**
   * Returns a list of all keys, with the key field removed.
   * Don't mutate returned keys, use a KeyPool method instead.
   **/
  public list() {
    return this.keys.map((key) => {
      return Object.freeze({
        ...key,
        key: undefined,
      });
    });
  }

  /**
   * Picks a key for a request and stamps it as just used.
   *
   * Trial keys are preferred; otherwise the least recently used key is
   * chosen. Models whose name starts with "gpt-4" only get GPT-4 keys.
   *
   * @param model Name of the model the request targets.
   * @returns A copy of the chosen key. Use the pool's methods to change it.
   * @throws Error if no enabled key can serve the model.
   */
  public get(model: string) {
    const needsGpt4Key = model.startsWith("gpt-4");
    const availableKeys = this.keys.filter(
      (key) => !key.isDisabled && (!needsGpt4Key || key.isGpt4)
    );
    if (availableKeys.length === 0) {
      let message = "No keys available. Please add more keys.";
      if (needsGpt4Key) {
        message =
          "No GPT-4 keys available. Please add more keys or use a non-GPT-4 model.";
      }
      this.log.error(message);
      throw new Error(message);
    }

    // Prioritize trial keys
    const trialKeys = availableKeys.filter((key) => key.isTrial);
    if (trialKeys.length > 0) {
      const trialKey = trialKeys[0];
      this.log.info({ key: trialKey.hash }, "Using trial key");
      trialKey.lastUsed = Date.now();
      // Return a copy, like the paid-key path, so callers never hold a
      // reference to the pool's internal state.
      return { ...trialKey };
    }

    // Otherwise, return the oldest key (the first one wins on ties).
    const oldestKey = availableKeys.reduce((oldest, key) =>
      key.lastUsed < oldest.lastUsed ? key : oldest
    );
    this.log.info({ key: oldestKey.hash }, "Assigning key to request.");
    oldestKey.lastUsed = Date.now();
    return { ...oldestKey };
  }

  /**
   * Applies `update` to the key with the given hash and stamps `lastChecked`
   * with the current time. Unknown hashes are logged and ignored.
   */
  public update(keyHash: string, update: KeyUpdate) {
    const keyFromPool = this.findByHash(keyHash);
    if (!keyFromPool) {
      this.log.warn({ key: keyHash }, "Cannot update unknown key.");
      return;
    }
    Object.assign(keyFromPool, { ...update, lastChecked: Date.now() });
  }

  /** Disables the pool's entry for `key`; does nothing if it is unknown or already disabled. */
  public disable(key: Key) {
    const keyFromPool = this.keys.find((k) => k.key === key.key);
    if (!keyFromPool || keyFromPool.isDisabled) return;
    keyFromPool.isDisabled = true;
    this.log.warn({ key: key.hash }, "Key disabled");
  }

  /** Returns true if at least one key is not disabled. */
  public anyAvailable() {
    return this.keys.some((key) => !key.isDisabled);
  }

  /** Returns true if key checking is enabled and some key has never been checked. */
  public anyUnchecked() {
    return Boolean(config.checkKeys) && this.keys.some((key) => !key.lastChecked);
  }

  /** Increments the prompt counter of the key with the given hash, if it exists. */
  public incrementPrompt(keyHash?: string) {
    if (!keyHash) return;
    const key = this.findByHash(keyHash);
    if (!key) return;
    key.promptCount++;
  }

  /** Clears the GPT-4 flag of the key with the given hash, if it exists. */
  public downgradeKey(keyHash?: string) {
    if (!keyHash) return;
    this.log.warn({ key: keyHash }, "Downgrading key to GPT-3.5.");
    const key = this.findByHash(keyHash);
    if (!key) return;
    key.isGpt4 = false;
  }

  /**
   * Returns the remaining aggregate quota for all keys as a fraction between
   * 0 and 1. Returns 0 when there are no keys or when the combined hard
   * limit is 0, rather than dividing by zero.
   *
   * @param gpt4Only Only consider keys flagged as GPT-4 keys.
   */
  public calculateRemainingQuota(gpt4Only = false) {
    const keys = gpt4Only ? this.keys.filter((k) => k.isGpt4) : this.keys;

    if (keys.length === 0) return 0;

    const totalUsage = keys.reduce((acc, key) => {
      // Keys can slightly exceed their quota
      return acc + Math.min(key.usage, key.hardLimit);
    }, 0);
    const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);

    // No limit information (e.g. nothing checked yet) would yield 0 / 0 = NaN.
    if (totalLimit === 0) return 0;

    return 1 - totalUsage / totalLimit;
  }

  /** Looks up the pool's own (mutable) entry for a key hash. */
  private findByHash(keyHash: string): Key | undefined {
    return this.keys.find((k) => k.hash === keyHash);
  }
}
|
||||
@@ -1,146 +0,0 @@
|
||||
/* Manages OpenAI API keys. Tracks usage, disables expired keys, and provides
|
||||
round-robin access to keys. Keys are stored in the OPENAI_KEY environment
|
||||
variable, either as a single key, or a base64-encoded JSON array of keys.*/
|
||||
import crypto from "crypto";
|
||||
import { config } from "../config";
|
||||
import { logger } from "../logger";
|
||||
|
||||
/** Represents a key stored in the OPENAI_KEY environment variable. */
|
||||
type KeySchema = {
|
||||
/** The OpenAI API key itself. */
|
||||
key: string;
|
||||
/** Whether this is a free trial key. These are prioritized over paid keys if they can fulfill the request. */
|
||||
isTrial?: boolean;
|
||||
/** Whether this key has been provisioned for GPT-4. */
|
||||
isGpt4?: boolean;
|
||||
};
|
||||
|
||||
/** Runtime information about a key. */
|
||||
export type Key = KeySchema & {
|
||||
/** Whether this key is currently disabled. We set this if we get a 429 or 401 response from OpenAI. */
|
||||
isDisabled?: boolean;
|
||||
/** Threshold at which a warning email will be sent by OpenAI. */
|
||||
softLimit?: number;
|
||||
/** Threshold at which the key will be disabled because it has reached the user-defined limit. */
|
||||
hardLimit?: number;
|
||||
/** The maximum quota allocated to this key by OpenAI. */
|
||||
systemHardLimit?: number;
|
||||
/** The current usage of this key. */
|
||||
usage?: number;
|
||||
/** The number of prompts that have been sent with this key. */
|
||||
promptCount: number;
|
||||
/** The time at which this key was last used. */
|
||||
lastUsed: number;
|
||||
/** Key hash for displaying usage in the dashboard. */
|
||||
hash: string;
|
||||
};
|
||||
|
||||
const keyPool: Key[] = [];
|
||||
|
||||
function init() {
|
||||
const keyString = config.openaiKey;
|
||||
if (!keyString?.trim()) {
|
||||
throw new Error("OPENAI_KEY environment variable is not set");
|
||||
}
|
||||
let keyList: KeySchema[];
|
||||
try {
|
||||
const decoded = Buffer.from(keyString, "base64").toString();
|
||||
keyList = JSON.parse(decoded) as KeySchema[];
|
||||
} catch (err) {
|
||||
logger.info("OPENAI_KEY is not base64-encoded JSON, assuming bare key");
|
||||
// We don't actually know if bare keys are paid/GPT-4 so we assume they are
|
||||
keyList = [{ key: keyString, isTrial: false, isGpt4: true }];
|
||||
}
|
||||
for (const key of keyList) {
|
||||
const newKey = {
|
||||
...key,
|
||||
isDisabled: false,
|
||||
softLimit: 0,
|
||||
hardLimit: 0,
|
||||
systemHardLimit: 0,
|
||||
usage: 0,
|
||||
lastUsed: 0,
|
||||
promptCount: 0,
|
||||
hash: crypto
|
||||
.createHash("sha256")
|
||||
.update(key.key)
|
||||
.digest("hex")
|
||||
.slice(0, 6),
|
||||
};
|
||||
keyPool.push(newKey);
|
||||
|
||||
logger.info({ key: newKey.hash }, "Key added");
|
||||
}
|
||||
// TODO: check each key's usage upon startup.
|
||||
}
|
||||
|
||||
function list() {
|
||||
return keyPool.map((key) => ({
|
||||
...key,
|
||||
key: undefined,
|
||||
}));
|
||||
}
|
||||
|
||||
function disable(key: Key) {
|
||||
const keyFromPool = keyPool.find((k) => k.key === key.key)!;
|
||||
if (keyFromPool.isDisabled) return;
|
||||
keyFromPool.isDisabled = true;
|
||||
logger.warn({ key: key.hash }, "Key disabled");
|
||||
}
|
||||
|
||||
function anyAvailable() {
|
||||
return keyPool.some((key) => !key.isDisabled);
|
||||
}
|
||||
|
||||
function get(model: string) {
|
||||
const needsGpt4Key = model.startsWith("gpt-4");
|
||||
const availableKeys = keyPool.filter(
|
||||
(key) => !key.isDisabled && (!needsGpt4Key || key.isGpt4)
|
||||
);
|
||||
if (availableKeys.length === 0) {
|
||||
let message = "No keys available. Please add more keys.";
|
||||
if (needsGpt4Key) {
|
||||
message =
|
||||
"No GPT-4 keys available. Please add more keys or use a non-GPT-4 model.";
|
||||
}
|
||||
logger.error(message);
|
||||
throw new Error(message);
|
||||
}
|
||||
|
||||
// Prioritize trial keys
|
||||
const trialKeys = availableKeys.filter((key) => key.isTrial);
|
||||
if (trialKeys.length > 0) {
|
||||
logger.info({ key: trialKeys[0].hash }, "Using trial key");
|
||||
trialKeys[0].lastUsed = Date.now();
|
||||
return trialKeys[0];
|
||||
}
|
||||
|
||||
// Otherwise, return the oldest key
|
||||
const oldestKey = availableKeys.sort((a, b) => a.lastUsed - b.lastUsed)[0];
|
||||
logger.info({ key: oldestKey.hash }, "Assigning key to request.");
|
||||
oldestKey.lastUsed = Date.now();
|
||||
return { ...oldestKey };
|
||||
}
|
||||
|
||||
function incrementPrompt(keyHash?: string) {
|
||||
if (!keyHash) return;
|
||||
const key = keyPool.find((k) => k.hash === keyHash)!;
|
||||
key.promptCount++;
|
||||
}
|
||||
|
||||
function downgradeKey(keyHash?: string) {
|
||||
if (!keyHash) return;
|
||||
logger.warn({ key: keyHash }, "Downgrading key to GPT-3.5.");
|
||||
const key = keyPool.find((k) => k.hash === keyHash)!;
|
||||
key.isGpt4 = false;
|
||||
}
|
||||
|
||||
export const keys = {
|
||||
init,
|
||||
list,
|
||||
get,
|
||||
anyAvailable,
|
||||
disable,
|
||||
incrementPrompt,
|
||||
downgradeKey,
|
||||
};
|
||||
+6
-6
@@ -4,7 +4,7 @@ import util from "util";
|
||||
import zlib from "zlib";
|
||||
import * as httpProxy from "http-proxy";
|
||||
import { logger } from "../logger";
|
||||
import { keys } from "../keys/key-pool";
|
||||
import { keyPool } from "../key-management";
|
||||
|
||||
export const QUOTA_ROUTES = ["/v1/chat/completions"];
|
||||
|
||||
@@ -39,7 +39,7 @@ export const handleDownstreamErrors = (
|
||||
let errorPayload: any = {
|
||||
error: "Proxy couldn't parse error from OpenAI",
|
||||
};
|
||||
const canTryAgain = keys.anyAvailable()
|
||||
const canTryAgain = keyPool.anyAvailable()
|
||||
? "You can try again to get a different key."
|
||||
: "There are no more keys available.";
|
||||
try {
|
||||
@@ -61,7 +61,7 @@ export const handleDownstreamErrors = (
|
||||
logger.warn(
|
||||
`OpenAI key is invalid or revoked. Keyhash ${req.key?.hash}`
|
||||
);
|
||||
keys.disable(req.key!);
|
||||
keyPool.disable(req.key!);
|
||||
const message = `The OpenAI key is invalid or revoked. ${canTryAgain}`;
|
||||
errorPayload.proxy_note = message;
|
||||
} else if (statusCode === 429) {
|
||||
@@ -72,7 +72,7 @@ export const handleDownstreamErrors = (
|
||||
// - Model overloaded, their server is overloaded
|
||||
if (errorPayload.error?.type === "insufficient_quota") {
|
||||
logger.warn(`OpenAI key is exhausted. Keyhash ${req.key?.hash}`);
|
||||
keys.disable(req.key!);
|
||||
keyPool.disable(req.key!);
|
||||
const message = `The OpenAI key is exhausted. ${canTryAgain}`;
|
||||
errorPayload.proxy_note = message;
|
||||
} else {
|
||||
@@ -85,7 +85,7 @@ export const handleDownstreamErrors = (
|
||||
// Most likely model not found
|
||||
if (errorPayload.error?.code === "model_not_found") {
|
||||
if (req.key!.isGpt4) {
|
||||
keys.downgradeKey(req.key?.hash);
|
||||
keyPool.downgradeKey(req.key?.hash);
|
||||
}
|
||||
errorPayload.proxy_note =
|
||||
"This key may have been incorrectly flagged as gpt-4 enabled.";
|
||||
@@ -128,7 +128,7 @@ export const handleInternalError: httpProxy.ErrorCallback = (
|
||||
|
||||
export const incrementKeyUsage = (req: Request) => {
|
||||
if (QUOTA_ROUTES.includes(req.path)) {
|
||||
keys.incrementPrompt(req.key?.hash);
|
||||
keyPool.incrementPrompt(req.key?.hash);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import type { ExpressHttpProxyReqCallback } from ".";
|
||||
import { Key, keys } from "../../keys/key-pool";
|
||||
import { Key, keyPool } from "../../key-management";
|
||||
|
||||
/** Add an OpenAI key from the pool to the request. */
|
||||
export const addKey: ExpressHttpProxyReqCallback = (proxyReq, req) => {
|
||||
let assignedKey: Key;
|
||||
assignedKey = keys.get(req.body?.model || "gpt-3.5")!;
|
||||
assignedKey = keyPool.get(req.body?.model || "gpt-3.5")!;
|
||||
req.key = assignedKey;
|
||||
proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
|
||||
};
|
||||
|
||||
+2
-2
@@ -3,7 +3,7 @@ import express from "express";
|
||||
import cors from "cors";
|
||||
import pinoHttp from "pino-http";
|
||||
import { logger } from "./logger";
|
||||
import { keys } from "./keys/key-pool";
|
||||
import { keyPool } from "./key-management";
|
||||
import { proxyRouter, rewriteTavernRequests } from "./proxy/routes";
|
||||
import { handleInfoPage } from "./info-page";
|
||||
|
||||
@@ -44,5 +44,5 @@ app.use((_req: unknown, res: express.Response) => {
|
||||
// start server and load keys
|
||||
app.listen(PORT, () => {
|
||||
logger.info(`Server listening on port ${PORT}`);
|
||||
keys.init();
|
||||
keyPool.init();
|
||||
});
|
||||
|
||||
Vendored
+1
-1
@@ -1,5 +1,5 @@
|
||||
import { Express } from "express-serve-static-core";
|
||||
import { Key } from "../keys/key-pool";
|
||||
import { Key } from "../key-management/key-pool";
|
||||
|
||||
declare global {
|
||||
namespace Express {
|
||||
|
||||
Reference in New Issue
Block a user