Per-user token quotas and automatic quota refreshing (khanon/oai-reverse-proxy!37)

This commit is contained in:
khanon
2023-08-28 19:33:14 +00:00
parent 785b1f69f3
commit cb780e85da
31 changed files with 544 additions and 145 deletions
@@ -0,0 +1,30 @@
import { hasAvailableQuota } from "../../auth/user-store";
import { isCompletionRequest } from "../common";
import { ProxyRequestMiddleware } from ".";
/**
 * Error thrown when a user's request would push them past their token quota
 * for the requested model. Carries a `quotaInfo` payload describing the
 * user's limit, current usage, and the size of the rejected request.
 */
export class QuotaExceededError extends Error {
  // Shape is caller-defined ({ quota, used, requested } at the throw site);
  // kept as `any` so downstream handlers can read it without narrowing.
  public quotaInfo: any;

  constructor(message: string, quotaInfo: any) {
    super(message);
    this.name = "QuotaExceededError";
    this.quotaInfo = quotaInfo;
  }
}
/**
 * Proxy request middleware that enforces per-user, per-model token quotas.
 * No-op for non-completion requests or requests without an attached user.
 *
 * @throws {QuotaExceededError} when the prompt + requested output tokens
 * exceed the user's remaining quota for `req.body.model`.
 */
export const applyQuotaLimits: ProxyRequestMiddleware = (_proxyReq, req) => {
  if (!isCompletionRequest(req) || !req.user) {
    return;
  }

  // Total cost of this request: prompt tokens already counted upstream,
  // plus the maximum output tokens the client asked for.
  const tokensNeeded = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);

  const withinQuota = hasAvailableQuota(
    req.user.token,
    req.body.model,
    tokensNeeded
  );
  if (withinQuota) {
    return;
  }

  throw new QuotaExceededError(
    "You have exceeded your proxy token quota for this model.",
    {
      quota: req.user.tokenLimits,
      used: req.user.tokenCounts,
      requested: tokensNeeded,
    }
  );
};
@@ -1,7 +1,7 @@
import { Request } from "express";
import { z } from "zod";
import { config } from "../../../config";
import { countTokens } from "../../../tokenization";
import { OpenAIPromptMessage, countTokens } from "../../../tokenization";
import { RequestPreprocessor } from ".";
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
@@ -15,22 +15,26 @@ const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
* request body.
*/
export const checkContextSize: RequestPreprocessor = async (req) => {
let prompt;
const service = req.outboundApi;
let result;
switch (req.outboundApi) {
case "openai":
switch (service) {
case "openai": {
req.outputTokens = req.body.max_tokens;
prompt = req.body.messages;
const prompt: OpenAIPromptMessage[] = req.body.messages;
result = await countTokens({ req, prompt, service });
break;
case "anthropic":
}
case "anthropic": {
req.outputTokens = req.body.max_tokens_to_sample;
prompt = req.body.prompt;
const prompt: string = req.body.prompt;
result = await countTokens({ req, prompt, service });
break;
}
default:
throw new Error(`Unknown outbound API: ${req.outboundApi}`);
}
const result = await countTokens({ req, prompt, service: req.outboundApi });
req.promptTokens = result.token_count;
// TODO: Remove once token counting is stable
@@ -89,6 +93,7 @@ function validateContextSize(req: Request) {
);
req.debug.prompt_tokens = promptTokens;
req.debug.completion_tokens = outputTokens;
req.debug.max_model_tokens = modelMax;
req.debug.max_proxy_tokens = proxyMax;
}
@@ -101,7 +106,7 @@ function assertRequestHasTokenCounts(
outputTokens: z.number().int().min(1),
})
.nonstrict()
.parse(req);
.parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens });
}
/**
+1
View File
@@ -3,6 +3,7 @@ import type { ClientRequest } from "http";
import type { ProxyReqCallback } from "http-proxy";
// Express middleware (runs before http-proxy-middleware, can be async)
export { applyQuotaLimits } from "./apply-quota-limits";
export { createPreprocessorMiddleware } from "./preprocess";
export { checkContextSize } from "./check-context-size";
export { setApiFormat } from "./set-api-format";