Per-user token quotas and automatic quota refreshing (khanon/oai-reverse-proxy!37)
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import { Request, Response } from "express";
|
||||
import httpProxy from "http-proxy";
|
||||
import { ZodError } from "zod";
|
||||
import { AIService } from "../../key-management";
|
||||
import { QuotaExceededError } from "./request/apply-quota-limits";
|
||||
|
||||
// Path of the OpenAI chat completion endpoint (where it is used is not
// visible in this view).
const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
|
||||
// Path of the Anthropic completion endpoint (where it is used is not visible
// in this view).
const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
|
||||
@@ -63,9 +65,7 @@ export const handleInternalError = (
|
||||
res: Response
|
||||
) => {
|
||||
try {
|
||||
const isZod = err instanceof ZodError;
|
||||
const isForbidden = err.name === "ForbiddenError";
|
||||
if (isZod) {
|
||||
if (err instanceof ZodError) {
|
||||
writeErrorResponse(req, res, 400, {
|
||||
error: {
|
||||
type: "proxy_validation_error",
|
||||
@@ -75,7 +75,7 @@ export const handleInternalError = (
|
||||
message: err.message,
|
||||
},
|
||||
});
|
||||
} else if (isForbidden) {
|
||||
} else if (err.name === "ForbiddenError") {
|
||||
// Spoofs a vaguely threatening OpenAI error message. Only invoked by the
|
||||
// block-zoomers rewriter to scare off tiktokers.
|
||||
writeErrorResponse(req, res, 403, {
|
||||
@@ -86,6 +86,16 @@ export const handleInternalError = (
|
||||
message: err.message,
|
||||
},
|
||||
});
|
||||
} else if (err instanceof QuotaExceededError) {
|
||||
writeErrorResponse(req, res, 429, {
|
||||
error: {
|
||||
type: "proxy_quota_exceeded",
|
||||
code: "quota_exceeded",
|
||||
message: `You've exceeded your token quota for this model type.`,
|
||||
info: err.quotaInfo,
|
||||
stack: err.stack,
|
||||
},
|
||||
});
|
||||
} else {
|
||||
writeErrorResponse(req, res, 500, {
|
||||
error: {
|
||||
@@ -141,3 +151,17 @@ export function buildFakeSseMessage(
|
||||
}
|
||||
return `data: ${JSON.stringify(fakeEvent)}\n\n`;
|
||||
}
|
||||
|
||||
/**
 * Extracts the generated completion text and the model name from an upstream
 * API response body.
 *
 * - For `"anthropic"`, the text is read from `body.completion` and trimmed.
 * - For any other service, the text is read from
 *   `body.choices[0].message.content`.
 *
 * The declared contract is that `completion` is always a string. Upstream
 * bodies do not always honor that: the completion field may be absent, the
 * `choices` array may be missing or empty, and a chat message's `content` may
 * be `null`. In all of those cases an empty string is returned instead of
 * throwing a `TypeError` or leaking a non-string value to callers.
 *
 * @param service Which upstream service produced `body`.
 * @param body Parsed JSON response body from the upstream service.
 * @returns The completion text (possibly empty) and `body.model` as-is.
 */
export function getCompletionForService({
  service,
  body,
}: {
  service: AIService;
  body: Record<string, any>;
}): { completion: string; model: string } {
  if (service === "anthropic") {
    const raw = body.completion;
    // Only trim real strings; anything else is treated as "no completion".
    return {
      completion: typeof raw === "string" ? raw.trim() : "",
      model: body.model,
    };
  }

  // Optional chaining guards against missing/empty `choices` and a missing
  // `message`; the typeof check turns a null `content` into "".
  const content = body.choices?.[0]?.message?.content;
  return {
    completion: typeof content === "string" ? content : "",
    model: body.model,
  };
}
|
||||
|
||||
Reference in New Issue
Block a user