Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 27cbb7aaaf | |||
| d54acad6ad | |||
| 5e1fffe07d | |||
| f7fd5f00f2 | |||
| 6d323f6ea1 | |||
| 2959ed3f7f | |||
| b58e7cb830 | |||
| f531272b00 |
+4
-4
@@ -415,18 +415,18 @@ export const config: Config = {
|
||||
firebaseKey: getEnvWithDefault("FIREBASE_KEY", undefined),
|
||||
textModelRateLimit: getEnvWithDefault("TEXT_MODEL_RATE_LIMIT", 4),
|
||||
imageModelRateLimit: getEnvWithDefault("IMAGE_MODEL_RATE_LIMIT", 4),
|
||||
maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 16384),
|
||||
maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 32768),
|
||||
maxContextTokensAnthropic: getEnvWithDefault(
|
||||
"MAX_CONTEXT_TOKENS_ANTHROPIC",
|
||||
0
|
||||
32768
|
||||
),
|
||||
maxOutputTokensOpenAI: getEnvWithDefault(
|
||||
["MAX_OUTPUT_TOKENS_OPENAI", "MAX_OUTPUT_TOKENS"],
|
||||
400
|
||||
1024
|
||||
),
|
||||
maxOutputTokensAnthropic: getEnvWithDefault(
|
||||
["MAX_OUTPUT_TOKENS_ANTHROPIC", "MAX_OUTPUT_TOKENS"],
|
||||
400
|
||||
1024
|
||||
),
|
||||
allowedModelFamilies: getEnvWithDefault(
|
||||
"ALLOWED_MODEL_FAMILIES",
|
||||
|
||||
+31
-68
@@ -46,7 +46,7 @@ const getModelsResponse = () => {
|
||||
"claude-3-haiku-20240307",
|
||||
"claude-3-opus-20240229",
|
||||
"claude-3-sonnet-20240229",
|
||||
"claude-3-5-sonnet-20240620"
|
||||
"claude-3-5-sonnet-20240620",
|
||||
];
|
||||
|
||||
const models = claudeVariants.map((id) => ({
|
||||
@@ -70,7 +70,7 @@ const handleModelRequest: RequestHandler = (_req, res) => {
|
||||
};
|
||||
|
||||
/** Only used for non-streaming requests. */
|
||||
const anthropicResponseHandler: ProxyResHandlerWithBody = async (
|
||||
const anthropicBlockingResponseHandler: ProxyResHandlerWithBody = async (
|
||||
_proxyRes,
|
||||
req,
|
||||
res,
|
||||
@@ -179,6 +179,28 @@ export function transformAnthropicChatResponseToOpenAI(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
 * model (any model ID starting with `gpt-`), reassigns it to Claude 3 Sonnet.
 *
 * Requests whose `model` is missing or is not a string are left untouched
 * rather than raising a TypeError from `startsWith`.
 *
 * @param req Incoming request; `req.body.model` is overwritten in place when
 *   it names an OpenAI model.
 */
function maybeReassignModel(req: Request) {
  const model = req.body?.model;
  if (typeof model !== "string" || !model.startsWith("gpt-")) return;
  req.body.model = "claude-3-sonnet-20240229";
}
|
||||
|
||||
/**
 * If client requests more than 4096 output tokens the request must have a
 * particular beta header.
 * https://docs.anthropic.com/en/release-notes/api#july-15th-2024
 *
 * The requested output size is read from `max_tokens` (the Messages API field)
 * and falls back to `max_tokens_to_sample` (the Text Completions API field),
 * so the header is applied whichever field the client sent.
 *
 * @param req Incoming request; `req.headers["anthropic-beta"]` is set in place
 *   when the requested output size exceeds 4096. Any existing value of that
 *   header is replaced.
 */
function setAnthropicBetaHeader(req: Request) {
  // Tolerate a request without a parsed body instead of throwing on access.
  const body = req.body ?? {};
  const requestedTokens = body.max_tokens ?? body.max_tokens_to_sample;
  // `undefined > 4096` is false, so requests without a limit are left alone.
  if (requestedTokens > 4096) {
    req.headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15";
  }
}
|
||||
|
||||
const anthropicProxy = createQueueMiddleware({
|
||||
proxyMiddleware: createProxyMiddleware({
|
||||
target: "https://api.anthropic.com",
|
||||
@@ -189,7 +211,7 @@ const anthropicProxy = createQueueMiddleware({
|
||||
proxyReq: createOnProxyReqHandler({
|
||||
pipeline: [addKey, addAnthropicPreamble, finalizeBody],
|
||||
}),
|
||||
proxyRes: createOnProxyResHandler([anthropicResponseHandler]),
|
||||
proxyRes: createOnProxyResHandler([anthropicBlockingResponseHandler]),
|
||||
error: handleProxyError,
|
||||
},
|
||||
// Abusing pathFilter to rewrite the paths dynamically.
|
||||
@@ -213,6 +235,11 @@ const anthropicProxy = createQueueMiddleware({
|
||||
}),
|
||||
});
|
||||
|
||||
// Preprocessor middleware for native Anthropic chat requests: inbound and
// outbound API are both "anthropic-chat". setAnthropicBetaHeader is registered
// under `afterTransform` (presumably run once the request body has been
// transformed; confirm in createPreprocessorMiddleware).
const nativeAnthropicChatPreprocessor = createPreprocessorMiddleware(
|
||||
{ inApi: "anthropic-chat", outApi: "anthropic-chat", service: "anthropic" },
|
||||
{ afterTransform: [setAnthropicBetaHeader] }
|
||||
);
|
||||
|
||||
const nativeTextPreprocessor = createPreprocessorMiddleware({
|
||||
inApi: "anthropic-text",
|
||||
outApi: "anthropic-text",
|
||||
@@ -268,11 +295,7 @@ anthropicRouter.get("/v1/models", handleModelRequest);
|
||||
anthropicRouter.post(
|
||||
"/v1/messages",
|
||||
ipLimiter,
|
||||
createPreprocessorMiddleware({
|
||||
inApi: "anthropic-chat",
|
||||
outApi: "anthropic-chat",
|
||||
service: "anthropic",
|
||||
}),
|
||||
nativeAnthropicChatPreprocessor,
|
||||
anthropicProxy
|
||||
);
|
||||
// Anthropic text completion endpoint. Translates to Anthropic chat completion
|
||||
@@ -292,65 +315,5 @@ anthropicRouter.post(
|
||||
preprocessOpenAICompatRequest,
|
||||
anthropicProxy
|
||||
);
|
||||
// Temporarily force Anthropic Text to Anthropic Chat for frontends which do not
|
||||
// yet support the new model. Forces claude-3. Will be removed once common
|
||||
// frontends have been updated.
|
||||
anthropicRouter.post(
|
||||
// `:type` is restricted to "sonnet" or "opus"; `:action` to "complete" or
// "messages".
"/v1/:type(sonnet|opus)/:action(complete|messages)",
|
||||
ipLimiter,
|
||||
// Rejects clients that already use the chat format; otherwise overwrites
// req.body.model with the Claude 3 model matching `:type`.
handleAnthropicTextCompatRequest,
|
||||
// Declares Anthropic text as the inbound API and Anthropic chat as the
// outbound API for this route.
createPreprocessorMiddleware({
|
||||
inApi: "anthropic-text",
|
||||
outApi: "anthropic-chat",
|
||||
service: "anthropic",
|
||||
}),
|
||||
anthropicProxy
|
||||
);
|
||||
|
||||
/**
 * Middleware for the temporary Anthropic text-to-chat compatibility routes.
 *
 * Builds a Claude 3 model ID from the `:type` route parameter and logs the
 * request. Clients that hit the `messages` action, or that already send a
 * `messages` array, get a 400 error pointing them at the regular `/anthropic`
 * endpoint. Everyone else has `req.body.model` overwritten with the derived
 * model ID before control passes to the next middleware.
 */
function handleAnthropicTextCompatRequest(
  req: Request,
  res: Response,
  next: any
) {
  const { type: variant, action } = req.params;
  const hasMessages = Boolean(req.body.messages);
  const forcedModel = `claude-3-${variant}-20240229`;

  req.log.info(
    {
      type: variant,
      inputModel: req.body.model,
      compatModel: forcedModel,
      alreadyInChatFormat: hasMessages,
    },
    "Handling Anthropic compatibility request"
  );

  // Happy path first: a genuine text-completion client gets its model forced.
  const needsCompat = action !== "messages" && !hasMessages;
  if (needsCompat) {
    req.body.model = forcedModel;
    next();
    return;
  }

  // The client already speaks the chat format, so this endpoint is the wrong one.
  return sendErrorToClient({
    req,
    res,
    options: {
      title: "Unnecessary usage of compatibility endpoint",
      message: `Your client seems to already support the new Claude API format. This endpoint is intended for clients that do not yet support the new format.\nUse the normal \`/anthropic\` proxy endpoint instead.`,
      format: "unknown",
      statusCode: 400,
      reqId: req.id,
      obj: {
        requested_endpoint: "/anthropic/" + variant,
        correct_endpoint: "/anthropic",
      },
    },
  });
}
|
||||
|
||||
/**
 * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
 * model (any model ID starting with `gpt-`), reassigns it to Claude 3 Sonnet.
 *
 * Requests whose `model` is missing or is not a string are left untouched
 * rather than raising a TypeError from `startsWith`.
 *
 * @param req Incoming request; `req.body.model` is overwritten in place when
 *   it names an OpenAI model.
 */
function maybeReassignModel(req: Request) {
  const model = req.body?.model;
  if (typeof model !== "string" || !model.startsWith("gpt-")) return;
  req.body.model = "claude-3-sonnet-20240229";
}
|
||||
|
||||
export const anthropic = anthropicRouter;
|
||||
|
||||
@@ -1,14 +1,16 @@
|
||||
import { HPMRequestCallback } from "../index";
|
||||
import { config } from "../../../../config";
|
||||
import { ForbiddenError } from "../../../../shared/errors";
|
||||
import { getModelFamilyForRequest } from "../../../../shared/models";
|
||||
import { HPMRequestCallback } from "../index";
|
||||
|
||||
/**
|
||||
* Ensures the selected model family is enabled by the proxy configuration.
|
||||
**/
|
||||
export const checkModelFamily: HPMRequestCallback = (_proxyReq, req, res) => {
|
||||
*/
|
||||
export const checkModelFamily: HPMRequestCallback = (_proxyReq, req) => {
|
||||
const family = getModelFamilyForRequest(req);
|
||||
if (!config.allowedModelFamilies.includes(family)) {
|
||||
throw new ForbiddenError(`Model family '${family}' is not enabled on this proxy`);
|
||||
throw new ForbiddenError(
|
||||
`Model family '${family}' is not enabled on this proxy`
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -35,6 +35,7 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
|
||||
|
||||
const credential = getCredentialParts(req);
|
||||
const host = AMZ_HOST.replace("%REGION%", credential.region);
|
||||
|
||||
// AWS only uses 2023-06-01 and does not actually check this header, but we
|
||||
// set it so that the stream adapter always selects the correct transformer.
|
||||
req.headers["anthropic-version"] = "2023-06-01";
|
||||
|
||||
@@ -67,11 +67,13 @@ function applyMistralPromptFixes(req: Request): void {
|
||||
);
|
||||
|
||||
// If the prompt relies on `prefix: true` for the last message, we need to
|
||||
// convert it to a text completions request because Mistral support for
|
||||
// this feature is limited (and completely broken on AWS Mistral).
|
||||
// convert it to a text completions request because AWS Mistral support for
|
||||
// this feature is broken.
|
||||
// On Mistral La Plateforme, we can't do this because they don't expose
|
||||
// a text completions endpoint.
|
||||
const { messages } = req.body;
|
||||
const lastMessage = messages && messages[messages.length - 1];
|
||||
if (lastMessage && lastMessage.role === "assistant") {
|
||||
if (lastMessage?.role === "assistant" && req.service === "aws") {
|
||||
// enable prefix if client forgot, otherwise the template will insert an
|
||||
// eos token which is very unlikely to be what the client wants.
|
||||
lastMessage.prefix = true;
|
||||
|
||||
@@ -58,6 +58,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
|
||||
modelMax = 16384;
|
||||
} else if (model.match(/^gpt-4o/)) {
|
||||
modelMax = 128000;
|
||||
} else if (model.match(/^chatgpt-4o/)) {
|
||||
modelMax = 128000;
|
||||
} else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
|
||||
modelMax = 131072;
|
||||
} else if (model.match(/gpt-4-turbo(-preview)?$/)) {
|
||||
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
createOnProxyResHandler,
|
||||
ProxyResHandlerWithBody,
|
||||
} from "./middleware/response";
|
||||
import { BadRequestError } from "../shared/errors";
|
||||
|
||||
// Mistral can't settle on a single naming scheme and deprecates models within
|
||||
// months of releasing them so this list is hard to keep up to date. 2024-07-28
|
||||
@@ -170,7 +171,12 @@ export function detectMistralInputApi(req: Request) {
|
||||
if (messages) {
|
||||
req.inboundApi = "mistral-ai";
|
||||
req.outboundApi = "mistral-ai";
|
||||
} else if (prompt) {
|
||||
} else if (prompt && req.service === "mistral-ai") {
|
||||
// Mistral La Plateforme doesn't expose a text completions endpoint.
|
||||
throw new BadRequestError(
|
||||
"Mistral (via La Plateforme API) does not support text completions. This format is only supported on Mistral via the AWS API."
|
||||
);
|
||||
} else if (prompt && req.service === "aws") {
|
||||
req.inboundApi = "mistral-text";
|
||||
req.outboundApi = "mistral-text";
|
||||
}
|
||||
|
||||
@@ -35,6 +35,8 @@ export const KNOWN_OPENAI_MODELS = [
|
||||
// GPT4o Mini
|
||||
"gpt-4o-mini",
|
||||
"gpt-4o-mini-2024-07-18",
|
||||
// GPT4o (ChatGPT)
|
||||
"chatgpt-4o-latest",
|
||||
// GPT4 Turbo (superseded by GPT4o)
|
||||
"gpt-4-turbo",
|
||||
"gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
|
||||
|
||||
@@ -45,7 +45,9 @@ const BaseMistralAIV1CompletionsSchema = z.object({
|
||||
.default([])
|
||||
.transform((v) => (Array.isArray(v) ? v : [v])),
|
||||
random_seed: z.number().int().min(0).optional(),
|
||||
response_format: z.enum(["text", "json_object"]).optional().default("text"),
|
||||
response_format: z
|
||||
.object({ type: z.enum(["text", "json_object"]) })
|
||||
.optional(),
|
||||
safe_prompt: z.boolean().optional().default(false),
|
||||
});
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ export const OpenAIV1ChatCompletionSchema = z
|
||||
content: z.union([z.string(), OpenAIV1ChatContentArraySchema]),
|
||||
name: z.string().optional(),
|
||||
tool_calls: z.array(z.any()).optional(),
|
||||
function_call: z.array(z.any()).optional(),
|
||||
function_call: z.any().optional(),
|
||||
tool_call_id: z.string().optional(),
|
||||
}),
|
||||
{
|
||||
@@ -52,7 +52,7 @@ export const OpenAIV1ChatCompletionSchema = z
|
||||
.number()
|
||||
.int()
|
||||
.nullish()
|
||||
.default(Math.min(OPENAI_OUTPUT_MAX, 4096))
|
||||
.default(Math.min(OPENAI_OUTPUT_MAX, 16384))
|
||||
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
|
||||
frequency_penalty: z.number().optional().default(0),
|
||||
presence_penalty: z.number().optional().default(0),
|
||||
|
||||
@@ -130,6 +130,7 @@ export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e"];
|
||||
|
||||
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
|
||||
"^gpt-4o(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4o",
|
||||
"^chatgpt-4o": "gpt4o",
|
||||
"^gpt-4o-mini(-\\d{4}-\\d{2}-\\d{2})?$": "turbo", // closest match
|
||||
"^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
|
||||
"^gpt-4-turbo(-preview)?$": "gpt4-turbo",
|
||||
|
||||
Reference in New Issue
Block a user