refactors aws endpoint router to split claude/mistral
@@ -0,0 +1,9 @@
import { NextFunction, Request, Response } from "express";

export function addV1(req: Request, res: Response, next: NextFunction) {
  // Clients don't consistently use the /v1 prefix so we'll add it for them.
  if (!req.path.startsWith("/v1/") && !req.path.startsWith("/v1beta/")) {
    req.url = `/v1${req.url}`;
  }
  next();
}
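For context, a minimal usage sketch (not part of the commit; the app and handler here are hypothetical) of what `addV1` does when mounted ahead of a router:

```ts
import express from "express";
import { addV1 } from "./add-v1";

const app = express();
// Both requests reach the same downstream path:
//   POST /aws/claude/v1/complete -> already prefixed, passed through unchanged
//   POST /aws/claude/complete    -> req.url rewritten to /v1/complete
app.use("/aws/claude", addV1, (req, res) => res.send(req.url));
```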
@@ -0,0 +1,246 @@
import { Request, RequestHandler, Router } from "express";
import { createProxyMiddleware } from "http-proxy-middleware";
import { v4 } from "uuid";
import { config } from "../config";
import { logger } from "../logger";
import { createQueueMiddleware } from "./queue";
import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common";
import {
  createPreprocessorMiddleware,
  signAwsRequest,
  finalizeSignedRequest,
  createOnProxyReqHandler,
} from "./middleware/request";
import {
  ProxyResHandlerWithBody,
  createOnProxyResHandler,
} from "./middleware/response";
import { transformAnthropicChatResponseToAnthropicText, transformAnthropicChatResponseToOpenAI } from "./anthropic";

/** Only used for non-streaming requests. */
const awsResponseHandler: ProxyResHandlerWithBody = async (
  _proxyRes,
  req,
  res,
  body
) => {
  if (typeof body !== "object") {
    throw new Error("Expected body to be an object");
  }

  let newBody = body;
  switch (`${req.inboundApi}<-${req.outboundApi}`) {
    case "openai<-anthropic-text":
      req.log.info("Transforming Anthropic Text back to OpenAI format");
      newBody = transformAwsTextResponseToOpenAI(body, req);
      break;
    case "openai<-anthropic-chat":
      req.log.info("Transforming AWS Anthropic Chat back to OpenAI format");
      newBody = transformAnthropicChatResponseToOpenAI(body);
      break;
    case "anthropic-text<-anthropic-chat":
      req.log.info("Transforming AWS Anthropic Chat back to Text format");
      newBody = transformAnthropicChatResponseToAnthropicText(body);
      break;
  }

  // AWS does not always confirm the model in the response, so we have to add it
  if (!newBody.model && req.body.model) {
    newBody.model = req.body.model;
  }

  res.status(200).json({ ...newBody, proxy: body.proxy });
};

/**
 * Transforms a model response from the Anthropic API to match those from the
 * OpenAI API, for users using Claude via the OpenAI-compatible endpoint. This
 * is only used for non-streaming requests as streaming requests are handled
 * on-the-fly.
 */
function transformAwsTextResponseToOpenAI(
  awsBody: Record<string, any>,
  req: Request
): Record<string, any> {
  const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
  return {
    id: "aws-" + v4(),
    object: "chat.completion",
    created: Date.now(),
    model: req.body.model,
    usage: {
      prompt_tokens: req.promptTokens,
      completion_tokens: req.outputTokens,
      total_tokens: totalTokens,
    },
    choices: [
      {
        message: {
          role: "assistant",
          content: awsBody.completion?.trim(),
        },
        finish_reason: awsBody.stop_reason,
        index: 0,
      },
    ],
  };
}

const awsProxy = createQueueMiddleware({
  beforeProxy: signAwsRequest,
  proxyMiddleware: createProxyMiddleware({
    target: "bad-target-will-be-rewritten",
    router: ({ signedRequest }) => {
      if (!signedRequest) throw new Error("Must sign request before proxying");
      return `${signedRequest.protocol}//${signedRequest.hostname}`;
    },
    changeOrigin: true,
    selfHandleResponse: true,
    logger,
    on: {
      proxyReq: createOnProxyReqHandler({ pipeline: [finalizeSignedRequest] }),
      proxyRes: createOnProxyResHandler([awsResponseHandler]),
      error: handleProxyError,
    },
  }),
});

const nativeTextPreprocessor = createPreprocessorMiddleware(
  { inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
  { afterTransform: [maybeReassignModel] }
);

const textToChatPreprocessor = createPreprocessorMiddleware(
  { inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
  { afterTransform: [maybeReassignModel] }
);

/**
 * Routes text completion prompts to aws anthropic-chat if they need translation
 * (claude-3 based models do not support the old text completion endpoint).
 */
const preprocessAwsTextRequest: RequestHandler = (req, res, next) => {
  if (req.body.model?.includes("claude-3")) {
    textToChatPreprocessor(req, res, next);
  } else {
    nativeTextPreprocessor(req, res, next);
  }
};

const oaiToAwsTextPreprocessor = createPreprocessorMiddleware(
  { inApi: "openai", outApi: "anthropic-text", service: "aws" },
  { afterTransform: [maybeReassignModel] }
);

const oaiToAwsChatPreprocessor = createPreprocessorMiddleware(
  { inApi: "openai", outApi: "anthropic-chat", service: "aws" },
  { afterTransform: [maybeReassignModel] }
);

/**
 * Routes an OpenAI prompt to either the legacy Claude text completion endpoint
 * or the new Claude chat completion endpoint, based on the requested model.
 */
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
  if (req.body.model?.includes("claude-3")) {
    oaiToAwsChatPreprocessor(req, res, next);
  } else {
    oaiToAwsTextPreprocessor(req, res, next);
  }
};

const awsClaudeRouter = Router();
// Native(ish) Anthropic text completion endpoint.
awsClaudeRouter.post("/v1/complete", ipLimiter, preprocessAwsTextRequest, awsProxy);
// Native Anthropic chat completion endpoint.
awsClaudeRouter.post(
  "/v1/messages",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "anthropic-chat", outApi: "anthropic-chat", service: "aws" },
    { afterTransform: [maybeReassignModel] }
  ),
  awsProxy
);

// OpenAI-to-AWS Anthropic compatibility endpoint.
awsClaudeRouter.post(
  "/v1/chat/completions",
  ipLimiter,
  preprocessOpenAICompatRequest,
  awsProxy
);

/**
 * Tries to deal with:
 * - frontends sending AWS model names even when they want to use the OpenAI-
 *   compatible endpoint
 * - frontends sending Anthropic model names that AWS doesn't recognize
 * - frontends sending OpenAI model names because they expect the proxy to
 *   translate them
 *
 * If the client sends an AWS model ID, it is used verbatim. Otherwise, various
 * strategies are used to map a non-AWS model name to an AWS model ID.
 */
function maybeReassignModel(req: Request) {
  const model = req.body.model;

  // If it looks like an AWS model, use it as-is
  if (model.includes("anthropic.claude")) {
    return;
  }

  // Anthropic model names can look like:
  // - claude-v1
  // - claude-2.1
  // - claude-3-5-sonnet-20240620-v1:0
  const pattern =
    /^(claude-)?(instant-)?(v)?(\d+)([.-](\d))?(-\d+k)?(-sonnet-|-opus-|-haiku-)?(\d*)/i;
  const match = model.match(pattern);

  // If there's no match, fall back to Claude v2 as it is most likely to be
  // available on AWS.
  if (!match) {
    req.body.model = `anthropic.claude-v2:1`;
    return;
  }

  const [_, _cl, instant, _v, major, _sep, minor, _ctx, name, _rev] = match;

  if (instant) {
    req.body.model = "anthropic.claude-instant-v1";
    return;
  }

  const ver = minor ? `${major}.${minor}` : major;
  switch (ver) {
    case "1":
    case "1.0":
      req.body.model = "anthropic.claude-v1";
      return;
    case "2":
    case "2.0":
      req.body.model = "anthropic.claude-v2";
      return;
    case "3":
    case "3.0":
      // `name` is undefined for bare names like "claude-3", so guard the access.
      if (name?.includes("opus")) {
        req.body.model = "anthropic.claude-3-opus-20240229-v1:0";
      } else if (name?.includes("haiku")) {
        req.body.model = "anthropic.claude-3-haiku-20240307-v1:0";
      } else {
        req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0";
      }
      return;
    case "3.5":
      req.body.model = "anthropic.claude-3-5-sonnet-20240620-v1:0";
      return;
  }

  // Fall back to Claude 2.1
  req.body.model = `anthropic.claude-v2:1`;
  return;
}

export const awsClaude = awsClaudeRouter;
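As a standalone illustration (not part of the commit), the pattern used by `maybeReassignModel` can be exercised directly; names it cannot parse, such as OpenAI model names, fall through to the `anthropic.claude-v2:1` default:

```ts
// The same pattern as in maybeReassignModel, copied here for demonstration.
const pattern =
  /^(claude-)?(instant-)?(v)?(\d+)([.-](\d))?(-\d+k)?(-sonnet-|-opus-|-haiku-)?(\d*)/i;

for (const model of [
  "claude-v1",                  // -> anthropic.claude-v1
  "claude-instant-v1",          // -> anthropic.claude-instant-v1
  "claude-2.1",                 // ver "2.1" misses the switch -> anthropic.claude-v2:1
  "claude-3-opus-20240229",     // -> anthropic.claude-3-opus-20240229-v1:0
  "claude-3-5-sonnet-20240620", // -> anthropic.claude-3-5-sonnet-20240620-v1:0
  "gpt-4",                      // no match -> fallback anthropic.claude-v2:1
]) {
  const match = model.match(pattern);
  const [, , instant, , major, , minor, , name] = match ?? [];
  console.log(model, { instant, major, minor, name });
}
```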
@@ -1,337 +1,56 @@
-import { Request, RequestHandler, Response, Router } from "express";
-import { createProxyMiddleware } from "http-proxy-middleware";
-import { v4 } from "uuid";
+/* Shared code between AWS Claude and AWS Mistral endpoints. */
+
+import { Request, Response, Router } from "express";
 import { config } from "../config";
-import { logger } from "../logger";
-import { createQueueMiddleware } from "./queue";
-import { ipLimiter } from "./rate-limit";
-import { handleProxyError } from "./middleware/common";
-import {
-  createPreprocessorMiddleware,
-  signAwsRequest,
-  finalizeSignedRequest,
-  createOnProxyReqHandler,
-} from "./middleware/request";
-import {
-  ProxyResHandlerWithBody,
-  createOnProxyResHandler,
-} from "./middleware/response";
-import { transformAnthropicChatResponseToAnthropicText, transformAnthropicChatResponseToOpenAI } from "./anthropic";
-import { sendErrorToClient } from "./middleware/response/error-generator";
+import { awsClaude } from "./aws-claude";
+import { addV1 } from "./add-v1";

-const LATEST_AWS_V2_MINOR_VERSION = "1";
+const awsRouter = Router();
+awsRouter.use("/claude", addV1, awsClaude);
+// awsRouter.use("/mistral", addV1, awsMistralRouter);
+awsRouter.get("/:vendor?/models", handleModelsRequest);

+const MODELS_CACHE_TTL = 10000;
 let modelsCache: any = null;
 let modelsCacheTime = 0;

-const getModelsResponse = () => {
-  if (new Date().getTime() - modelsCacheTime < 1000 * 60) {
-    return modelsCache;
+function handleModelsRequest(req: Request, res: Response) {
+  if (!config.awsCredentials) return res.json({ object: "list", data: [] });
+  if (new Date().getTime() - modelsCacheTime < MODELS_CACHE_TTL) {
+    return res.json(modelsCache);
   }

-  if (!config.awsCredentials) return { object: "list", data: [] };
-
   // https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
-  const variants = [
+  const models = [
     "anthropic.claude-v2",
     "anthropic.claude-v2:1",
     "anthropic.claude-3-haiku-20240307-v1:0",
     "anthropic.claude-3-sonnet-20240229-v1:0",
     "anthropic.claude-3-5-sonnet-20240620-v1:0",
     "anthropic.claude-3-opus-20240229-v1:0",
-  ];
+    "mistral.mistral-7b-instruct-v0:2",
+    "mistral.mixtral-8x7b-instruct-v0:1",
+    "mistral.mistral-large-2402-v1:0",
+    "mistral.mistral-large-2407-v1:0",
+    "mistral.mistral-small-2402-v1:0",
+  ].map((id) => {
+    const vendor = id.match(/^(.*)\./)?.[1];
+    return {
+      id,
+      object: "model",
+      created: new Date().getTime(),
+      owned_by: vendor,
+      permission: [],
+      root: vendor,
+      parent: null,
+    };
+  });

-  const models = variants.map((id) => ({
-    id,
-    object: "model",
-    created: new Date().getTime(),
-    owned_by: "anthropic",
-    permission: [],
-    root: "claude",
-    parent: null,
-  }));
-
-  modelsCache = { object: "list", data: models };
+  const requestedVendor = req.params.vendor;
+  const vendor = requestedVendor === "claude" ? "anthropic" : requestedVendor;
+  modelsCache = { object: "list", data: models.filter((m) => m.root === vendor) };
   modelsCacheTime = new Date().getTime();
-
-  return modelsCache;
-};
-
-const handleModelRequest: RequestHandler = (_req, res) => {
-  res.status(200).json(getModelsResponse());
-};
-
-/** Only used for non-streaming requests. */
-const awsResponseHandler: ProxyResHandlerWithBody = async (
-  _proxyRes,
-  req,
-  res,
-  body
-) => {
-  if (typeof body !== "object") {
-    throw new Error("Expected body to be an object");
-  }
-
-  let newBody = body;
-  switch (`${req.inboundApi}<-${req.outboundApi}`) {
-    case "openai<-anthropic-text":
-      req.log.info("Transforming Anthropic Text back to OpenAI format");
-      newBody = transformAwsTextResponseToOpenAI(body, req);
-      break;
-    case "openai<-anthropic-chat":
-      req.log.info("Transforming AWS Anthropic Chat back to OpenAI format");
-      newBody = transformAnthropicChatResponseToOpenAI(body);
-      break;
-    case "anthropic-text<-anthropic-chat":
-      req.log.info("Transforming AWS Anthropic Chat back to Text format");
-      newBody = transformAnthropicChatResponseToAnthropicText(body);
-      break;
-  }
-
-  // AWS does not always confirm the model in the response, so we have to add it
-  if (!newBody.model && req.body.model) {
-    newBody.model = req.body.model;
-  }
-
-  res.status(200).json({ ...newBody, proxy: body.proxy });
-};
-
-/**
- * Transforms a model response from the Anthropic API to match those from the
- * OpenAI API, for users using Claude via the OpenAI-compatible endpoint. This
- * is only used for non-streaming requests as streaming requests are handled
- * on-the-fly.
- */
-function transformAwsTextResponseToOpenAI(
-  awsBody: Record<string, any>,
-  req: Request
-): Record<string, any> {
-  const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
-  return {
-    id: "aws-" + v4(),
-    object: "chat.completion",
-    created: Date.now(),
-    model: req.body.model,
-    usage: {
-      prompt_tokens: req.promptTokens,
-      completion_tokens: req.outputTokens,
-      total_tokens: totalTokens,
-    },
-    choices: [
-      {
-        message: {
-          role: "assistant",
-          content: awsBody.completion?.trim(),
-        },
-        finish_reason: awsBody.stop_reason,
-        index: 0,
-      },
-    ],
-  };
-}
-
-const awsProxy = createQueueMiddleware({
-  beforeProxy: signAwsRequest,
-  proxyMiddleware: createProxyMiddleware({
-    target: "bad-target-will-be-rewritten",
-    router: ({ signedRequest }) => {
-      if (!signedRequest) throw new Error("Must sign request before proxying");
-      return `${signedRequest.protocol}//${signedRequest.hostname}`;
-    },
-    changeOrigin: true,
-    selfHandleResponse: true,
-    logger,
-    on: {
-      proxyReq: createOnProxyReqHandler({ pipeline: [finalizeSignedRequest] }),
-      proxyRes: createOnProxyResHandler([awsResponseHandler]),
-      error: handleProxyError,
-    },
-  }),
-});
-
-const nativeTextPreprocessor = createPreprocessorMiddleware(
-  { inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
-  { afterTransform: [maybeReassignModel] }
-);
-
-const textToChatPreprocessor = createPreprocessorMiddleware(
-  { inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
-  { afterTransform: [maybeReassignModel] }
-);
-
-/**
- * Routes text completion prompts to aws anthropic-chat if they need translation
- * (claude-3 based models do not support the old text completion endpoint).
- */
-const preprocessAwsTextRequest: RequestHandler = (req, res, next) => {
-  if (req.body.model?.includes("claude-3")) {
-    textToChatPreprocessor(req, res, next);
-  } else {
-    nativeTextPreprocessor(req, res, next);
-  }
-};
-
-const oaiToAwsTextPreprocessor = createPreprocessorMiddleware(
-  { inApi: "openai", outApi: "anthropic-text", service: "aws" },
-  { afterTransform: [maybeReassignModel] }
-);
-
-const oaiToAwsChatPreprocessor = createPreprocessorMiddleware(
-  { inApi: "openai", outApi: "anthropic-chat", service: "aws" },
-  { afterTransform: [maybeReassignModel] }
-);
-
-/**
- * Routes an OpenAI prompt to either the legacy Claude text completion endpoint
- * or the new Claude chat completion endpoint, based on the requested model.
- */
-const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
-  if (req.body.model?.includes("claude-3")) {
-    oaiToAwsChatPreprocessor(req, res, next);
-  } else {
-    oaiToAwsTextPreprocessor(req, res, next);
-  }
-};
-
-const awsRouter = Router();
-awsRouter.get("/v1/models", handleModelRequest);
-// Native(ish) Anthropic text completion endpoint.
-awsRouter.post("/v1/complete", ipLimiter, preprocessAwsTextRequest, awsProxy);
-// Native Anthropic chat completion endpoint.
-awsRouter.post(
-  "/v1/messages",
-  ipLimiter,
-  createPreprocessorMiddleware(
-    { inApi: "anthropic-chat", outApi: "anthropic-chat", service: "aws" },
-    { afterTransform: [maybeReassignModel] }
-  ),
-  awsProxy
-);
-// Temporary force-Claude3 endpoint
-awsRouter.post(
-  "/v1/sonnet/:action(complete|messages)",
-  ipLimiter,
-  handleCompatibilityRequest,
-  createPreprocessorMiddleware({
-    inApi: "anthropic-text",
-    outApi: "anthropic-chat",
-    service: "aws",
-  }),
-  awsProxy
-);
-
-// OpenAI-to-AWS Anthropic compatibility endpoint.
-awsRouter.post(
-  "/v1/chat/completions",
-  ipLimiter,
-  preprocessOpenAICompatRequest,
-  awsProxy
-);
-
-/**
- * Tries to deal with:
- * - frontends sending AWS model names even when they want to use the OpenAI-
- *   compatible endpoint
- * - frontends sending Anthropic model names that AWS doesn't recognize
- * - frontends sending OpenAI model names because they expect the proxy to
- *   translate them
- *
- * If client sends AWS model ID it will be used verbatim. Otherwise, various
- * strategies are used to try to map a non-AWS model name to AWS model ID.
- */
-function maybeReassignModel(req: Request) {
-  const model = req.body.model;
-
-  // If it looks like an AWS model, use it as-is
-  if (model.includes("anthropic.claude")) {
-    return;
-  }
-
-  // Anthropic model names can look like:
-  // - claude-v1
-  // - claude-2.1
-  // - claude-3-5-sonnet-20240620-v1:0
-  const pattern =
-    /^(claude-)?(instant-)?(v)?(\d+)([.-](\d{1}))?(-\d+k)?(-sonnet-|-opus-|-haiku-)?(\d*)/i;
-  const match = model.match(pattern);
-
-  // If there's no match, fallback to Claude v2 as it is most likely to be
-  // available on AWS.
-  if (!match) {
-    req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;
-    return;
-  }
-
-  const [_, _cl, instant, _v, major, _sep, minor, _ctx, name, _rev] = match;
-
-  if (instant) {
-    req.body.model = "anthropic.claude-instant-v1";
-    return;
-  }
-
-  const ver = minor ? `${major}.${minor}` : major;
-  switch (ver) {
-    case "1":
-    case "1.0":
-      req.body.model = "anthropic.claude-v1";
-      return;
-    case "2":
-    case "2.0":
-      req.body.model = "anthropic.claude-v2";
-      return;
-    case "3":
-    case "3.0":
-      if (name.includes("opus")) {
-        req.body.model = "anthropic.claude-3-opus-20240229-v1:0";
-      } else if (name.includes("haiku")) {
-        req.body.model = "anthropic.claude-3-haiku-20240307-v1:0";
-      } else {
-        req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0";
-      }
-      return;
-    case "3.5":
-      req.body.model = "anthropic.claude-3-5-sonnet-20240620-v1:0";
-      return;
-  }
-
-  // Fallback to Claude 2.1
-  req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;
-  return;
-}
-
-export function handleCompatibilityRequest(
-  req: Request,
-  res: Response,
-  next: any
-) {
-  const action = req.params.action;
-  const alreadyInChatFormat = Boolean(req.body.messages);
-  const compatModel = "anthropic.claude-3-5-sonnet-20240620-v1:0";
-  req.log.info(
-    { inputModel: req.body.model, compatModel, alreadyInChatFormat },
-    "Handling AWS compatibility request"
-  );
-
-  if (action === "messages" || alreadyInChatFormat) {
-    return sendErrorToClient({
-      req,
-      res,
-      options: {
-        title: "Unnecessary usage of compatibility endpoint",
-        message: `Your client seems to already support the new Claude API format. This endpoint is intended for clients that do not yet support the new format.\nUse the normal \`/aws/claude\` proxy endpoint instead.`,
-        format: "unknown",
-        statusCode: 400,
-        reqId: req.id,
-        obj: {
-          requested_endpoint: "/aws/claude/sonnet",
-          correct_endpoint: "/aws/claude",
-        },
-      },
-    });
-  }
-
-  req.body.model = compatModel;
-  next();
+  return res.json(modelsCache);
 }

 export const aws = awsRouter;
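A quick request sketch for the merged models route (not part of the commit; the host and public mount prefix are illustrative and depend on how the proxy router is mounted):

```ts
// GET /aws/claude/models lists anthropic.* IDs ("claude" maps to vendor
// "anthropic"); /aws/mistral/models would list the mistral.* IDs.
const res = await fetch("https://proxy.example.com/proxy/aws/claude/models");
const { data } = await res.json();
console.log(data.map((m: { id: string }) => m.id));
```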
@@ -1,44 +1,55 @@
-import express, { Request, Response, NextFunction } from "express";
-import { gatekeeper } from "./gatekeeper";
-import { checkRisuToken } from "./check-risu-token";
-import { openai } from "./openai";
-import { openaiImage } from "./openai-image";
+import express from "express";
+import { addV1 } from "./add-v1";
 import { anthropic } from "./anthropic";
+import { aws } from "./aws";
+import { azure } from "./azure";
+import { checkRisuToken } from "./check-risu-token";
+import { gatekeeper } from "./gatekeeper";
+import { gcp } from "./gcp";
 import { googleAI } from "./google-ai";
 import { mistralAI } from "./mistral-ai";
-import { aws } from "./aws";
-import { gcp } from "./gcp";
-import { azure } from "./azure";
+import { openai } from "./openai";
+import { openaiImage } from "./openai-image";
 import { sendErrorToClient } from "./middleware/response/error-generator";

 const proxyRouter = express.Router();

+// Remove `expect: 100-continue` header from requests due to incompatibility
+// with node-http-proxy.
 proxyRouter.use((req, _res, next) => {
   if (req.headers.expect) {
-    // node-http-proxy does not like it when clients send `expect: 100-continue`
-    // and will stall. none of the upstream APIs use this header anyway.
     delete req.headers.expect;
   }
   next();
 });
+
+// Apply body parsers.
 proxyRouter.use(
   express.json({ limit: "100mb" }),
   express.urlencoded({ extended: true, limit: "100mb" })
 );
+
+// Apply auth/rate limits.
 proxyRouter.use(gatekeeper);
 proxyRouter.use(checkRisuToken);
+
+// Initialize request queue metadata.
 proxyRouter.use((req, _res, next) => {
   req.startTime = Date.now();
   req.retryCount = 0;
   next();
 });
+
+// Proxy endpoints.
 proxyRouter.use("/openai", addV1, openai);
 proxyRouter.use("/openai-image", addV1, openaiImage);
 proxyRouter.use("/anthropic", addV1, anthropic);
 proxyRouter.use("/google-ai", addV1, googleAI);
 proxyRouter.use("/mistral-ai", addV1, mistralAI);
-proxyRouter.use("/aws/claude", addV1, aws);
+proxyRouter.use("/aws", aws);
 proxyRouter.use("/gcp/claude", addV1, gcp);
 proxyRouter.use("/azure/openai", addV1, azure);
+
+// Redirect browser requests to the homepage.
 proxyRouter.get("*", (req, res, next) => {
   const isBrowser = req.headers["user-agent"]?.includes("Mozilla");
@@ -48,7 +59,8 @@ proxyRouter.get("*", (req, res, next) => {
     next();
   }
 });
-// Handle 404s.
+
+// Send a fake client error if user specifies an invalid proxy endpoint.
 proxyRouter.use((req, res) => {
   sendErrorToClient({
     req,
@@ -69,11 +81,3 @@ proxyRouter.use((req, res) => {
 });

 export { proxyRouter as proxyRouter };
-
-function addV1(req: Request, res: Response, next: NextFunction) {
-  // Clients don't consistently use the /v1 prefix so we'll add it for them.
-  if (!req.path.startsWith("/v1/") && !req.path.startsWith("/v1beta/")) {
-    req.url = `/v1${req.url}`;
-  }
-  next();
-}
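Taken together, a request sketch against the new layout (not part of the commit; the host, `/proxy` prefix, and auth header are illustrative and depend on the deployment and gatekeeper configuration):

```ts
// The /v1 prefix is optional because addV1 inserts it before awsClaude sees the path.
const res = await fetch("https://proxy.example.com/proxy/aws/claude/messages", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: "Bearer your-proxy-token", // illustrative
  },
  body: JSON.stringify({
    model: "claude-3-5-sonnet-20240620", // maybeReassignModel maps this to the Bedrock ID
    max_tokens: 256,
    messages: [{ role: "user", content: "Hello!" }],
  }),
});
console.log(await res.json());
```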