adds AWS Claude Chat Completions and Claude 3 Sonnet support

This commit is contained in:
nai-degen
2024-03-04 16:25:06 -06:00
parent 802d847cc6
commit 51ffca480a
9 changed files with 155 additions and 49 deletions
+63 -8
View File
@@ -16,8 +16,10 @@ import {
ProxyResHandlerWithBody,
createOnProxyResHandler,
} from "./middleware/response";
import { transformAnthropicChatResponseToAnthropicText } from "./anthropic";
const LATEST_AWS_V2_MINOR_VERSION = "1";
const CLAUDE_3_COMPAT_MODEL = "anthropic.claude-3-sonnet-20240229-v1:0";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -29,10 +31,11 @@ const getModelsResponse = () => {
if (!config.awsCredentials) return { object: "list", data: [] };
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
const variants = [
"anthropic.claude-v2",
"anthropic.claude-v2:1",
"anthropic.claude-3-sonnet-20240229-v1:0"
"anthropic.claude-3-sonnet-20240229-v1:0",
];
const models = variants.map((id) => ({
@@ -73,7 +76,12 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (
if (req.inboundApi === "openai") {
req.log.info("Transforming AWS Claude response to OpenAI format");
body = transformAwsResponse(body, req);
body = transformAwsTextResponseToOpenAI(body, req);
}
if (req.inboundApi === "anthropic-text") {
req.log.info("Transforming Text AWS Claude response to Chat format");
body = transformAnthropicChatResponseToAnthropicText(body, req);
}
if (req.tokenizerInfo) {
@@ -92,7 +100,7 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (
* is only used for non-streaming requests as streaming requests are handled
* on-the-fly.
*/
function transformAwsResponse(
function transformAwsTextResponseToOpenAI(
awsBody: Record<string, any>,
req: Request
): Record<string, any> {
@@ -139,18 +147,54 @@ const awsProxy = createQueueMiddleware({
}),
});
const nativeTextPreprocessor = createPreprocessorMiddleware(
{ inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
{ afterTransform: [maybeReassignModel] }
);
const textToChatPreprocessor = createPreprocessorMiddleware(
{ inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
{ afterTransform: [maybeReassignModel] }
);
/**
* Routes text completion prompts to aws anthropic-chat if they need translation
* (claude-3 based models do not support the old text completion endpoint).
*/
const awsTextCompletionRouter: RequestHandler = (req, res, next) => {
if (req.body.model?.includes("claude-3")) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
}
};
const awsRouter = Router();
awsRouter.get("/v1/models", handleModelRequest);
// Native(ish) Anthropic chat completion endpoint.
// Native(ish) Anthropic text completion endpoint.
awsRouter.post("/v1/complete", ipLimiter, awsTextCompletionRouter, awsProxy);
// Native Anthropic chat completion endpoint.
awsRouter.post(
"/v1/complete",
"/v1/messages",
ipLimiter,
createPreprocessorMiddleware(
{ inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
{ inApi: "anthropic-chat", outApi: "anthropic-chat", service: "aws" },
{ afterTransform: [maybeReassignModel] }
),
awsProxy
);
// Temporary force-Claude3 endpoint
awsRouter.post(
"/v1/claude-3/complete",
ipLimiter,
createPreprocessorMiddleware(
{ inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
{
beforeTransform: [(req) => void (req.body.model = CLAUDE_3_COMPAT_MODEL)],
}
),
awsProxy
);
// OpenAI-to-AWS Anthropic compatibility endpoint.
awsRouter.post(
"/v1/chat/completions",
@@ -178,7 +222,8 @@ function maybeReassignModel(req: Request) {
return;
}
const pattern = /^(claude-)?(instant-)?(v)?(\d+)(\.(\d+))?(-\d+k)?$/i;
const pattern =
/^(claude-)?(instant-)?(v)?(\d+)(\.(\d+))?(-\d+k)?(-sonnet-?|-opus-?)(\d*)/i;
const match = model.match(pattern);
// If there's no match, return the latest v2 model
@@ -187,7 +232,9 @@ function maybeReassignModel(req: Request) {
return;
}
const [, , instant, , major, , minor] = match;
const instant = match[2];
const major = match[4];
const minor = match[6];
if (instant) {
req.body.model = "anthropic.claude-instant-v1";
@@ -210,6 +257,14 @@ function maybeReassignModel(req: Request) {
return;
}
// AWS currently only supports one v3 model.
const variant = match[8]; // sonnet or opus
const variantVersion = match[9];
if (major === "3") {
req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0";
return;
}
// Fallback to latest v2 model
req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;
return;