adds AWS Claude Chat Completions and Claude 3 Sonnet support

2024-03-04 16:25:06 -06:00
parent 802d847cc6
commit 51ffca480a
9 changed files with 155 additions and 49 deletions
@@ -16,8 +16,10 @@ import {
  ProxyResHandlerWithBody,
  createOnProxyResHandler,
 } from "./middleware/response";
+import { transformAnthropicChatResponseToAnthropicText } from "./anthropic";

 const LATEST_AWS_V2_MINOR_VERSION = "1";
+const CLAUDE_3_COMPAT_MODEL = "anthropic.claude-3-sonnet-20240229-v1:0";

 let modelsCache: any = null;
 let modelsCacheTime = 0;
@@ -29,10 +31,11 @@ const getModelsResponse = () => {

  if (!config.awsCredentials) return { object: "list", data: [] };

+  // https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
  const variants = [
    "anthropic.claude-v2",
    "anthropic.claude-v2:1",
-    "anthropic.claude-3-sonnet-20240229-v1:0"
+    "anthropic.claude-3-sonnet-20240229-v1:0",
  ];

  const models = variants.map((id) => ({
@@ -73,7 +76,12 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (

  if (req.inboundApi === "openai") {
    req.log.info("Transforming AWS Claude response to OpenAI format");
-    body = transformAwsResponse(body, req);
+    body = transformAwsTextResponseToOpenAI(body, req);
+  }
+
+  if (req.inboundApi === "anthropic-text") {
+    req.log.info("Transforming Text AWS Claude response to Chat format");
+    body = transformAnthropicChatResponseToAnthropicText(body, req);
  }

  if (req.tokenizerInfo) {
@@ -92,7 +100,7 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (
 * is only used for non-streaming requests as streaming requests are handled
 * on-the-fly.
 */
-function transformAwsResponse(
+function transformAwsTextResponseToOpenAI(
  awsBody: Record<string, any>,
  req: Request
 ): Record<string, any> {
@@ -139,18 +147,54 @@ const awsProxy = createQueueMiddleware({
  }),
 });

+const nativeTextPreprocessor = createPreprocessorMiddleware(
+  { inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
+  { afterTransform: [maybeReassignModel] }
+);
+
+const textToChatPreprocessor = createPreprocessorMiddleware(
+  { inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
+  { afterTransform: [maybeReassignModel] }
+);
+
+/**
+ * Picks the preprocessor for an inbound Anthropic text completion request.
+ *
+ * If the requested model name contains "claude-3", the request goes through
+ * `textToChatPreprocessor` (anthropic-text in, anthropic-chat out), because
+ * claude-3 based models do not support the old text completion endpoint.
+ * Every other request, including one with no model set, goes through
+ * `nativeTextPreprocessor` (anthropic-text in and out).
+ *
+ * Both preprocessors are configured with `maybeReassignModel` as an
+ * `afterTransform` hook.
+ */
+const awsTextCompletionRouter: RequestHandler = (req, res, next) => {
+  if (req.body.model?.includes("claude-3")) {
+    textToChatPreprocessor(req, res, next);
+  } else {
+    nativeTextPreprocessor(req, res, next);
+  }
+};
+
 const awsRouter = Router();
 awsRouter.get("/v1/models", handleModelRequest);
-// Native(ish) Anthropic chat completion endpoint.
+// Native(ish) Anthropic text completion endpoint.
+awsRouter.post("/v1/complete", ipLimiter, awsTextCompletionRouter, awsProxy);
+// Native Anthropic chat completion endpoint.
 awsRouter.post(
-  "/v1/complete",
+  "/v1/messages",
  ipLimiter,
  createPreprocessorMiddleware(
-    { inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
+    { inApi: "anthropic-chat", outApi: "anthropic-chat", service: "aws" },
    { afterTransform: [maybeReassignModel] }
  ),
  awsProxy
 );
+// Temporary endpoint: overrides the requested model with CLAUDE_3_COMPAT_MODEL.
+awsRouter.post(
+  "/v1/claude-3/complete",
+  ipLimiter,
+  createPreprocessorMiddleware(
+    { inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
+    {
+      beforeTransform: [(req) => void (req.body.model = CLAUDE_3_COMPAT_MODEL)],
+    }
+  ),
+  awsProxy
+);
 // OpenAI-to-AWS Anthropic compatibility endpoint.
 awsRouter.post(
  "/v1/chat/completions",
@@ -178,7 +222,8 @@ function maybeReassignModel(req: Request) {
    return;
  }

-  const pattern = /^(claude-)?(instant-)?(v)?(\d+)(\.(\d+))?(-\d+k)?$/i;
+  const pattern =
+    /^(claude-)?(instant-)?(v)?(\d+)(\.(\d+))?(-\d+k)?(-sonnet-?|-opus-?)(\d*)/i;
  const match = model.match(pattern);

  // If there's no match, return the latest v2 model
@@ -187,7 +232,9 @@ function maybeReassignModel(req: Request) {
    return;
  }

-  const [, , instant, , major, , minor] = match;
+  const instant = match[2];
+  const major = match[4];
+  const minor = match[6];

  if (instant) {
    req.body.model = "anthropic.claude-instant-v1";
@@ -210,6 +257,14 @@ function maybeReassignModel(req: Request) {
    return;
  }

+  // AWS currently only supports one v3 model, so any major version 3 request
+  // maps to Claude 3 Sonnet regardless of the variant asked for.
+  const variant = match[8]; // sonnet or opus
+  const variantVersion = match[9];
+  if (major === "3") {
+    req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0";
+    return;
+  }
+
  // Fallback to latest v2 model
  req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;
  return;