adds AWS Claude Chat Completions and Claude 3 Sonnet support

2024-03-04 16:25:06 -06:00
parent 802d847cc6
commit 51ffca480a
9 changed files with 155 additions and 49 deletions
@@ -105,7 +105,7 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
  res.status(200).json(body);
 };

-function transformAnthropicChatResponseToAnthropicText(
+export function transformAnthropicChatResponseToAnthropicText(
  anthropicBody: Record<string, any>,
  req: Request
 ): Record<string, any> {
@@ -16,8 +16,10 @@ import {
  ProxyResHandlerWithBody,
  createOnProxyResHandler,
 } from "./middleware/response";
+import { transformAnthropicChatResponseToAnthropicText } from "./anthropic";

 const LATEST_AWS_V2_MINOR_VERSION = "1";
+const CLAUDE_3_COMPAT_MODEL = "anthropic.claude-3-sonnet-20240229-v1:0";

 let modelsCache: any = null;
 let modelsCacheTime = 0;
@@ -29,10 +31,11 @@ const getModelsResponse = () => {

  if (!config.awsCredentials) return { object: "list", data: [] };

+  // https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
  const variants = [
    "anthropic.claude-v2",
    "anthropic.claude-v2:1",
-    "anthropic.claude-3-sonnet-20240229-v1:0"
+    "anthropic.claude-3-sonnet-20240229-v1:0",
  ];

  const models = variants.map((id) => ({
@@ -73,7 +76,12 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (

  if (req.inboundApi === "openai") {
    req.log.info("Transforming AWS Claude response to OpenAI format");
-    body = transformAwsResponse(body, req);
+    body = transformAwsTextResponseToOpenAI(body, req);
+  }
+
+  if (req.inboundApi === "anthropic-text") {
+    req.log.info("Transforming Text AWS Claude response to Chat format");
+    body = transformAnthropicChatResponseToAnthropicText(body, req);
  }

  if (req.tokenizerInfo) {
@@ -92,7 +100,7 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (
 * is only used for non-streaming requests as streaming requests are handled
 * on-the-fly.
 */
-function transformAwsResponse(
+function transformAwsTextResponseToOpenAI(
  awsBody: Record<string, any>,
  req: Request
 ): Record<string, any> {
@@ -139,18 +147,54 @@ const awsProxy = createQueueMiddleware({
  }),
 });

+/**
+ * Preprocessor for the AWS service whose inbound and outbound formats are both
+ * Anthropic text completions; maybeReassignModel is registered to run after
+ * the transform.
+ */
+const nativeTextPreprocessor = createPreprocessorMiddleware(
+  { service: "aws", inApi: "anthropic-text", outApi: "anthropic-text" },
+  { afterTransform: [maybeReassignModel] }
+);
+
+/**
+ * Preprocessor for the AWS service that accepts Anthropic text-completion
+ * input and emits Anthropic chat output; maybeReassignModel is registered to
+ * run after the transform.
+ */
+const textToChatPreprocessor = createPreprocessorMiddleware(
+  { service: "aws", inApi: "anthropic-text", outApi: "anthropic-chat" },
+  { afterTransform: [maybeReassignModel] }
+);
+
+/**
+ * Picks the preprocessor for a text completion request based on its model.
+ * Models containing "claude-3" go through the text-to-chat preprocessor
+ * (claude-3 based models do not support the old text completion endpoint);
+ * everything else, including a missing or non-string model, stays on the
+ * native text preprocessor instead of throwing on `.includes`.
+ */
+const awsTextCompletionRouter: RequestHandler = (req, res, next) => {
+  // The body has not been validated yet, so `model` may be any JSON value.
+  const model: unknown = req.body?.model;
+  const needsChat = typeof model === "string" && model.includes("claude-3");
+  const preprocess = needsChat
+    ? textToChatPreprocessor
+    : nativeTextPreprocessor;
+  preprocess(req, res, next);
+};
+
 const awsRouter = Router();
 awsRouter.get("/v1/models", handleModelRequest);
-// Native(ish) Anthropic chat completion endpoint.
+// Native(ish) Anthropic text completion endpoint.
+awsRouter.post("/v1/complete", ipLimiter, awsTextCompletionRouter, awsProxy);
+// Native Anthropic chat completion endpoint.
 awsRouter.post(
-  "/v1/complete",
+  "/v1/messages",
  ipLimiter,
  createPreprocessorMiddleware(
-    { inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
+    { inApi: "anthropic-chat", outApi: "anthropic-chat", service: "aws" },
    { afterTransform: [maybeReassignModel] }
  ),
  awsProxy
 );
+// Temporary endpoint: overwrites the requested model with CLAUDE_3_COMPAT_MODEL
+// before the text-to-chat transform runs.
+awsRouter.post(
+  "/v1/claude-3/complete",
+  ipLimiter,
+  createPreprocessorMiddleware(
+    { inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
+    {
+      beforeTransform: [
+        (req) => {
+          req.body.model = CLAUDE_3_COMPAT_MODEL;
+        },
+      ],
+    }
+  ),
+  awsProxy
+);
 // OpenAI-to-AWS Anthropic compatibility endpoint.
 awsRouter.post(
  "/v1/chat/completions",
@@ -178,7 +222,8 @@ function maybeReassignModel(req: Request) {
    return;
  }

-  const pattern = /^(claude-)?(instant-)?(v)?(\d+)(\.(\d+))?(-\d+k)?$/i;
+  const pattern =
+    /^(claude-)?(instant-)?(v)?(\d+)(\.(\d+))?(-\d+k)?(-sonnet-?|-opus-?)(\d*)/i;
  const match = model.match(pattern);

  // If there's no match, return the latest v2 model
@@ -187,7 +232,9 @@ function maybeReassignModel(req: Request) {
    return;
  }

-  const [, , instant, , major, , minor] = match;
+  const instant = match[2];
+  const major = match[4];
+  const minor = match[6];

  if (instant) {
    req.body.model = "anthropic.claude-instant-v1";
@@ -210,6 +257,14 @@ function maybeReassignModel(req: Request) {
    return;
  }

+  // AWS currently only supports one v3 model.
+  const variant = match[8]; // sonnet or opus
+  const variantVersion = match[9];
+  if (major === "3") {
+    req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0";
+    return;
+  }
+
  // Fallback to latest v2 model
  req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;
  return;
@@ -15,15 +15,19 @@ const AMZ_HOST =
 /**
 * Signs an outgoing AWS request with the appropriate headers and modifies the
 * request object in place to fix the path.
+ * This happens AFTER request transformation.
 */
 export const signAwsRequest: RequestPreprocessor = async (req) => {
-  req.key = keyPool.get("anthropic.claude-v2", "aws");
-
  const { model, stream } = req.body;
+  req.key = keyPool.get(model, "aws");
+
  req.isStreaming = stream === true || stream === "true";

-  let preamble = req.body.prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
-  req.body.prompt = preamble + req.body.prompt;
+  // same as addAnthropicPreamble for non-AWS requests, but has to happen here
+  if (req.outboundApi === "anthropic-text") {
+    let preamble = req.body.prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
+    req.body.prompt = preamble + req.body.prompt;
+  }

  // AWS uses mostly the same parameters as Anthropic, with a few removed params
  // and much stricter validation on unused parameters. Rather than treating it
@@ -31,28 +35,27 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
  // parameters.
  // TODO: This should happen in transform-outbound-payload.ts
  let strippedParams: Record<string, unknown>;
-  if (req.inboundApi === "anthropic-chat") {
-    strippedParams = AnthropicV1MessagesSchema
-      .pick({
-        messages: true,
-        max_tokens: true,
-        stop_sequences: true,
-        temperature: true,
-        top_k: true,
-        top_p: true,
-      })
+  if (req.outboundApi === "anthropic-chat") {
+    strippedParams = AnthropicV1MessagesSchema.pick({
+      messages: true,
+      max_tokens: true,
+      stop_sequences: true,
+      temperature: true,
+      top_k: true,
+      top_p: true,
+    })
      .strip()
      .parse(req.body);
+    strippedParams.anthropic_version = "bedrock-2023-05-31";
  } else {
-    strippedParams = AnthropicV1TextSchema
-      .pick({
-        prompt: true,
-        max_tokens_to_sample: true,
-        stop_sequences: true,
-        temperature: true,
-        top_k: true,
-        top_p: true,
-      })
+    strippedParams = AnthropicV1TextSchema.pick({
+      prompt: true,
+      max_tokens_to_sample: true,
+      stop_sequences: true,
+      temperature: true,
+      top_k: true,
+      top_p: true,
+    })
      .strip()
      .parse(req.body);
  }
@@ -332,12 +332,17 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
        errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
        break;
      case "AccessDeniedException":
-        req.log.error(
-          { key: req.key?.hash, model: req.body?.model },
-          "Disabling key due to AccessDeniedException when invoking model. If credentials are valid, check IAM permissions."
+        const isModelAccessError = errorPayload.error?.message?.includes(
+          `access to the model with the specified model ID`
        );
-        keyPool.disable(req.key!, "revoked");
-        errorPayload.proxy_note = `API key doesn't have access to the requested resource.`;
+        if (!isModelAccessError) {
+          req.log.error(
+            { key: req.key?.hash, model: req.body?.model },
+            "Disabling key due to AccessDeniedException when invoking model. If credentials are valid, check IAM permissions."
+          );
+          keyPool.disable(req.key!, "revoked");
+        }
+        errorPayload.proxy_note = `API key doesn't have access to the requested resource. Model ID: ${req.body?.model}`;
        break;
      default:
        errorPayload.proxy_note = `Received 403 error. Key may be invalid.`;
@@ -49,7 +49,16 @@ export class SSEStreamAdapter extends Transform {
        if (contentType === "application/json" && eventType === "chunk") {
          const { bytes } = JSON.parse(bodyStr);
          const event = Buffer.from(bytes, "base64").toString("utf8");
-          return ["event: completion", `data: ${event}`].join(`\n`);
+          const eventObj = JSON.parse(event);
+
+          if ('completion' in eventObj) {
+            return ["event: completion", `data: ${event}`].join(`\n`);
+          } else {
+            return [
+              `event: ${eventObj.type}`,
+              `data: ${event}`,
+            ].join(`\n`);
+          }
        }
      // Intentional fallthrough, as non-JSON events may as well be errors
      // noinspection FallThroughInSwitchStatementJS