adds AWS Claude Chat Completions and Claude 3 Sonnet support
This commit is contained in:
@@ -105,7 +105,7 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
|
||||
res.status(200).json(body);
|
||||
};
|
||||
|
||||
function transformAnthropicChatResponseToAnthropicText(
|
||||
export function transformAnthropicChatResponseToAnthropicText(
|
||||
anthropicBody: Record<string, any>,
|
||||
req: Request
|
||||
): Record<string, any> {
|
||||
|
||||
+63
-8
@@ -16,8 +16,10 @@ import {
|
||||
ProxyResHandlerWithBody,
|
||||
createOnProxyResHandler,
|
||||
} from "./middleware/response";
|
||||
import { transformAnthropicChatResponseToAnthropicText } from "./anthropic";
|
||||
|
||||
const LATEST_AWS_V2_MINOR_VERSION = "1";
|
||||
const CLAUDE_3_COMPAT_MODEL = "anthropic.claude-3-sonnet-20240229-v1:0";
|
||||
|
||||
let modelsCache: any = null;
|
||||
let modelsCacheTime = 0;
|
||||
@@ -29,10 +31,11 @@ const getModelsResponse = () => {
|
||||
|
||||
if (!config.awsCredentials) return { object: "list", data: [] };
|
||||
|
||||
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
|
||||
const variants = [
|
||||
"anthropic.claude-v2",
|
||||
"anthropic.claude-v2:1",
|
||||
"anthropic.claude-3-sonnet-20240229-v1:0"
|
||||
"anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
];
|
||||
|
||||
const models = variants.map((id) => ({
|
||||
@@ -73,7 +76,12 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (
|
||||
|
||||
if (req.inboundApi === "openai") {
|
||||
req.log.info("Transforming AWS Claude response to OpenAI format");
|
||||
body = transformAwsResponse(body, req);
|
||||
body = transformAwsTextResponseToOpenAI(body, req);
|
||||
}
|
||||
|
||||
if (req.inboundApi === "anthropic-text") {
|
||||
req.log.info("Transforming Text AWS Claude response to Chat format");
|
||||
body = transformAnthropicChatResponseToAnthropicText(body, req);
|
||||
}
|
||||
|
||||
if (req.tokenizerInfo) {
|
||||
@@ -92,7 +100,7 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (
|
||||
* is only used for non-streaming requests as streaming requests are handled
|
||||
* on-the-fly.
|
||||
*/
|
||||
function transformAwsResponse(
|
||||
function transformAwsTextResponseToOpenAI(
|
||||
awsBody: Record<string, any>,
|
||||
req: Request
|
||||
): Record<string, any> {
|
||||
@@ -139,18 +147,54 @@ const awsProxy = createQueueMiddleware({
|
||||
}),
|
||||
});
|
||||
|
||||
const nativeTextPreprocessor = createPreprocessorMiddleware(
|
||||
{ inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
|
||||
{ afterTransform: [maybeReassignModel] }
|
||||
);
|
||||
|
||||
const textToChatPreprocessor = createPreprocessorMiddleware(
|
||||
{ inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
|
||||
{ afterTransform: [maybeReassignModel] }
|
||||
);
|
||||
|
||||
/**
|
||||
* Routes text completion prompts to AWS anthropic-chat if they need translation
|
||||
* (claude-3 based models do not support the old text completion endpoint).
|
||||
*/
|
||||
const awsTextCompletionRouter: RequestHandler = (req, res, next) => {
|
||||
if (req.body.model?.includes("claude-3")) {
|
||||
textToChatPreprocessor(req, res, next);
|
||||
} else {
|
||||
nativeTextPreprocessor(req, res, next);
|
||||
}
|
||||
};
|
||||
|
||||
const awsRouter = Router();
|
||||
awsRouter.get("/v1/models", handleModelRequest);
|
||||
// Native(ish) Anthropic chat completion endpoint.
|
||||
// Native(ish) Anthropic text completion endpoint.
|
||||
awsRouter.post("/v1/complete", ipLimiter, awsTextCompletionRouter, awsProxy);
|
||||
// Native Anthropic chat completion endpoint.
|
||||
awsRouter.post(
|
||||
"/v1/complete",
|
||||
"/v1/messages",
|
||||
ipLimiter,
|
||||
createPreprocessorMiddleware(
|
||||
{ inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
|
||||
{ inApi: "anthropic-chat", outApi: "anthropic-chat", service: "aws" },
|
||||
{ afterTransform: [maybeReassignModel] }
|
||||
),
|
||||
awsProxy
|
||||
);
|
||||
// Temporary force-Claude3 endpoint
|
||||
awsRouter.post(
|
||||
"/v1/claude-3/complete",
|
||||
ipLimiter,
|
||||
createPreprocessorMiddleware(
|
||||
{ inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
|
||||
{
|
||||
beforeTransform: [(req) => void (req.body.model = CLAUDE_3_COMPAT_MODEL)],
|
||||
}
|
||||
),
|
||||
awsProxy
|
||||
);
|
||||
// OpenAI-to-AWS Anthropic compatibility endpoint.
|
||||
awsRouter.post(
|
||||
"/v1/chat/completions",
|
||||
@@ -178,7 +222,8 @@ function maybeReassignModel(req: Request) {
|
||||
return;
|
||||
}
|
||||
|
||||
const pattern = /^(claude-)?(instant-)?(v)?(\d+)(\.(\d+))?(-\d+k)?$/i;
|
||||
const pattern =
|
||||
/^(claude-)?(instant-)?(v)?(\d+)(\.(\d+))?(-\d+k)?(-sonnet-?|-opus-?)(\d*)/i;
|
||||
const match = model.match(pattern);
|
||||
|
||||
// If there's no match, return the latest v2 model
|
||||
@@ -187,7 +232,9 @@ function maybeReassignModel(req: Request) {
|
||||
return;
|
||||
}
|
||||
|
||||
const [, , instant, , major, , minor] = match;
|
||||
const instant = match[2];
|
||||
const major = match[4];
|
||||
const minor = match[6];
|
||||
|
||||
if (instant) {
|
||||
req.body.model = "anthropic.claude-instant-v1";
|
||||
@@ -210,6 +257,14 @@ function maybeReassignModel(req: Request) {
|
||||
return;
|
||||
}
|
||||
|
||||
// AWS currently only supports one v3 model.
|
||||
const variant = match[8]; // sonnet or opus
|
||||
const variantVersion = match[9];
|
||||
if (major === "3") {
|
||||
req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0";
|
||||
return;
|
||||
}
|
||||
|
||||
// Fallback to latest v2 model
|
||||
req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;
|
||||
return;
|
||||
|
||||
@@ -15,15 +15,19 @@ const AMZ_HOST =
|
||||
/**
|
||||
* Signs an outgoing AWS request with the appropriate headers and modifies the
|
||||
* request object in place to fix the path.
|
||||
* This happens AFTER request transformation.
|
||||
*/
|
||||
export const signAwsRequest: RequestPreprocessor = async (req) => {
|
||||
req.key = keyPool.get("anthropic.claude-v2", "aws");
|
||||
|
||||
const { model, stream } = req.body;
|
||||
req.key = keyPool.get(model, "aws");
|
||||
|
||||
req.isStreaming = stream === true || stream === "true";
|
||||
|
||||
let preamble = req.body.prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
|
||||
req.body.prompt = preamble + req.body.prompt;
|
||||
// same as addAnthropicPreamble for non-AWS requests, but has to happen here
|
||||
if (req.outboundApi === "anthropic-text") {
|
||||
let preamble = req.body.prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
|
||||
req.body.prompt = preamble + req.body.prompt;
|
||||
}
|
||||
|
||||
// AWS uses mostly the same parameters as Anthropic, with a few removed params
|
||||
// and much stricter validation on unused parameters. Rather than treating it
|
||||
@@ -31,28 +35,27 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
|
||||
// parameters.
|
||||
// TODO: This should happen in transform-outbound-payload.ts
|
||||
let strippedParams: Record<string, unknown>;
|
||||
if (req.inboundApi === "anthropic-chat") {
|
||||
strippedParams = AnthropicV1MessagesSchema
|
||||
.pick({
|
||||
messages: true,
|
||||
max_tokens: true,
|
||||
stop_sequences: true,
|
||||
temperature: true,
|
||||
top_k: true,
|
||||
top_p: true,
|
||||
})
|
||||
if (req.outboundApi === "anthropic-chat") {
|
||||
strippedParams = AnthropicV1MessagesSchema.pick({
|
||||
messages: true,
|
||||
max_tokens: true,
|
||||
stop_sequences: true,
|
||||
temperature: true,
|
||||
top_k: true,
|
||||
top_p: true,
|
||||
})
|
||||
.strip()
|
||||
.parse(req.body);
|
||||
strippedParams.anthropic_version = "bedrock-2023-05-31";
|
||||
} else {
|
||||
strippedParams = AnthropicV1TextSchema
|
||||
.pick({
|
||||
prompt: true,
|
||||
max_tokens_to_sample: true,
|
||||
stop_sequences: true,
|
||||
temperature: true,
|
||||
top_k: true,
|
||||
top_p: true,
|
||||
})
|
||||
strippedParams = AnthropicV1TextSchema.pick({
|
||||
prompt: true,
|
||||
max_tokens_to_sample: true,
|
||||
stop_sequences: true,
|
||||
temperature: true,
|
||||
top_k: true,
|
||||
top_p: true,
|
||||
})
|
||||
.strip()
|
||||
.parse(req.body);
|
||||
}
|
||||
|
||||
@@ -332,12 +332,17 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||
errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
|
||||
break;
|
||||
case "AccessDeniedException":
|
||||
req.log.error(
|
||||
{ key: req.key?.hash, model: req.body?.model },
|
||||
"Disabling key due to AccessDeniedException when invoking model. If credentials are valid, check IAM permissions."
|
||||
const isModelAccessError = errorPayload.error?.message?.includes(
|
||||
`access to the model with the specified model ID`
|
||||
);
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `API key doesn't have access to the requested resource.`;
|
||||
if (!isModelAccessError) {
|
||||
req.log.error(
|
||||
{ key: req.key?.hash, model: req.body?.model },
|
||||
"Disabling key due to AccessDeniedException when invoking model. If credentials are valid, check IAM permissions."
|
||||
);
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
}
|
||||
errorPayload.proxy_note = `API key doesn't have access to the requested resource. Model ID: ${req.body?.model}`;
|
||||
break;
|
||||
default:
|
||||
errorPayload.proxy_note = `Received 403 error. Key may be invalid.`;
|
||||
|
||||
@@ -49,7 +49,16 @@ export class SSEStreamAdapter extends Transform {
|
||||
if (contentType === "application/json" && eventType === "chunk") {
|
||||
const { bytes } = JSON.parse(bodyStr);
|
||||
const event = Buffer.from(bytes, "base64").toString("utf8");
|
||||
return ["event: completion", `data: ${event}`].join(`\n`);
|
||||
const eventObj = JSON.parse(event);
|
||||
|
||||
if ('completion' in eventObj) {
|
||||
return ["event: completion", `data: ${event}`].join(`\n`);
|
||||
} else {
|
||||
return [
|
||||
`event: ${eventObj.type}`,
|
||||
`data: ${event}`,
|
||||
].join(`\n`);
|
||||
}
|
||||
}
|
||||
// Intentional fallthrough, as non-JSON events may as well be errors
|
||||
// noinspection FallThroughInSwitchStatementJS
|
||||
|
||||
Reference in New Issue
Block a user