allows selecting compat model via endpoint name and makes errors less confusing
This commit is contained in:
+23
-43
@@ -16,12 +16,8 @@ import {
|
||||
ProxyResHandlerWithBody,
|
||||
createOnProxyResHandler,
|
||||
} from "./middleware/response";
|
||||
import { HttpError } from "../shared/errors";
|
||||
import { sendErrorToClient } from "./middleware/response/error-generator";
|
||||
|
||||
const CLAUDE_3_COMPAT_MODEL =
|
||||
process.env.CLAUDE_3_COMPAT_MODEL || "claude-3-sonnet-20240229";
|
||||
|
||||
let modelsCache: any = null;
|
||||
let modelsCacheTime = 0;
|
||||
|
||||
@@ -97,7 +93,7 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
|
||||
req.outboundApi === "anthropic-chat"
|
||||
) {
|
||||
req.log.info("Transforming Anthropic text to Anthropic chat format");
|
||||
body = transformAnthropicChatResponseToAnthropicText(body, req);
|
||||
body = transformAnthropicChatResponseToAnthropicText(body);
|
||||
}
|
||||
|
||||
if (req.tokenizerInfo) {
|
||||
@@ -108,8 +104,7 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
|
||||
};
|
||||
|
||||
export function transformAnthropicChatResponseToAnthropicText(
|
||||
anthropicBody: Record<string, any>,
|
||||
req: Request
|
||||
anthropicBody: Record<string, any>
|
||||
): Record<string, any> {
|
||||
return {
|
||||
type: "completion",
|
||||
@@ -183,7 +178,7 @@ const anthropicProxy = createQueueMiddleware({
|
||||
if (isText && pathname === "/v1/chat/completions") {
|
||||
req.url = "/v1/complete";
|
||||
}
|
||||
if (isChat && pathname === "/v1/claude-3/complete") {
|
||||
if (isChat && ["sonnet", "opus"].includes(req.params.type)) {
|
||||
req.url = "/v1/messages";
|
||||
}
|
||||
return true;
|
||||
@@ -249,7 +244,7 @@ anthropicRouter.post(
|
||||
// yet support the new model. Forces claude-3. Will be removed once common
|
||||
// frontends have been updated.
|
||||
anthropicRouter.post(
|
||||
"/v1/claude-3/complete",
|
||||
"/v1/:type(sonnet|opus)/:action(complete|messages)",
|
||||
ipLimiter,
|
||||
handleCompatibilityRequest,
|
||||
createPreprocessorMiddleware({
|
||||
@@ -259,51 +254,36 @@ anthropicRouter.post(
|
||||
}),
|
||||
anthropicProxy
|
||||
);
|
||||
// This is not a valid route but clients may attempt to use it.
|
||||
anthropicRouter.post("/v1/claude-3/messages", (req, res) => {
|
||||
sendErrorToClient({
|
||||
req,
|
||||
res,
|
||||
options: {
|
||||
title: "Proxy error (wrong endpoint)",
|
||||
message:
|
||||
"Your client is attempting to use the /anthropic/claude-3 compatibility endpoint, but it supports the new API format.\n\nUse the normal /anthropic endpoint instead.",
|
||||
format: "unknown",
|
||||
statusCode: 404,
|
||||
reqId: req.id,
|
||||
obj: { original_url: req.originalUrl, router_url: req.url },
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
export function handleCompatibilityRequest(
|
||||
req: Request,
|
||||
res: Response,
|
||||
next: any
|
||||
) {
|
||||
function handleCompatibilityRequest(req: Request, res: Response, next: any) {
|
||||
const type = req.params.type;
|
||||
const action = req.params.action;
|
||||
const alreadyInChatFormat = Boolean(req.body.messages);
|
||||
const alreadyUsingClaude3 = req.body.model?.includes("claude-3");
|
||||
const compatModel = `claude-3-${type}-20240229`;
|
||||
req.log.info(
|
||||
{ type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
|
||||
"Handling Anthropic compatibility request"
|
||||
);
|
||||
|
||||
if (!alreadyUsingClaude3) {
|
||||
req.body.model = CLAUDE_3_COMPAT_MODEL;
|
||||
}
|
||||
|
||||
if (!alreadyInChatFormat) {
|
||||
return next();
|
||||
} else {
|
||||
sendErrorToClient({
|
||||
if (action === "messages" || alreadyInChatFormat) {
|
||||
return sendErrorToClient({
|
||||
req,
|
||||
res,
|
||||
options: {
|
||||
title: "Proxy error (incompatible request for endpoint)",
|
||||
message:
|
||||
"Your request is already using the new API format and does not need to use the compatibility endpoint.\n\nUse the /proxy/anthropic endpoint instead.",
|
||||
title: "Unnecessary usage of compatibility endpoint",
|
||||
message: `Your client seems to already support the new Claude API format. This endpoint is intended for clients that do not yet support the new format.\nUse the normal \`/anthropic\` proxy endpoint instead.`,
|
||||
format: "unknown",
|
||||
statusCode: 400,
|
||||
reqId: req.id,
|
||||
obj: {
|
||||
requested_endpoint: "/anthropic/" + type,
|
||||
correct_endpoint: "/anthropic",
|
||||
},
|
||||
},
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
req.body.model = compatModel;
|
||||
next();
|
||||
}
|
||||
|
||||
|
||||
+45
-30
@@ -1,4 +1,4 @@
|
||||
import { Request, RequestHandler, Router } from "express";
|
||||
import { Request, RequestHandler, Response, Router } from "express";
|
||||
import { createProxyMiddleware } from "http-proxy-middleware";
|
||||
import { v4 } from "uuid";
|
||||
import { config } from "../config";
|
||||
@@ -16,14 +16,10 @@ import {
|
||||
ProxyResHandlerWithBody,
|
||||
createOnProxyResHandler,
|
||||
} from "./middleware/response";
|
||||
import {
|
||||
handleCompatibilityRequest,
|
||||
transformAnthropicChatResponseToAnthropicText,
|
||||
} from "./anthropic";
|
||||
import { transformAnthropicChatResponseToAnthropicText } from "./anthropic";
|
||||
import { sendErrorToClient } from "./middleware/response/error-generator";
|
||||
|
||||
const LATEST_AWS_V2_MINOR_VERSION = "1";
|
||||
const CLAUDE_3_COMPAT_MODEL = "anthropic.claude-3-sonnet-20240229-v1:0";
|
||||
|
||||
let modelsCache: any = null;
|
||||
let modelsCacheTime = 0;
|
||||
@@ -88,7 +84,7 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (
|
||||
req.outboundApi === "anthropic-chat"
|
||||
) {
|
||||
req.log.info("Transforming AWS Claude chat response to Text format");
|
||||
body = transformAnthropicChatResponseToAnthropicText(body, req);
|
||||
body = transformAnthropicChatResponseToAnthropicText(body);
|
||||
}
|
||||
|
||||
if (req.tokenizerInfo) {
|
||||
@@ -192,33 +188,17 @@ awsRouter.post(
|
||||
);
|
||||
// Temporary force-Claude3 endpoint
|
||||
awsRouter.post(
|
||||
"/v1/claude-3/complete",
|
||||
"/v1/sonnet/:action(complete|messages)",
|
||||
ipLimiter,
|
||||
handleCompatibilityRequest,
|
||||
createPreprocessorMiddleware(
|
||||
{ inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
|
||||
{
|
||||
beforeTransform: [(req) => void (req.body.model = CLAUDE_3_COMPAT_MODEL)],
|
||||
}
|
||||
),
|
||||
createPreprocessorMiddleware({
|
||||
inApi: "anthropic-text",
|
||||
outApi: "anthropic-chat",
|
||||
service: "aws",
|
||||
}),
|
||||
awsProxy
|
||||
);
|
||||
// This is not a valid route but clients may attempt to use it.
|
||||
awsRouter.post("/v1/claude-3/messages", (req, res) => {
|
||||
sendErrorToClient({
|
||||
req,
|
||||
res,
|
||||
options: {
|
||||
title: "Proxy error (wrong endpoint)",
|
||||
message:
|
||||
"Your client is attempting to use the /aws/claude/claude-3 compatibility endpoint, but supports the new API format and should use the normal /aws/claude endpoint instead.",
|
||||
format: "unknown",
|
||||
statusCode: 404,
|
||||
reqId: req.id,
|
||||
obj: { original_url: req.originalUrl, router_url: req.url },
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
// OpenAI-to-AWS Anthropic compatibility endpoint.
|
||||
awsRouter.post(
|
||||
"/v1/chat/completions",
|
||||
@@ -294,4 +274,39 @@ function maybeReassignModel(req: Request) {
|
||||
return;
|
||||
}
|
||||
|
||||
export function handleCompatibilityRequest(
|
||||
req: Request,
|
||||
res: Response,
|
||||
next: any
|
||||
) {
|
||||
const action = req.params.action;
|
||||
const alreadyInChatFormat = Boolean(req.body.messages);
|
||||
const compatModel = "anthropic.claude-3-sonnet-20240229-v1:0";
|
||||
req.log.info(
|
||||
{ inputModel: req.body.model, compatModel, alreadyInChatFormat },
|
||||
"Handling AWS compatibility request"
|
||||
);
|
||||
|
||||
if (action === "messages" || alreadyInChatFormat) {
|
||||
return sendErrorToClient({
|
||||
req,
|
||||
res,
|
||||
options: {
|
||||
title: "Unnecessary usage of compatibility endpoint",
|
||||
message: `Your client seems to already support the new Claude API format. This endpoint is intended for clients that do not yet support the new format.\nUse the normal \`/aws/claude\` proxy endpoint instead.`,
|
||||
format: "unknown",
|
||||
statusCode: 400,
|
||||
reqId: req.id,
|
||||
obj: {
|
||||
requested_endpoint: "/aws/claude/sonnet",
|
||||
correct_endpoint: "/aws/claude",
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
req.body.model = compatModel;
|
||||
next();
|
||||
}
|
||||
|
||||
export const aws = awsRouter;
|
||||
|
||||
@@ -4,7 +4,7 @@ import { ZodError } from "zod";
|
||||
import { generateErrorMessage } from "zod-error";
|
||||
import { assertNever } from "../../shared/utils";
|
||||
import { QuotaExceededError } from "./request/preprocessors/apply-quota-limits";
|
||||
import { buildSpoofedSSE, sendErrorToClient } from "./response/error-generator";
|
||||
import { sendErrorToClient } from "./response/error-generator";
|
||||
import { HttpError } from "../../shared/errors";
|
||||
|
||||
const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
|
||||
@@ -13,7 +13,8 @@ const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
|
||||
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
|
||||
const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
|
||||
const ANTHROPIC_MESSAGES_ENDPOINT = "/v1/messages";
|
||||
const ANTHROPIC_CLAUDE3_COMPAT_ENDPOINT = "/v1/claude-3/complete";
|
||||
const ANTHROPIC_SONNET_COMPAT_ENDPOINT = "/v1/sonnet";
|
||||
const ANTHROPIC_OPUS_COMPAT_ENDPOINT = "/v1/opus";
|
||||
|
||||
export function isTextGenerationRequest(req: Request) {
|
||||
return (
|
||||
@@ -23,7 +24,8 @@ export function isTextGenerationRequest(req: Request) {
|
||||
OPENAI_TEXT_COMPLETION_ENDPOINT,
|
||||
ANTHROPIC_COMPLETION_ENDPOINT,
|
||||
ANTHROPIC_MESSAGES_ENDPOINT,
|
||||
ANTHROPIC_CLAUDE3_COMPAT_ENDPOINT,
|
||||
ANTHROPIC_SONNET_COMPAT_ENDPOINT,
|
||||
ANTHROPIC_OPUS_COMPAT_ENDPOINT,
|
||||
].some((endpoint) => req.path.startsWith(endpoint))
|
||||
);
|
||||
}
|
||||
|
||||
+2
-3
@@ -59,9 +59,8 @@ proxyRouter.use((req, res) => {
|
||||
format: "unknown",
|
||||
obj: {
|
||||
proxy_note:
|
||||
"Your chat client is using the wrong endpoint. Please check your configuration.",
|
||||
original_url: req.originalUrl,
|
||||
router_url: req.url,
|
||||
"Your chat client is using the wrong endpoint. Check the Service Info page for the list of available endpoints.",
|
||||
requested_url: req.originalUrl,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user