24 Commits

Author SHA1 Message Date
nai-degen cfc1290f83 fixes aws keychecker not detecting claude 2.1 2024-08-14 10:46:54 -05:00
nai-degen 14f228f666 always applies Mistral prompt fixes on messages input 2024-08-14 10:44:37 -05:00
nai-degen d264fdd573 adds mistral chat-to-text transformation, for better prefix compatibility 2024-08-13 23:24:36 -05:00
nai-degen 9c3e345720 update deps 2024-08-13 20:31:19 -05:00
nai-degen 37c421bb45 fixes token counting for streaming Mistral Text prompts 2024-08-13 20:29:24 -05:00
nai-degen 6c5fed90e2 rename function 2024-08-13 20:15:14 -05:00
nai-degen 9479fa4ab0 serviceinfo tweak 2024-08-13 20:13:46 -05:00
nai-degen e145f5757e implements aws mistral streaming 2024-08-13 20:04:07 -05:00
nai-degen 2fe6e07cf5 error better 2024-08-12 20:49:21 -05:00
nai-degen bc340c1be6 non-streaming aws mistral works 2024-08-12 20:37:14 -05:00
nai-degen 45c5d3d338 fixes aws mistral keychecker model invocation 2024-08-12 19:32:26 -05:00
nai-degen 3032ae3198 express route matching is a pain in the ass 2024-08-12 19:31:53 -05:00
nai-degen 49a89122f5 fixes aws models endpoint 2024-08-12 19:26:55 -05:00
nai-degen 2d8e1dac13 adds /aws/mistral endpoint 2024-08-12 19:10:49 -05:00
nai-degen 9e5a660ef5 refactors aws endpoint router to split claude/mistral 2024-08-12 19:10:49 -05:00
nai-degen 6cf8c09fad removes 'server greeting' header from info page 2024-08-12 19:10:49 -05:00
nai-degen dc1b573020 small KeyProvider#get refactor 2024-08-12 19:10:49 -05:00
nai-degen 3ff771d945 fix gcp rebase issue 2024-08-12 19:10:49 -05:00
nai-degen 985035fe80 adds old test script to repo 2024-08-12 19:10:49 -05:00
nai-degen 442f9529de comments 2024-08-12 19:10:49 -05:00
nai-degen 598ac8e4e1 tries to unfuck service info stat aggregation slightly 2024-08-12 19:10:49 -05:00
nai-degen 750dbee483 adds support for non-Anthropic models to AWS key manager 2024-08-12 19:10:49 -05:00
nai-degen a2d64e281e minor KeyProvider#getLockoutPeriod refactor 2024-08-12 19:10:49 -05:00
nai-degen c6467b02f3 adds AWS mistral model families and checker IDs 2024-08-12 19:10:49 -05:00
11 changed files with 83 additions and 64 deletions
+4 -4
View File
@@ -415,18 +415,18 @@ export const config: Config = {
firebaseKey: getEnvWithDefault("FIREBASE_KEY", undefined),
textModelRateLimit: getEnvWithDefault("TEXT_MODEL_RATE_LIMIT", 4),
imageModelRateLimit: getEnvWithDefault("IMAGE_MODEL_RATE_LIMIT", 4),
maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 32768),
maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 16384),
maxContextTokensAnthropic: getEnvWithDefault(
"MAX_CONTEXT_TOKENS_ANTHROPIC",
32768
0
),
maxOutputTokensOpenAI: getEnvWithDefault(
["MAX_OUTPUT_TOKENS_OPENAI", "MAX_OUTPUT_TOKENS"],
1024
400
),
maxOutputTokensAnthropic: getEnvWithDefault(
["MAX_OUTPUT_TOKENS_ANTHROPIC", "MAX_OUTPUT_TOKENS"],
1024
400
),
allowedModelFamilies: getEnvWithDefault(
"ALLOWED_MODEL_FAMILIES",
+68 -31
View File
@@ -46,7 +46,7 @@ const getModelsResponse = () => {
"claude-3-haiku-20240307",
"claude-3-opus-20240229",
"claude-3-sonnet-20240229",
"claude-3-5-sonnet-20240620",
"claude-3-5-sonnet-20240620"
];
const models = claudeVariants.map((id) => ({
@@ -70,7 +70,7 @@ const handleModelRequest: RequestHandler = (_req, res) => {
};
/** Only used for non-streaming requests. */
const anthropicBlockingResponseHandler: ProxyResHandlerWithBody = async (
const anthropicResponseHandler: ProxyResHandlerWithBody = async (
_proxyRes,
req,
res,
@@ -179,28 +179,6 @@ export function transformAnthropicChatResponseToOpenAI(
};
}
/**
 * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
 * model, reassigns it to Claude 3 Sonnet.
 */
function maybeReassignModel(req: Request) {
  const requestedModel = req.body.model;
  if (requestedModel.startsWith("gpt-")) {
    req.body.model = "claude-3-sonnet-20240229";
  }
}
/**
 * If client requests more than 4096 output tokens the request must have a
 * particular version header.
 * https://docs.anthropic.com/en/release-notes/api#july-15th-2024
 */
function setAnthropicBetaHeader(req: Request) {
  if (req.body.max_tokens_to_sample > 4096) {
    req.headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15";
  }
}
const anthropicProxy = createQueueMiddleware({
proxyMiddleware: createProxyMiddleware({
target: "https://api.anthropic.com",
@@ -211,7 +189,7 @@ const anthropicProxy = createQueueMiddleware({
proxyReq: createOnProxyReqHandler({
pipeline: [addKey, addAnthropicPreamble, finalizeBody],
}),
proxyRes: createOnProxyResHandler([anthropicBlockingResponseHandler]),
proxyRes: createOnProxyResHandler([anthropicResponseHandler]),
error: handleProxyError,
},
// Abusing pathFilter to rewrite the paths dynamically.
@@ -235,11 +213,6 @@ const anthropicProxy = createQueueMiddleware({
}),
});
const nativeAnthropicChatPreprocessor = createPreprocessorMiddleware(
{ inApi: "anthropic-chat", outApi: "anthropic-chat", service: "anthropic" },
{ afterTransform: [setAnthropicBetaHeader] }
);
const nativeTextPreprocessor = createPreprocessorMiddleware({
inApi: "anthropic-text",
outApi: "anthropic-text",
@@ -295,7 +268,11 @@ anthropicRouter.get("/v1/models", handleModelRequest);
anthropicRouter.post(
"/v1/messages",
ipLimiter,
nativeAnthropicChatPreprocessor,
createPreprocessorMiddleware({
inApi: "anthropic-chat",
outApi: "anthropic-chat",
service: "anthropic",
}),
anthropicProxy
);
// Anthropic text completion endpoint. Translates to Anthropic chat completion
@@ -315,5 +292,65 @@ anthropicRouter.post(
preprocessOpenAICompatRequest,
anthropicProxy
);
// Temporarily force Anthropic Text to Anthropic Chat for frontends which do not
// yet support the new model. Forces claude-3. Will be removed once common
// frontends have been updated.
anthropicRouter.post(
"/v1/:type(sonnet|opus)/:action(complete|messages)",
ipLimiter,
handleAnthropicTextCompatRequest,
createPreprocessorMiddleware({
inApi: "anthropic-text",
outApi: "anthropic-chat",
service: "anthropic",
}),
anthropicProxy
);
/**
 * Compatibility shim for frontends that still speak the legacy Anthropic Text
 * API. Rejects requests that are already in chat format (those clients should
 * use the normal /anthropic endpoint), otherwise forces the corresponding
 * Claude 3 model and passes the request along.
 */
function handleAnthropicTextCompatRequest(
  req: Request,
  res: Response,
  next: any
) {
  const { type, action } = req.params;
  const alreadyInChatFormat = Boolean(req.body.messages);
  const compatModel = `claude-3-${type}-20240229`;

  req.log.info(
    { type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
    "Handling Anthropic compatibility request"
  );

  // A chat-format body (or a request to the messages action) means the client
  // already supports the new API and should not be using this endpoint.
  if (alreadyInChatFormat || action === "messages") {
    return sendErrorToClient({
      req,
      res,
      options: {
        title: "Unnecessary usage of compatibility endpoint",
        message: `Your client seems to already support the new Claude API format. This endpoint is intended for clients that do not yet support the new format.\nUse the normal \`/anthropic\` proxy endpoint instead.`,
        format: "unknown",
        statusCode: 400,
        reqId: req.id,
        obj: {
          requested_endpoint: "/anthropic/" + type,
          correct_endpoint: "/anthropic",
        },
      },
    });
  }

  req.body.model = compatModel;
  next();
}
/**
 * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
 * model, reassigns it to Claude 3 Sonnet.
 */
function maybeReassignModel(req: Request) {
  const model = req.body.model;
  // Guard: a malformed request may omit `model` entirely (or send a non-string),
  // which would make `startsWith` throw a TypeError. Leave the body untouched
  // and let downstream validation reject it instead.
  if (typeof model !== "string" || !model.startsWith("gpt-")) return;
  req.body.model = "claude-3-sonnet-20240229";
}
export const anthropic = anthropicRouter;
@@ -1,16 +1,14 @@
import { HPMRequestCallback } from "../index";
import { config } from "../../../../config";
import { ForbiddenError } from "../../../../shared/errors";
import { getModelFamilyForRequest } from "../../../../shared/models";
import { HPMRequestCallback } from "../index";
/**
* Ensures the selected model family is enabled by the proxy configuration.
*/
export const checkModelFamily: HPMRequestCallback = (_proxyReq, req) => {
**/
export const checkModelFamily: HPMRequestCallback = (_proxyReq, req, res) => {
const family = getModelFamilyForRequest(req);
if (!config.allowedModelFamilies.includes(family)) {
throw new ForbiddenError(
`Model family '${family}' is not enabled on this proxy`
);
throw new ForbiddenError(`Model family '${family}' is not enabled on this proxy`);
}
};
@@ -35,7 +35,6 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
const credential = getCredentialParts(req);
const host = AMZ_HOST.replace("%REGION%", credential.region);
// AWS only uses 2023-06-01 and does not actually check this header, but we
// set it so that the stream adapter always selects the correct transformer.
req.headers["anthropic-version"] = "2023-06-01";
@@ -67,13 +67,11 @@ function applyMistralPromptFixes(req: Request): void {
);
// If the prompt relies on `prefix: true` for the last message, we need to
// convert it to a text completions request because AWS Mistral support for
// this feature is broken.
// On Mistral La Plateforme, we can't do this because they don't expose
// a text completions endpoint.
// convert it to a text completions request because Mistral support for
// this feature is limited (and completely broken on AWS Mistral).
const { messages } = req.body;
const lastMessage = messages && messages[messages.length - 1];
if (lastMessage?.role === "assistant" && req.service === "aws") {
if (lastMessage && lastMessage.role === "assistant") {
// enable prefix if client forgot, otherwise the template will insert an
// eos token which is very unlikely to be what the client wants.
lastMessage.prefix = true;
@@ -58,8 +58,6 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 16384;
} else if (model.match(/^gpt-4o/)) {
modelMax = 128000;
} else if (model.match(/^chatgpt-4o/)) {
modelMax = 128000;
} else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 131072;
} else if (model.match(/gpt-4-turbo(-preview)?$/)) {
+1 -7
View File
@@ -21,7 +21,6 @@ import {
createOnProxyResHandler,
ProxyResHandlerWithBody,
} from "./middleware/response";
import { BadRequestError } from "../shared/errors";
// Mistral can't settle on a single naming scheme and deprecates models within
// months of releasing them so this list is hard to keep up to date. 2024-07-28
@@ -171,12 +170,7 @@ export function detectMistralInputApi(req: Request) {
if (messages) {
req.inboundApi = "mistral-ai";
req.outboundApi = "mistral-ai";
} else if (prompt && req.service === "mistral-ai") {
// Mistral La Plateforme doesn't expose a text completions endpoint.
throw new BadRequestError(
"Mistral (via La Plateforme API) does not support text completions. This format is only supported on Mistral via the AWS API."
);
} else if (prompt && req.service === "aws") {
} else if (prompt) {
req.inboundApi = "mistral-text";
req.outboundApi = "mistral-text";
}
-2
View File
@@ -35,8 +35,6 @@ export const KNOWN_OPENAI_MODELS = [
// GPT4o Mini
"gpt-4o-mini",
"gpt-4o-mini-2024-07-18",
// GPT4o (ChatGPT)
"chatgpt-4o-latest",
// GPT4 Turbo (superceded by GPT4o)
"gpt-4-turbo",
"gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
+1 -3
View File
@@ -45,9 +45,7 @@ const BaseMistralAIV1CompletionsSchema = z.object({
.default([])
.transform((v) => (Array.isArray(v) ? v : [v])),
random_seed: z.number().int().min(0).optional(),
response_format: z
.object({ type: z.enum(["text", "json_object"]) })
.optional(),
response_format: z.enum(["text", "json_object"]).optional().default("text"),
safe_prompt: z.boolean().optional().default(false),
});
+2 -2
View File
@@ -25,7 +25,7 @@ export const OpenAIV1ChatCompletionSchema = z
content: z.union([z.string(), OpenAIV1ChatContentArraySchema]),
name: z.string().optional(),
tool_calls: z.array(z.any()).optional(),
function_call: z.any().optional(),
function_call: z.array(z.any()).optional(),
tool_call_id: z.string().optional(),
}),
{
@@ -52,7 +52,7 @@ export const OpenAIV1ChatCompletionSchema = z
.number()
.int()
.nullish()
.default(Math.min(OPENAI_OUTPUT_MAX, 16384))
.default(Math.min(OPENAI_OUTPUT_MAX, 4096))
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
frequency_penalty: z.number().optional().default(0),
presence_penalty: z.number().optional().default(0),
-1
View File
@@ -130,7 +130,6 @@ export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e"];
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-4o(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4o",
"^chatgpt-4o": "gpt4o",
"^gpt-4o-mini(-\\d{4}-\\d{2}-\\d{2})?$": "turbo", // closest match
"^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
"^gpt-4-turbo(-preview)?$": "gpt4-turbo",