24 Commits

Author SHA1 Message Date
nai-degen cfc1290f83 fixes aws keychecker not detecting claude 2.1 2024-08-14 10:46:54 -05:00
nai-degen 14f228f666 always applies Mistral prompt fixes on messages input 2024-08-14 10:44:37 -05:00
nai-degen d264fdd573 adds mistral chat-to-text transformation, for better prefix compatibility 2024-08-13 23:24:36 -05:00
nai-degen 9c3e345720 update deps 2024-08-13 20:31:19 -05:00
nai-degen 37c421bb45 fixes token counting for streaming Mistral Text prompts 2024-08-13 20:29:24 -05:00
nai-degen 6c5fed90e2 rename function 2024-08-13 20:15:14 -05:00
nai-degen 9479fa4ab0 serviceinfo tweak 2024-08-13 20:13:46 -05:00
nai-degen e145f5757e implements aws mistral streaming 2024-08-13 20:04:07 -05:00
nai-degen 2fe6e07cf5 error better 2024-08-12 20:49:21 -05:00
nai-degen bc340c1be6 non-streaming aws mistral works 2024-08-12 20:37:14 -05:00
nai-degen 45c5d3d338 fixes aws mistral keychecker model invocation 2024-08-12 19:32:26 -05:00
nai-degen 3032ae3198 express route matching is a pain in the ass 2024-08-12 19:31:53 -05:00
nai-degen 49a89122f5 fixes aws models endpoint 2024-08-12 19:26:55 -05:00
nai-degen 2d8e1dac13 adds /aws/mistral endpoint 2024-08-12 19:10:49 -05:00
nai-degen 9e5a660ef5 refactors aws endpoint router to split claude/mistral 2024-08-12 19:10:49 -05:00
nai-degen 6cf8c09fad removes 'server greeting' header from info page 2024-08-12 19:10:49 -05:00
nai-degen dc1b573020 small KeyProvider#get refactor 2024-08-12 19:10:49 -05:00
nai-degen 3ff771d945 fix gcp rebase issue 2024-08-12 19:10:49 -05:00
nai-degen 985035fe80 adds old test script to repo 2024-08-12 19:10:49 -05:00
nai-degen 442f9529de comments 2024-08-12 19:10:49 -05:00
nai-degen 598ac8e4e1 tries to unfuck service info stat aggregation slightly 2024-08-12 19:10:49 -05:00
nai-degen 750dbee483 adds support for non-Anthropic models to AWS key manager 2024-08-12 19:10:49 -05:00
nai-degen a2d64e281e minor KeyProvider#getLockoutPeriod refactor 2024-08-12 19:10:49 -05:00
nai-degen c6467b02f3 adds AWS mistral model families and checker IDs 2024-08-12 19:10:49 -05:00
11 changed files with 83 additions and 64 deletions
+4 -4
View File
@@ -415,18 +415,18 @@ export const config: Config = {
firebaseKey: getEnvWithDefault("FIREBASE_KEY", undefined),
textModelRateLimit: getEnvWithDefault("TEXT_MODEL_RATE_LIMIT", 4),
imageModelRateLimit: getEnvWithDefault("IMAGE_MODEL_RATE_LIMIT", 4),
maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 32768),
maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 16384),
maxContextTokensAnthropic: getEnvWithDefault(
"MAX_CONTEXT_TOKENS_ANTHROPIC",
32768
0
),
maxOutputTokensOpenAI: getEnvWithDefault(
["MAX_OUTPUT_TOKENS_OPENAI", "MAX_OUTPUT_TOKENS"],
1024
400
),
maxOutputTokensAnthropic: getEnvWithDefault(
["MAX_OUTPUT_TOKENS_ANTHROPIC", "MAX_OUTPUT_TOKENS"],
1024
400
),
allowedModelFamilies: getEnvWithDefault(
"ALLOWED_MODEL_FAMILIES",
+68 -31
View File
@@ -46,7 +46,7 @@ const getModelsResponse = () => {
"claude-3-haiku-20240307",
"claude-3-opus-20240229",
"claude-3-sonnet-20240229",
"claude-3-5-sonnet-20240620",
"claude-3-5-sonnet-20240620"
];
const models = claudeVariants.map((id) => ({
@@ -70,7 +70,7 @@ const handleModelRequest: RequestHandler = (_req, res) => {
};
/** Only used for non-streaming requests. */
const anthropicBlockingResponseHandler: ProxyResHandlerWithBody = async (
const anthropicResponseHandler: ProxyResHandlerWithBody = async (
_proxyRes,
req,
res,
@@ -179,28 +179,6 @@ export function transformAnthropicChatResponseToOpenAI(
};
}
/**
 * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
 * model, reassigns it to Claude 3 Sonnet.
 */
function maybeReassignModel(req: Request) {
  const requestedModel = req.body.model;
  if (requestedModel.startsWith("gpt-")) {
    req.body.model = "claude-3-sonnet-20240229";
  }
}
/**
 * If client requests more than 4096 output tokens the request must have a
 * particular version header.
 * https://docs.anthropic.com/en/release-notes/api#july-15th-2024
 */
function setAnthropicBetaHeader(req: Request) {
  if (req.body.max_tokens_to_sample > 4096) {
    req.headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15";
  }
}
const anthropicProxy = createQueueMiddleware({
proxyMiddleware: createProxyMiddleware({
target: "https://api.anthropic.com",
@@ -211,7 +189,7 @@ const anthropicProxy = createQueueMiddleware({
proxyReq: createOnProxyReqHandler({
pipeline: [addKey, addAnthropicPreamble, finalizeBody],
}),
proxyRes: createOnProxyResHandler([anthropicBlockingResponseHandler]),
proxyRes: createOnProxyResHandler([anthropicResponseHandler]),
error: handleProxyError,
},
// Abusing pathFilter to rewrite the paths dynamically.
@@ -235,11 +213,6 @@ const anthropicProxy = createQueueMiddleware({
}),
});
const nativeAnthropicChatPreprocessor = createPreprocessorMiddleware(
{ inApi: "anthropic-chat", outApi: "anthropic-chat", service: "anthropic" },
{ afterTransform: [setAnthropicBetaHeader] }
);
const nativeTextPreprocessor = createPreprocessorMiddleware({
inApi: "anthropic-text",
outApi: "anthropic-text",
@@ -295,7 +268,11 @@ anthropicRouter.get("/v1/models", handleModelRequest);
anthropicRouter.post(
"/v1/messages",
ipLimiter,
nativeAnthropicChatPreprocessor,
createPreprocessorMiddleware({
inApi: "anthropic-chat",
outApi: "anthropic-chat",
service: "anthropic",
}),
anthropicProxy
);
// Anthropic text completion endpoint. Translates to Anthropic chat completion
@@ -315,5 +292,65 @@ anthropicRouter.post(
preprocessOpenAICompatRequest,
anthropicProxy
);
// Temporarily force Anthropic Text to Anthropic Chat for frontends which do not
// yet support the new model. Forces claude-3. Will be removed once common
// frontends have been updated.
anthropicRouter.post(
"/v1/:type(sonnet|opus)/:action(complete|messages)",
ipLimiter,
handleAnthropicTextCompatRequest,
createPreprocessorMiddleware({
inApi: "anthropic-text",
outApi: "anthropic-chat",
service: "anthropic",
}),
anthropicProxy
);
/**
 * Compatibility shim for frontends that still speak the legacy Anthropic Text
 * API. Rejects requests that are already in chat format (those clients should
 * use the normal /anthropic endpoint), otherwise forces the corresponding
 * Claude 3 model and passes the request along.
 */
function handleAnthropicTextCompatRequest(
  req: Request,
  res: Response,
  next: any
) {
  const { type, action } = req.params;
  const alreadyInChatFormat = Boolean(req.body.messages);
  const compatModel = `claude-3-${type}-20240229`;

  req.log.info(
    { type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
    "Handling Anthropic compatibility request"
  );

  // A chat-format body (or a request to the messages action) means the client
  // already supports the new API and should not be using this endpoint.
  if (alreadyInChatFormat || action === "messages") {
    return sendErrorToClient({
      req,
      res,
      options: {
        title: "Unnecessary usage of compatibility endpoint",
        message: `Your client seems to already support the new Claude API format. This endpoint is intended for clients that do not yet support the new format.\nUse the normal \`/anthropic\` proxy endpoint instead.`,
        format: "unknown",
        statusCode: 400,
        reqId: req.id,
        obj: {
          requested_endpoint: "/anthropic/" + type,
          correct_endpoint: "/anthropic",
        },
      },
    });
  }

  req.body.model = compatModel;
  next();
}
/**
 * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
 * model, reassigns it to Claude 3 Sonnet.
 */
function maybeReassignModel(req: Request) {
  const model = req.body.model;
  // Guard: a malformed request may omit `model` entirely (or send a non-string),
  // which would make `startsWith` throw a TypeError. Leave the body untouched
  // and let downstream validation reject it instead.
  if (typeof model !== "string" || !model.startsWith("gpt-")) return;
  req.body.model = "claude-3-sonnet-20240229";
}
export const anthropic = anthropicRouter;
@@ -1,16 +1,14 @@
import { HPMRequestCallback } from "../index";
import { config } from "../../../../config";
import { ForbiddenError } from "../../../../shared/errors";
import { getModelFamilyForRequest } from "../../../../shared/models";
import { HPMRequestCallback } from "../index";
/**
* Ensures the selected model family is enabled by the proxy configuration.
*/
export const checkModelFamily: HPMRequestCallback = (_proxyReq, req) => {
**/
export const checkModelFamily: HPMRequestCallback = (_proxyReq, req, res) => {
const family = getModelFamilyForRequest(req);
if (!config.allowedModelFamilies.includes(family)) {
throw new ForbiddenError(
`Model family '${family}' is not enabled on this proxy`
);
throw new ForbiddenError(`Model family '${family}' is not enabled on this proxy`);
}
};
@@ -35,7 +35,6 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
const credential = getCredentialParts(req);
const host = AMZ_HOST.replace("%REGION%", credential.region);
// AWS only uses 2023-06-01 and does not actually check this header, but we
// set it so that the stream adapter always selects the correct transformer.
req.headers["anthropic-version"] = "2023-06-01";
@@ -67,13 +67,11 @@ function applyMistralPromptFixes(req: Request): void {
);
// If the prompt relies on `prefix: true` for the last message, we need to
// convert it to a text completions request because AWS Mistral support for
// this feature is broken.
// On Mistral La Plateforme, we can't do this because they don't expose
// a text completions endpoint.
// convert it to a text completions request because Mistral support for
// this feature is limited (and completely broken on AWS Mistral).
const { messages } = req.body;
const lastMessage = messages && messages[messages.length - 1];
if (lastMessage?.role === "assistant" && req.service === "aws") {
if (lastMessage && lastMessage.role === "assistant") {
// enable prefix if client forgot, otherwise the template will insert an
// eos token which is very unlikely to be what the client wants.
lastMessage.prefix = true;
@@ -58,8 +58,6 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 16384;
} else if (model.match(/^gpt-4o/)) {
modelMax = 128000;
} else if (model.match(/^chatgpt-4o/)) {
modelMax = 128000;
} else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 131072;
} else if (model.match(/gpt-4-turbo(-preview)?$/)) {
+1 -7
View File
@@ -21,7 +21,6 @@ import {
createOnProxyResHandler,
ProxyResHandlerWithBody,
} from "./middleware/response";
import { BadRequestError } from "../shared/errors";
// Mistral can't settle on a single naming scheme and deprecates models within
// months of releasing them so this list is hard to keep up to date. 2024-07-28
@@ -171,12 +170,7 @@ export function detectMistralInputApi(req: Request) {
if (messages) {
req.inboundApi = "mistral-ai";
req.outboundApi = "mistral-ai";
} else if (prompt && req.service === "mistral-ai") {
// Mistral La Plateforme doesn't expose a text completions endpoint.
throw new BadRequestError(
"Mistral (via La Plateforme API) does not support text completions. This format is only supported on Mistral via the AWS API."
);
} else if (prompt && req.service === "aws") {
} else if (prompt) {
req.inboundApi = "mistral-text";
req.outboundApi = "mistral-text";
}
-2
View File
@@ -35,8 +35,6 @@ export const KNOWN_OPENAI_MODELS = [
// GPT4o Mini
"gpt-4o-mini",
"gpt-4o-mini-2024-07-18",
// GPT4o (ChatGPT)
"chatgpt-4o-latest",
// GPT4 Turbo (superceded by GPT4o)
"gpt-4-turbo",
"gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
+1 -3
View File
@@ -45,9 +45,7 @@ const BaseMistralAIV1CompletionsSchema = z.object({
.default([])
.transform((v) => (Array.isArray(v) ? v : [v])),
random_seed: z.number().int().min(0).optional(),
response_format: z
.object({ type: z.enum(["text", "json_object"]) })
.optional(),
response_format: z.enum(["text", "json_object"]).optional().default("text"),
safe_prompt: z.boolean().optional().default(false),
});
+2 -2
View File
@@ -25,7 +25,7 @@ export const OpenAIV1ChatCompletionSchema = z
content: z.union([z.string(), OpenAIV1ChatContentArraySchema]),
name: z.string().optional(),
tool_calls: z.array(z.any()).optional(),
function_call: z.any().optional(),
function_call: z.array(z.any()).optional(),
tool_call_id: z.string().optional(),
}),
{
@@ -52,7 +52,7 @@ export const OpenAIV1ChatCompletionSchema = z
.number()
.int()
.nullish()
.default(Math.min(OPENAI_OUTPUT_MAX, 16384))
.default(Math.min(OPENAI_OUTPUT_MAX, 4096))
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
frequency_penalty: z.number().optional().default(0),
presence_penalty: z.number().optional().default(0),
-1
View File
@@ -130,7 +130,6 @@ export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e"];
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-4o(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4o",
"^chatgpt-4o": "gpt4o",
"^gpt-4o-mini(-\\d{4}-\\d{2}-\\d{2})?$": "turbo", // closest match
"^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
"^gpt-4-turbo(-preview)?$": "gpt4-turbo",