adds new mistral models and updates older model lists/context limits

2024-07-28 13:15:03 -05:00
parent 584bb3fbc7
commit 9a3cca6b80
5 changed files with 90 additions and 53 deletions
@@ -12,7 +12,7 @@ import { checkCsrfToken, injectCsrfToken } from "./shared/inject-csrf";

 const INFO_PAGE_TTL = 2000;
 const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
-  turbo: "GPT-3.5 Turbo",
+  turbo: "GPT-4o Mini / 3.5 Turbo",
  gpt4: "GPT-4",
  "gpt4-32k": "GPT-4 32k",
  "gpt4-turbo": "GPT-4 Turbo",
@@ -22,7 +22,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
  "claude-opus": "Claude (Opus)",
  "gemini-pro": "Gemini Pro",
  "mistral-tiny": "Mistral 7B",
-  "mistral-small": "Mixtral Small", // Originally 8x7B, but that now refers to the older open-weight version. Mixtral Small is a newer closed-weight update to the 8x7B model.
+  "mistral-small": "Mistral Nemo",
  "mistral-medium": "Mistral Medium",
  "mistral-large": "Mistral Large",
  "aws-claude": "AWS Claude (Sonnet)",
@@ -6,8 +6,9 @@ import { RequestPreprocessor } from "../index";

 const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
 const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
-const GOOGLE_AI_MAX_CONTEXT = 32000;
-const MISTRAL_AI_MAX_CONTENT = 32768;
+// TODO: make these limits configurable, like the Claude/OpenAI limits above.
+const GOOGLE_AI_MAX_CONTEXT = 1024000;
+const MISTRAL_AI_MAX_CONTENT = 131072;

 /**
 * Assigns `req.promptTokens` and `req.outputTokens` based on the request body
@@ -81,16 +82,18 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
  } else if (model.match(/^claude-3/)) {
    modelMax = 200000;
  } else if (model.match(/^gemini-\d{3}$/)) {
-    modelMax = GOOGLE_AI_MAX_CONTEXT;
-  } else if (model.match(/^mistral-(tiny|small|medium)$/)) {
-    modelMax = MISTRAL_AI_MAX_CONTENT;
+    modelMax = 1024000;
  } else if (model.match(/^anthropic\.claude-3/)) {
    modelMax = 200000;
  } else if (model.match(/^anthropic\.claude-v2:\d/)) {
    modelMax = 200000;
  } else if (model.match(/^anthropic\.claude/)) {
-    // Not sure if AWS Claude has the same context limit as Anthropic Claude.
    modelMax = 100000;
+  } else if (model.match(/tral/)) {
+    // Catches mistral, mixtral, codestral, mathstral, etc. Mistral models
+    // follow no consistent naming convention and have wildly different context
+    // windows, so this is a deliberately broad catch-all.
+    modelMax = MISTRAL_AI_MAX_CONTENT;
  } else {
    req.log.warn({ model }, "Unknown model, using 200k token limit.");
    modelMax = 200000;
@@ -22,27 +22,46 @@ import {
  ProxyResHandlerWithBody,
 } from "./middleware/response";

+// Mistral can't settle on a single naming scheme and deprecates models within
+// months of releasing them, so this list goes stale fast (as of 2024-07-28).
 // https://docs.mistral.ai/platform/endpoints
 export const KNOWN_MISTRAL_AI_MODELS = [
-  // Mistral 7b (open weight, legacy)
+  /*
+  Mistral Nemo
+  "A 12B model built with the partnership with Nvidia.  It is easy to use and a
+  drop-in replacement in any system using Mistral 7B that it supersedes."
+  */
+  "open-mistral-nemo",
+  "open-mistral-nemo-2407",
+  /*
+  Mistral Large
+  "Our flagship model with state-of-the-art reasoning, knowledge, and coding
+  capabilities."
+  */
+  "mistral-large-latest",
+  "mistral-large-2407",
+  "mistral-large-2402", // deprecated
+  /*
+  Codestral
+  "A cutting-edge generative model that has been specifically designed and
+  optimized for code generation tasks, including fill-in-the-middle and code
+  completion."
+  note: this uses a separate bidi completion endpoint that is not implemented
+  */
+  "codestral-latest",
+  "codestral-2405",
+  /* So-called "Research Models" */
   "open-mistral-7b",
-  "mistral-tiny-2312",
-  // Mixtral 8x7b (open weight, legacy)
   "open-mixtral-8x7b",
-  "mistral-small-2312",
-  // Mixtral Small (newer 8x7b, closed weight)
+  "open-mixtral-8x22b",
+  "open-codestral-mamba",
+  /* Deprecated production models */
   "mistral-small-latest",
   "mistral-small-2402",
-  // Mistral Medium
   "mistral-medium-latest",
   "mistral-medium-2312",
-  // Mistral Large
-  "mistral-large-latest",
-  "mistral-large-2402",
-  // Deprecated identifiers (2024-05-01)
   "mistral-tiny",
-  "mistral-small",
-  "mistral-medium",
+  "mistral-tiny-2312",
 ];

 let modelsCache: any = null;
@@ -28,28 +28,41 @@ import {

 // https://platform.openai.com/docs/models/overview
 export const KNOWN_OPENAI_MODELS = [
+  // GPT4o
   "gpt-4o",
   "gpt-4o-2024-05-13",
-  "gpt-4-turbo", // alias for latest gpt4-turbo stable
+  // GPT4o Mini
+  "gpt-4o-mini",
+  "gpt-4o-mini-2024-07-18",
+  // GPT4 Turbo (superseded by GPT4o)
+  "gpt-4-turbo",
   "gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
   "gpt-4-turbo-preview", // alias for latest turbo preview
   "gpt-4-0125-preview", // gpt4-turbo preview 2
   "gpt-4-1106-preview", // gpt4-turbo preview 1
-  "gpt-4-vision-preview", // gpt4-turbo preview 1 with vision
+  // Launch GPT4
   "gpt-4",
   "gpt-4-0613",
-  "gpt-4-0314", // EOL 2024-06-13
-  "gpt-4-32k",
-  "gpt-4-32k-0314", // EOL 2024-06-13
-  "gpt-4-32k-0613",
+  "gpt-4-0314", // legacy
+  // GPT3.5 Turbo (superseded by GPT4o Mini)
   "gpt-3.5-turbo",
-  "gpt-3.5-turbo-0301", // EOL 2024-06-13
-  "gpt-3.5-turbo-0613",
-  "gpt-3.5-turbo-16k",
-  "gpt-3.5-turbo-16k-0613",
+  "gpt-3.5-turbo-0125", // latest turbo
+  "gpt-3.5-turbo-1106", // older turbo
+  // Text Completion
   "gpt-3.5-turbo-instruct",
   "gpt-3.5-turbo-instruct-0914",
+  // Embeddings
   "text-embedding-ada-002",
+  // Known deprecated models
+  "gpt-4-32k", // alias for gpt-4-32k-0613
+  "gpt-4-32k-0314", // EOL 2025-06-06
+  "gpt-4-32k-0613", // EOL 2025-06-06
+  "gpt-4-vision-preview", // EOL 2024-12-06
+  "gpt-4-1106-vision-preview", // EOL 2024-12-06
+  "gpt-3.5-turbo-0613", // EOL 2024-09-13
+  "gpt-3.5-turbo-0301", // not on the website anymore, maybe unavailable
+  "gpt-3.5-turbo-16k", // alias for gpt-3.5-turbo-16k-0613
+  "gpt-3.5-turbo-16k-0613", // EOL 2024-09-13
 ];

 let modelsCache: any = null;
@@ -1,7 +1,6 @@
 // Don't import any other project files here as this is one of the first modules
 // loaded and it will cause circular imports.

-import pino from "pino";
 import type { Request } from "express";

 /**
@@ -26,10 +25,9 @@ export type OpenAIModelFamily =
 export type AnthropicModelFamily = "claude" | "claude-opus";
 export type GoogleAIModelFamily = "gemini-pro";
 export type MistralAIModelFamily =
-  | "mistral-tiny"
-  | "mistral-small"
-  | "mistral-medium"
-  | "mistral-large";
+  // Mistral changes its model lineup frequently, so these families no longer
+  // correspond to specific models. Treat them as rough pricing tiers.
+  "mistral-tiny" | "mistral-small" | "mistral-medium" | "mistral-large";
 export type AwsBedrockModelFamily = "aws-claude" | "aws-claude-opus";
 export type AzureOpenAIModelFamily = `azure-${OpenAIModelFamily}`;
 export type ModelFamily =
@@ -77,21 +75,6 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
  "azure",
 ] as const);

-export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
-  "^gpt-4o": "gpt4o",
-  "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
-  "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
-  "^gpt-4-(0125|1106)(-preview)?$": "gpt4-turbo",
-  "^gpt-4(-\\d{4})?-vision(-preview)?$": "gpt4-turbo",
-  "^gpt-4-32k-\\d{4}$": "gpt4-32k",
-  "^gpt-4-32k$": "gpt4-32k",
-  "^gpt-4-\\d{4}$": "gpt4",
-  "^gpt-4$": "gpt4",
-  "^gpt-3.5-turbo": "turbo",
-  "^text-embedding-ada-002$": "turbo",
-  "^dall-e-\\d{1}$": "dall-e",
-};
-
 export const MODEL_FAMILY_SERVICE: {
  [f in ModelFamily]: LLMService;
 } = {
@@ -99,7 +82,7 @@ export const MODEL_FAMILY_SERVICE: {
  gpt4: "openai",
  "gpt4-turbo": "openai",
  "gpt4-32k": "openai",
-  "gpt4o": "openai",
+  gpt4o: "openai",
  "dall-e": "openai",
  claude: "anthropic",
  "claude-opus": "anthropic",
@@ -120,7 +103,21 @@ export const MODEL_FAMILY_SERVICE: {

 export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e"];

-pino({ level: "debug" }).child({ module: "startup" });
+export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
+  "^gpt-4o(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4o",
+  "^gpt-4o-mini(-\\d{4}-\\d{2}-\\d{2})?$": "turbo", // closest match; no dedicated family, so it shares the 3.5 Turbo one
+  "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
+  "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
+  "^gpt-4-(0125|1106)(-preview)?$": "gpt4-turbo",
+  "^gpt-4(-\\d{4})?-vision(-preview)?$": "gpt4-turbo",
+  "^gpt-4-32k-\\d{4}$": "gpt4-32k",
+  "^gpt-4-32k$": "gpt4-32k",
+  "^gpt-4-\\d{4}$": "gpt4",
+  "^gpt-4$": "gpt4",
+  "^gpt-3\\.5-turbo": "turbo",
+  "^text-embedding-ada-002$": "turbo",
+  "^dall-e-\\d{1}$": "dall-e",
+};

 export function getOpenAIModelFamily(
  model: string,
@@ -151,10 +148,15 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
      return prunedModel as MistralAIModelFamily;
    case "open-mistral-7b":
      return "mistral-tiny";
+    case "open-mistral-nemo":
    case "open-mixtral-8x7b":
+    case "codestral":
+    case "open-codestral-mamba":
      return "mistral-small";
+    case "open-mixtral-8x22b":
+      return "mistral-medium";
    default:
-      return "mistral-tiny";
+      return "mistral-small";
  }
 }