Adds new Mistral models and updates older model lists and context limits

This commit is contained in:
nai-degen
2024-07-28 13:15:03 -05:00
parent 584bb3fbc7
commit 9a3cca6b80
5 changed files with 90 additions and 53 deletions
+2 -2
View File
@@ -12,7 +12,7 @@ import { checkCsrfToken, injectCsrfToken } from "./shared/inject-csrf";
const INFO_PAGE_TTL = 2000;
const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
turbo: "GPT-3.5 Turbo",
turbo: "GPT-4o Mini / 3.5 Turbo",
gpt4: "GPT-4",
"gpt4-32k": "GPT-4 32k",
"gpt4-turbo": "GPT-4 Turbo",
@@ -22,7 +22,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"claude-opus": "Claude (Opus)",
"gemini-pro": "Gemini Pro",
"mistral-tiny": "Mistral 7B",
"mistral-small": "Mixtral Small", // Originally 8x7B, but that now refers to the older open-weight version. Mixtral Small is a newer closed-weight update to the 8x7B model.
"mistral-small": "Mistral Nemo",
"mistral-medium": "Mistral Medium",
"mistral-large": "Mistral Large",
"aws-claude": "AWS Claude (Sonnet)",
@@ -6,8 +6,9 @@ import { RequestPreprocessor } from "../index";
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
const GOOGLE_AI_MAX_CONTEXT = 32000;
const MISTRAL_AI_MAX_CONTENT = 32768;
// todo: make configurable
const GOOGLE_AI_MAX_CONTEXT = 1024000;
const MISTRAL_AI_MAX_CONTENT = 131072;
/**
* Assigns `req.promptTokens` and `req.outputTokens` based on the request body
@@ -81,16 +82,18 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
} else if (model.match(/^claude-3/)) {
modelMax = 200000;
} else if (model.match(/^gemini-\d{3}$/)) {
modelMax = GOOGLE_AI_MAX_CONTEXT;
} else if (model.match(/^mistral-(tiny|small|medium)$/)) {
modelMax = MISTRAL_AI_MAX_CONTENT;
modelMax = 1024000;
} else if (model.match(/^anthropic\.claude-3/)) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude-v2:\d/)) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude/)) {
// Not sure if AWS Claude has the same context limit as Anthropic Claude.
modelMax = 100000;
} else if (model.match(/tral/)) {
// catches mistral, mixtral, codestral, mathstral, etc. mistral models have
// no name convention and wildly different context windows so this is a
// catch-all
modelMax = MISTRAL_AI_MAX_CONTENT;
} else {
req.log.warn({ model }, "Unknown model, using 200k token limit.");
modelMax = 200000;
+31 -12
View File
@@ -22,27 +22,46 @@ import {
ProxyResHandlerWithBody,
} from "./middleware/response";
/**
 * Known Mistral AI model identifiers.
 *
 * Mistral can't settle on a single naming scheme and deprecates models within
 * months of releasing them, so this list is hard to keep up to date.
 * Last reviewed 2024-07-28 against https://docs.mistral.ai/platform/endpoints
 *
 * Every identifier appears exactly once; keep it that way when adding models.
 */
export const KNOWN_MISTRAL_AI_MODELS = [
  /*
  Mistral Nemo
  "A 12B model built with the partnership with Nvidia. It is easy to use and a
  drop-in replacement in any system using Mistral 7B that it supersedes."
  */
  "open-mistral-nemo",
  "open-mistral-nemo-2407",
  /*
  Mistral Large
  "Our flagship model with state-of-the-art reasoning, knowledge, and coding
  capabilities."
  */
  "mistral-large-latest",
  "mistral-large-2407",
  "mistral-large-2402", // deprecated
  /*
  Codestral
  "A cutting-edge generative model that has been specifically designed and
  optimized for code generation tasks, including fill-in-the-middle and code
  completion."
  note: this uses a separate bidi completion endpoint that is not implemented
  */
  "codestral-latest",
  "codestral-2405",
  /* So-called "Research Models" */
  "open-mistral-7b",
  "open-mixtral-8x7b",
  // Spelled "mixtral": the 8x22B model is a Mixtral, like the 8x7B one above.
  "open-mixtral-8x22b",
  "open-codestral-mamba",
  /* Deprecated production models */
  "mistral-small-latest",
  "mistral-small-2402",
  "mistral-small-2312", // Mixtral 8x7b (open weight, legacy)
  "mistral-medium-latest",
  "mistral-medium-2312",
  "mistral-tiny-2312", // Mistral 7b (open weight, legacy)
  /* Deprecated identifiers (2024-05-01) */
  "mistral-tiny",
  "mistral-small",
  "mistral-medium",
];
let modelsCache: any = null;
+23 -10
View File
@@ -28,28 +28,41 @@ import {
/**
 * Known OpenAI model identifiers, grouped by generation.
 *
 * Source: https://platform.openai.com/docs/models/overview
 *
 * Every identifier appears exactly once. Models that OpenAI has scheduled for
 * removal live only in the "Known deprecated models" group at the bottom.
 */
export const KNOWN_OPENAI_MODELS = [
  // GPT4o
  "gpt-4o",
  "gpt-4o-2024-05-13",
  // GPT4o Mini
  "gpt-4o-mini",
  "gpt-4o-mini-2024-07-18",
  // GPT4 Turbo (superseded by GPT4o)
  "gpt-4-turbo", // alias for latest gpt4-turbo stable
  "gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
  "gpt-4-turbo-preview", // alias for latest turbo preview
  "gpt-4-0125-preview", // gpt4-turbo preview 2
  "gpt-4-1106-preview", // gpt4-turbo preview 1
  // Launch GPT4
  "gpt-4",
  "gpt-4-0613",
  "gpt-4-0314", // legacy
  // GPT3.5 Turbo (superseded by GPT4o Mini)
  "gpt-3.5-turbo",
  "gpt-3.5-turbo-0125", // latest turbo
  "gpt-3.5-turbo-1106", // older turbo
  // Text Completion
  "gpt-3.5-turbo-instruct",
  "gpt-3.5-turbo-instruct-0914",
  // Embeddings
  "text-embedding-ada-002",
  // Known deprecated models
  "gpt-4-32k", // alias for 0613
  "gpt-4-32k-0314", // EOL 2025-06-06
  "gpt-4-32k-0613", // EOL 2025-06-06
  "gpt-4-vision-preview", // EOL 2024-12-06
  "gpt-4-1106-vision-preview", // EOL 2024-12-06
  "gpt-3.5-turbo-0613", // EOL 2024-09-13
  "gpt-3.5-turbo-0301", // not on the website anymore, maybe unavailable
  "gpt-3.5-turbo-16k", // alias for 0613
  "gpt-3.5-turbo-16k-0613", // EOL 2024-09-13
];
let modelsCache: any = null;
+25 -23
View File
@@ -1,7 +1,6 @@
// Don't import any other project files here as this is one of the first modules
// loaded and it will cause circular imports.
import pino from "pino";
import type { Request } from "express";
/**
@@ -26,10 +25,9 @@ export type OpenAIModelFamily =
export type AnthropicModelFamily = "claude" | "claude-opus";
export type GoogleAIModelFamily = "gemini-pro";
export type MistralAIModelFamily =
| "mistral-tiny"
| "mistral-small"
| "mistral-medium"
| "mistral-large";
// Mistral changes its model classes frequently, so these no longer
// correspond to specific models. Consider them rough pricing tiers.
"mistral-tiny" | "mistral-small" | "mistral-medium" | "mistral-large";
export type AwsBedrockModelFamily = "aws-claude" | "aws-claude-opus";
export type AzureOpenAIModelFamily = `azure-${OpenAIModelFamily}`;
export type ModelFamily =
@@ -77,21 +75,6 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
"azure",
] as const);
/**
 * Maps regular-expression source strings to the OpenAI model family assigned
 * to model IDs that match them.
 *
 * NOTE(review): the consumer is not visible here; presumably each key is
 * compiled with `new RegExp` and the first matching entry wins, so keep the
 * patterns mutually exclusive and the entry order stable.
 */
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
  // Anchored at both ends so "gpt-4o-mini" is not captured by the gpt4o family.
  "^gpt-4o(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4o",
  "^gpt-4o-mini(-\\d{4}-\\d{2}-\\d{2})?$": "turbo", // closest match
  "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
  "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
  "^gpt-4-(0125|1106)(-preview)?$": "gpt4-turbo",
  "^gpt-4(-\\d{4})?-vision(-preview)?$": "gpt4-turbo",
  "^gpt-4-32k-\\d{4}$": "gpt4-32k",
  "^gpt-4-32k$": "gpt4-32k",
  "^gpt-4-\\d{4}$": "gpt4",
  "^gpt-4$": "gpt4",
  // The dot is escaped so it only matches a literal "." in "gpt-3.5".
  "^gpt-3\\.5-turbo": "turbo",
  "^text-embedding-ada-002$": "turbo",
  "^dall-e-\\d{1}$": "dall-e",
};
export const MODEL_FAMILY_SERVICE: {
[f in ModelFamily]: LLMService;
} = {
@@ -99,7 +82,7 @@ export const MODEL_FAMILY_SERVICE: {
gpt4: "openai",
"gpt4-turbo": "openai",
"gpt4-32k": "openai",
"gpt4o": "openai",
gpt4o: "openai",
"dall-e": "openai",
claude: "anthropic",
"claude-opus": "anthropic",
@@ -120,7 +103,21 @@ export const MODEL_FAMILY_SERVICE: {
export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e"];
pino({ level: "debug" }).child({ module: "startup" });
/**
 * Maps regular-expression source strings to the OpenAI model family assigned
 * to model IDs that match them.
 *
 * NOTE(review): the consumer is not visible here; presumably each key is
 * compiled with `new RegExp` and the first matching entry wins, so keep the
 * patterns mutually exclusive and the entry order stable.
 */
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
  "^gpt-4o(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4o",
  "^gpt-4o-mini(-\\d{4}-\\d{2}-\\d{2})?$": "turbo", // closest match
  "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
  "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
  "^gpt-4-(0125|1106)(-preview)?$": "gpt4-turbo",
  "^gpt-4(-\\d{4})?-vision(-preview)?$": "gpt4-turbo",
  "^gpt-4-32k-\\d{4}$": "gpt4-32k",
  "^gpt-4-32k$": "gpt4-32k",
  "^gpt-4-\\d{4}$": "gpt4",
  "^gpt-4$": "gpt4",
  // The dot is escaped so it only matches a literal "." in "gpt-3.5".
  "^gpt-3\\.5-turbo": "turbo",
  "^text-embedding-ada-002$": "turbo",
  "^dall-e-\\d{1}$": "dall-e",
};
export function getOpenAIModelFamily(
model: string,
@@ -151,10 +148,15 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
return prunedModel as MistralAIModelFamily;
case "open-mistral-7b":
return "mistral-tiny";
case "open-mistral-nemo":
case "open-mixtral-8x7b":
case "codestral":
case "open-codestral-mamba":
return "mistral-small";
case "open-mixtral-8x22b":
return "mistral-medium";
default:
return "mistral-tiny";
return "mistral-small";
}
}