adds new mistral models and updates older model lists/context limits
This commit is contained in:
+2
-2
@@ -12,7 +12,7 @@ import { checkCsrfToken, injectCsrfToken } from "./shared/inject-csrf";
|
||||
|
||||
const INFO_PAGE_TTL = 2000;
|
||||
const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
|
||||
turbo: "GPT-3.5 Turbo",
|
||||
turbo: "GPT-4o Mini / 3.5 Turbo",
|
||||
gpt4: "GPT-4",
|
||||
"gpt4-32k": "GPT-4 32k",
|
||||
"gpt4-turbo": "GPT-4 Turbo",
|
||||
@@ -22,7 +22,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
|
||||
"claude-opus": "Claude (Opus)",
|
||||
"gemini-pro": "Gemini Pro",
|
||||
"mistral-tiny": "Mistral 7B",
|
||||
"mistral-small": "Mixtral Small", // Originally 8x7B, but that now refers to the older open-weight version. Mixtral Small is a newer closed-weight update to the 8x7B model.
|
||||
"mistral-small": "Mistral Nemo",
|
||||
"mistral-medium": "Mistral Medium",
|
||||
"mistral-large": "Mistral Large",
|
||||
"aws-claude": "AWS Claude (Sonnet)",
|
||||
|
||||
@@ -6,8 +6,9 @@ import { RequestPreprocessor } from "../index";
|
||||
|
||||
// Per-service context ceilings, in tokens, used when validating the size of
// an incoming prompt.

// Anthropic and OpenAI ceilings are operator-configurable.
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;

// todo: make configurable
// Each of the two constants below was declared twice in this span (32000 and
// 32768 being the earlier values); only the newer, larger limits are kept so
// that each name is declared exactly once.
const GOOGLE_AI_MAX_CONTEXT = 1024000;
// NOTE(review): "CONTENT" looks like a typo for "CONTEXT", but the name is
// referenced outside this block so it is left unchanged here.
const MISTRAL_AI_MAX_CONTENT = 131072;
|
||||
|
||||
/**
|
||||
* Assigns `req.promptTokens` and `req.outputTokens` based on the request body
|
||||
@@ -81,16 +82,18 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
|
||||
} else if (model.match(/^claude-3/)) {
|
||||
modelMax = 200000;
|
||||
} else if (model.match(/^gemini-\d{3}$/)) {
|
||||
modelMax = GOOGLE_AI_MAX_CONTEXT;
|
||||
} else if (model.match(/^mistral-(tiny|small|medium)$/)) {
|
||||
modelMax = MISTRAL_AI_MAX_CONTENT;
|
||||
modelMax = 1024000;
|
||||
} else if (model.match(/^anthropic\.claude-3/)) {
|
||||
modelMax = 200000;
|
||||
} else if (model.match(/^anthropic\.claude-v2:\d/)) {
|
||||
modelMax = 200000;
|
||||
} else if (model.match(/^anthropic\.claude/)) {
|
||||
// Not sure if AWS Claude has the same context limit as Anthropic Claude.
|
||||
modelMax = 100000;
|
||||
} else if (model.match(/tral/)) {
|
||||
// catches mistral, mixtral, codestral, mathstral, etc. mistral models have
|
||||
// no name convention and wildly different context windows so this is a
|
||||
// catch-all
|
||||
modelMax = MISTRAL_AI_MAX_CONTENT;
|
||||
} else {
|
||||
req.log.warn({ model }, "Unknown model, using 200k token limit.");
|
||||
modelMax = 200000;
|
||||
|
||||
+31
-12
@@ -22,27 +22,46 @@ import {
|
||||
ProxyResHandlerWithBody,
|
||||
} from "./middleware/response";
|
||||
|
||||
// Mistral can't settle on a single naming scheme and deprecates models within
|
||||
// months of releasing them so this list is hard to keep up to date. 2024-07-28
|
||||
// https://docs.mistral.ai/platform/endpoints
|
||||
/**
 * Model identifiers this proxy recognises for the Mistral AI API.
 *
 * Each identifier appears exactly once. Section comments quote or paraphrase
 * Mistral's own grouping of the models; the final section keeps older
 * identifiers that earlier revisions of this list exposed.
 */
export const KNOWN_MISTRAL_AI_MODELS = [
  /*
  Mistral Nemo
  "A 12B model built with the partnership with Nvidia. It is easy to use and a
  drop-in replacement in any system using Mistral 7B that it supersedes."
  */
  "open-mistral-nemo",
  "open-mistral-nemo-2407",
  /*
  Mistral Large
  "Our flagship model with state-of-the-art reasoning, knowledge, and coding
  capabilities."
  */
  "mistral-large-latest",
  "mistral-large-2407",
  "mistral-large-2402", // deprecated
  /*
  Codestral
  "A cutting-edge generative model that has been specifically designed and
  optimized for code generation tasks, including fill-in-the-middle and code
  completion."
  note: this uses a separate bidi completion endpoint that is not implemented
  */
  "codestral-latest",
  "codestral-2405",
  /* So-called "Research Models" */
  "open-mistral-7b",
  "open-mixtral-8x7b",
  // Spelled "mixtral", matching the identifier handled by the model-family
  // lookup; the previous "open-mistral-8x22b" entry was a typo.
  "open-mixtral-8x22b",
  "open-codestral-mamba",
  /* Deprecated production models */
  "mistral-small-latest",
  "mistral-small-2402",
  "mistral-medium-latest",
  "mistral-medium-2312",
  "mistral-tiny-2312",
  /* Deprecated identifiers (2024-05-01), retained for older clients */
  "mistral-small-2312",
  "mistral-tiny",
  "mistral-small",
  "mistral-medium",
];
|
||||
|
||||
let modelsCache: any = null;
|
||||
|
||||
+23
-10
@@ -28,28 +28,41 @@ import {
|
||||
|
||||
// https://platform.openai.com/docs/models/overview
|
||||
/**
 * Model identifiers this proxy recognises for the OpenAI API.
 *
 * Each identifier appears exactly once, grouped by generation. Models with an
 * announced end-of-life date live in the final "Known deprecated models"
 * section rather than alongside their family.
 */
export const KNOWN_OPENAI_MODELS = [
  // GPT4o
  "gpt-4o",
  "gpt-4o-2024-05-13",
  // GPT4o Mini
  "gpt-4o-mini",
  "gpt-4o-mini-2024-07-18",
  // GPT4 Turbo (superseded by GPT4o)
  "gpt-4-turbo",
  "gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
  "gpt-4-turbo-preview", // alias for latest turbo preview
  "gpt-4-0125-preview", // gpt4-turbo preview 2
  "gpt-4-1106-preview", // gpt4-turbo preview 1
  // Launch GPT4
  "gpt-4",
  "gpt-4-0613",
  "gpt-4-0314", // legacy
  // GPT3.5 Turbo (superseded by GPT4o Mini)
  "gpt-3.5-turbo",
  "gpt-3.5-turbo-0125", // latest turbo
  "gpt-3.5-turbo-1106", // older turbo
  // Text Completion
  "gpt-3.5-turbo-instruct",
  "gpt-3.5-turbo-instruct-0914",
  // Embeddings
  "text-embedding-ada-002",
  // Known deprecated models
  "gpt-4-32k", // alias for 0613
  "gpt-4-32k-0314", // EOL 2025-06-06
  "gpt-4-32k-0613", // EOL 2025-06-06
  "gpt-4-vision-preview", // EOL 2024-12-06
  "gpt-4-1106-vision-preview", // EOL 2024-12-06
  "gpt-3.5-turbo-0613", // EOL 2024-09-13
  "gpt-3.5-turbo-0301", // not on the website anymore, maybe unavailable
  "gpt-3.5-turbo-16k", // alias for 0613
  "gpt-3.5-turbo-16k-0613", // EOL 2024-09-13
];
|
||||
|
||||
let modelsCache: any = null;
|
||||
|
||||
+25
-23
@@ -1,7 +1,6 @@
|
||||
// Don't import any other project files here as this is one of the first modules
|
||||
// loaded and it will cause circular imports.
|
||||
|
||||
import pino from "pino";
|
||||
import type { Request } from "express";
|
||||
|
||||
/**
|
||||
@@ -26,10 +25,9 @@ export type OpenAIModelFamily =
|
||||
export type AnthropicModelFamily = "claude" | "claude-opus";
|
||||
export type GoogleAIModelFamily = "gemini-pro";
|
||||
/**
 * Model families used to bucket Mistral AI models.
 *
 * mistral changes their model classes frequently so these no longer
 * correspond to specific models. consider them rough pricing tiers.
 */
export type MistralAIModelFamily =
  "mistral-tiny" | "mistral-small" | "mistral-medium" | "mistral-large";
|
||||
export type AwsBedrockModelFamily = "aws-claude" | "aws-claude-opus";
|
||||
export type AzureOpenAIModelFamily = `azure-${OpenAIModelFamily}`;
|
||||
export type ModelFamily =
|
||||
@@ -77,21 +75,6 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
|
||||
"azure",
|
||||
] as const);
|
||||
|
||||
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
|
||||
"^gpt-4o": "gpt4o",
|
||||
"^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
|
||||
"^gpt-4-turbo(-preview)?$": "gpt4-turbo",
|
||||
"^gpt-4-(0125|1106)(-preview)?$": "gpt4-turbo",
|
||||
"^gpt-4(-\\d{4})?-vision(-preview)?$": "gpt4-turbo",
|
||||
"^gpt-4-32k-\\d{4}$": "gpt4-32k",
|
||||
"^gpt-4-32k$": "gpt4-32k",
|
||||
"^gpt-4-\\d{4}$": "gpt4",
|
||||
"^gpt-4$": "gpt4",
|
||||
"^gpt-3.5-turbo": "turbo",
|
||||
"^text-embedding-ada-002$": "turbo",
|
||||
"^dall-e-\\d{1}$": "dall-e",
|
||||
};
|
||||
|
||||
export const MODEL_FAMILY_SERVICE: {
|
||||
[f in ModelFamily]: LLMService;
|
||||
} = {
|
||||
@@ -99,7 +82,7 @@ export const MODEL_FAMILY_SERVICE: {
|
||||
gpt4: "openai",
|
||||
"gpt4-turbo": "openai",
|
||||
"gpt4-32k": "openai",
|
||||
"gpt4o": "openai",
|
||||
gpt4o: "openai",
|
||||
"dall-e": "openai",
|
||||
claude: "anthropic",
|
||||
"claude-opus": "anthropic",
|
||||
@@ -120,7 +103,21 @@ export const MODEL_FAMILY_SERVICE: {
|
||||
|
||||
export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e"];
|
||||
|
||||
pino({ level: "debug" }).child({ module: "startup" });
|
||||
/**
 * Maps regular-expression source strings to the OpenAI model family that a
 * matching model identifier belongs to.
 *
 * Keys are regex sources, so backslashes are doubled inside the string
 * literals. The `gpt-4o` pattern is anchored with `$` so that `gpt-4o-mini`
 * identifiers do not match it and instead reach the dedicated mini entry.
 *
 * NOTE(review): entries are presumably tested in insertion order by the
 * family lookup function; confirm before reordering.
 */
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
  "^gpt-4o(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4o",
  "^gpt-4o-mini(-\\d{4}-\\d{2}-\\d{2})?$": "turbo", // closest match
  "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
  "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
  "^gpt-4-(0125|1106)(-preview)?$": "gpt4-turbo",
  "^gpt-4(-\\d{4})?-vision(-preview)?$": "gpt4-turbo",
  "^gpt-4-32k-\\d{4}$": "gpt4-32k",
  "^gpt-4-32k$": "gpt4-32k",
  "^gpt-4-\\d{4}$": "gpt4",
  "^gpt-4$": "gpt4",
  // The dot is escaped so only a literal "3.5" matches, not "3" + any
  // character + "5".
  "^gpt-3\\.5-turbo": "turbo",
  "^text-embedding-ada-002$": "turbo",
  "^dall-e-\\d{1}$": "dall-e",
};
|
||||
|
||||
export function getOpenAIModelFamily(
|
||||
model: string,
|
||||
@@ -151,10 +148,15 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
|
||||
return prunedModel as MistralAIModelFamily;
|
||||
case "open-mistral-7b":
|
||||
return "mistral-tiny";
|
||||
case "open-mistral-nemo":
|
||||
case "open-mixtral-8x7b":
|
||||
case "codestral":
|
||||
case "open-codestral-mamba":
|
||||
return "mistral-small";
|
||||
case "open-mixtral-8x22b":
|
||||
return "mistral-medium";
|
||||
default:
|
||||
return "mistral-tiny";
|
||||
return "mistral-small";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user