From 32739fa15cee26f9dfa416254f3c05010e1d809b Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Wed, 18 Sep 2024 20:28:45 -0700 Subject: [PATCH] Mistral: update models and pricing --- docs/config-local-localai.md | 2 +- .../chat/components/message/messageUtils.tsx | 7 + .../server/openai/models/mistral.models.ts | 397 ++++++++++++++++++ .../server/openai/{ => models}/models.data.ts | 271 +----------- .../llms/server/openai/openai.router.ts | 7 +- 5 files changed, 416 insertions(+), 268 deletions(-) create mode 100644 src/modules/llms/server/openai/models/mistral.models.ts rename src/modules/llms/server/openai/{ => models}/models.data.ts (79%) diff --git a/docs/config-local-localai.md b/docs/config-local-localai.md index fcd30894b..0bab8d45f 100644 --- a/docs/config-local-localai.md +++ b/docs/config-local-localai.md @@ -54,7 +54,7 @@ If the running LocalAI instance is configured with a [Model Gallery](https://loc At the time of writing, LocalAI does not publish the model `context window size`. Every model is assumed to be capable of chatting, and with a context window of 4096 tokens. -Please update the [src/modules/llms/transports/server/openai/models.data.ts](../src/modules/llms/server/openai/models.data.ts) +Please update the [src/modules/llms/server/openai/models/models.data.ts](../src/modules/llms/server/openai/models/models.data.ts) file with the mapping information between LocalAI model IDs and names/descriptions/tokens, etc. # 🤝 Support diff --git a/src/apps/chat/components/message/messageUtils.tsx b/src/apps/chat/components/message/messageUtils.tsx index 120a905c8..f1fca47b7 100644 --- a/src/apps/chat/components/message/messageUtils.tsx +++ b/src/apps/chat/components/message/messageUtils.tsx @@ -64,6 +64,8 @@ export const messageAvatarLabelSx: SxProps = { export const messageAvatarLabelAnimatedSx: SxProps = { animation: `${animationColorRainbow} 5s linear infinite`, + // Extra hinting... 
but looks weird + // fontStyle: 'italic', }; export const aixSkipBoxSx = { @@ -364,9 +366,14 @@ export function prettyShortChatModelName(model: string | undefined): string { // [Anthropic] const prettyAnthropic = _prettyAnthropicModelName(model); if (prettyAnthropic) return prettyAnthropic; + // [Deepseek] + if (model.includes('deepseek-chat')) return 'Deepseek Chat'; + if (model.includes('deepseek-coder')) return 'Deepseek Coder'; // [LM Studio] if (model.startsWith('C:\\') || model.startsWith('D:\\')) return _prettyLMStudioFileModelName(model).replace('.gguf', ''); + // [Mistral] + if (model.includes('mistral-large')) return 'Mistral Large'; // [Ollama] if (model.includes(':')) return model.replace(':latest', '').replaceAll(':', ' '); diff --git a/src/modules/llms/server/openai/models/mistral.models.ts b/src/modules/llms/server/openai/models/mistral.models.ts new file mode 100644 index 000000000..71474581e --- /dev/null +++ b/src/modules/llms/server/openai/models/mistral.models.ts @@ -0,0 +1,397 @@ +import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; + +import { fromManualMapping, ManualMappings } from './models.data'; +import type { ModelDescriptionSchema } from '../../llm.server.types'; +import { wireMistralModelsListOutputSchema } from '../mistral.wiretypes'; + + +// [Mistral] +// updated from the models on: https://docs.mistral.ai/getting-started/models/ +// and the pricing available on: https://mistral.ai/technology/#pricing + +const _knownMistralChatModels: ManualMappings = [ + // General-purpose models + + // Mistral NeMo + { + idPrefix: 'open-mistral-nemo-2407', + label: 'Mistral NeMo (2407)', + description: 'Mistral NeMo is a state-of-the-art 12B model developed with NVIDIA.', + contextWindow: 131072, // 128K tokens + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.15, output: 0.15 }, // $0.15 /1M tokens input and output + }, + { + idPrefix: 'open-mistral-nemo', + label: '🔗 Mistral NeMo (latest) → 
open-mistral-nemo-2407', + symLink: 'open-mistral-nemo-2407', + hidden: true, + // Copied details + description: 'Mistral NeMo is a state-of-the-art 12B model developed with NVIDIA.', + contextWindow: 131072, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.15, output: 0.15 }, + }, + + // Mistral Large 2 + { + idPrefix: 'mistral-large-2407', + label: 'Mistral Large 2 (2407)', + description: 'Top-tier reasoning for high-complexity tasks, for your most sophisticated needs.', + contextWindow: 131072, // 128K tokens + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + chatPrice: { input: 2, output: 6 }, // $2 /1M tokens input, $6 /1M tokens output + }, + { + idPrefix: 'mistral-large-latest', + label: '🔗 Mistral Large (latest) → mistral-large-2407', + symLink: 'mistral-large-2407', + hidden: true, + // Copied details + description: 'Top-tier reasoning for high-complexity tasks, for your most sophisticated needs.', + contextWindow: 131072, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + chatPrice: { input: 2, output: 6 }, + }, + { + idPrefix: 'mistral-large-2402', + label: 'Mistral Large (2402)', + description: 'Top-tier reasoning for high-complexity tasks.', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + chatPrice: { input: 4, output: 12 }, // $4 /1M tokens input, $12 /1M tokens output + isLegacy: true, + hidden: true, + }, + + // Mistral Small + { + idPrefix: 'mistral-small-2409', + label: 'Mistral Small (24.09)', + description: 'Cost-efficient, fast, and reliable option for use cases such as translation, summarization, and sentiment analysis.', + contextWindow: 131072, // 128K tokens + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.2, output: 0.6 }, // $0.2 /1M tokens input, $0.6 /1M tokens output + }, + { + idPrefix: 'mistral-small-latest', + label: '🔗 Mistral Small (latest) → mistral-small-2409', + symLink: 'mistral-small-2409', + hidden: true, + // Copied details + description: 'Cost-efficient, fast, and reliable option for 
use cases such as translation, summarization, and sentiment analysis.', + contextWindow: 131072, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.2, output: 0.6 }, + }, + { + idPrefix: 'mistral-small-2402', + label: 'Mistral Small (2402) [legacy]', + description: 'Suitable for simple tasks that one can do in bulk (Classification, Customer Support, or Text Generation).', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + chatPrice: { input: 1, output: 3 }, + isLegacy: true, + hidden: true, + }, + { + idPrefix: 'mistral-small-2312', + label: 'Mistral Small (2312) [legacy]', + description: 'Aka open-mixtral-8x7b. Suitable for simple tasks that one can do in bulk (Classification, Customer Support, or Text Generation).', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 1, output: 3 }, + isLegacy: true, + hidden: true, + }, + { + idPrefix: 'mistral-small', + label: '🔗 Mistral Small → mistral-small-2409', + symLink: 'mistral-small-2409', + hidden: true, + // Copied details + description: 'Cost-efficient, fast, and reliable option for use cases such as translation, summarization, and sentiment analysis.', + contextWindow: 131072, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.2, output: 0.6 }, + }, + + // Specialist models + + // Codestral + { + idPrefix: 'codestral-2405', + label: 'Codestral (2405)', + description: 'State-of-the-art Mistral model trained specifically for code tasks.', + contextWindow: 32768, // 32K tokens + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.2, output: 0.6 }, // $0.2 /1M tokens input, $0.6 /1M tokens output + }, + { + idPrefix: 'codestral-latest', + label: '🔗 Codestral (latest) → codestral-2405', + symLink: 'codestral-2405', + hidden: true, + // Copied details + description: 'State-of-the-art Mistral model trained specifically for code tasks.', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.2, output: 0.6 }, + }, + + // Codestral 
Mamba + { + idPrefix: 'codestral-mamba-2407', + label: 'Codestral Mamba (2407)', + description: 'Our first Mamba 2 open-source model released July 2024.', + contextWindow: 262144, // 256K tokens + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.5, output: 0.5 }, // Placeholder pricing + hidden: true, // Not listed in pricing table + }, + { + idPrefix: 'codestral-mamba-latest', + label: '🔗 Codestral Mamba (latest) → codestral-mamba-2407', + symLink: 'codestral-mamba-2407', + hidden: true, + // Copied details + description: 'Our first Mamba 2 open-source model released July 2024.', + contextWindow: 262144, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.5, output: 0.5 }, + }, + { + idPrefix: 'open-codestral-mamba', + label: '🔗 Codestral Mamba → codestral-mamba-2407', + symLink: 'codestral-mamba-2407', + hidden: true, + // Copied details + description: 'Our first Mamba 2 open-source model released July 2024.', + contextWindow: 262144, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.5, output: 0.5 }, + }, + + // Pixtral + { + idPrefix: 'pixtral-12b-2409', + label: 'Pixtral 12B (24.09)', + description: 'Vision-capable model.', + contextWindow: 131072, // 128K tokens + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision], + chatPrice: { input: 0.15, output: 0.15 }, // $0.15 /1M tokens input and output + }, + { + idPrefix: 'pixtral-latest', + label: '🔗 Pixtral (latest) → pixtral-12b-2409', + symLink: 'pixtral-12b-2409', + hidden: true, + // Copied details + description: 'Vision-capable model.', + contextWindow: 131072, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision], + chatPrice: { input: 0.15, output: 0.15 }, + }, + { + idPrefix: 'pixtral-12b', + label: '🔗 Pixtral 12B → pixtral-12b-2409', + symLink: 'pixtral-12b-2409', + hidden: true, + // Copied details + description: 'Vision-capable model.', + contextWindow: 131072, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision], + chatPrice: { input: 0.15, output: 0.15 }, + }, + { + idPrefix: 'pixtral', + 
label: '🔗 Pixtral → pixtral-12b-2409', + symLink: 'pixtral-12b-2409', + hidden: true, + // Copied details + description: 'Vision-capable model.', + contextWindow: 131072, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision], + chatPrice: { input: 0.15, output: 0.15 }, + }, + + // Mistral Embed + { + idPrefix: 'mistral-embed', + label: 'Mistral Embed (23.12)', + description: 'State-of-the-art semantic model for extracting representations of text extracts.', + contextWindow: 8192, // 8K tokens + maxCompletionTokens: 8192, + interfaces: [], + chatPrice: { input: 0.1, output: 0 }, // $0.1 /1M tokens input, output not applicable + hidden: true, // Embedding models are usually hidden + }, + + // Research models + + // Mixtral Models + { + idPrefix: 'open-mixtral-8x22b-2404', + label: 'Open Mixtral 8x22B (2404)', + description: 'Mixtral 8x22B is currently the most performant open model.', + contextWindow: 65536, // 64K tokens + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + chatPrice: { input: 2, output: 6 }, // $2 /1M tokens input, $6 /1M tokens output + isLegacy: true, + hidden: true, + }, + { + idPrefix: 'open-mixtral-8x22b', + label: '🔗 Open Mixtral 8x22B → open-mixtral-8x22b-2404', + symLink: 'open-mixtral-8x22b-2404', + hidden: true, + // Copied details + description: 'Mixtral 8x22B is currently the most performant open model.', + contextWindow: 65536, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + chatPrice: { input: 2, output: 6 }, + isLegacy: true, + }, + { + idPrefix: 'open-mixtral-8x7b', + label: 'Open Mixtral 8x7B (v0.1)', + description: 'A 7B sparse Mixture-of-Experts (SMoE) model.', + contextWindow: 32768, // 32K tokens + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.7, output: 0.7 }, // $0.7 /1M tokens input and output + isLegacy: true, + hidden: true, + }, + + // Mathstral + { + idPrefix: 'mathstral-v0.1', + label: 'Mathstral (v0.1)', + description: 'Variant of Mistral-7B, optimized for solving advanced mathematics problems.', + contextWindow: 
32768, // 32K tokens + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.25, output: 0.25 }, // Placeholder pricing + hidden: true, // Not listed in pricing table + }, + + // Legacy models + + // Mistral Medium + { + idPrefix: 'mistral-medium-2312', + label: 'Mistral Medium (2312)', + description: 'Our first commercial model.', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 2.75, output: 8.1 }, // $2.75 /1M tokens input, $8.1 /1M tokens output + isLegacy: true, + hidden: true, + }, + { + idPrefix: 'mistral-medium-latest', + label: '🔗 Mistral Medium (latest) → mistral-medium-2312', + symLink: 'mistral-medium-2312', + hidden: true, + // Copied details + description: 'Our first commercial model.', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 2.75, output: 8.1 }, + isLegacy: true, + }, + { + idPrefix: 'mistral-medium', + label: '🔗 Mistral Medium → mistral-medium-2312', + symLink: 'mistral-medium-2312', + hidden: true, + // Copied details + description: 'Our first commercial model.', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 2.75, output: 8.1 }, + isLegacy: true, + }, + + // Mistral Tiny + { + idPrefix: 'mistral-tiny-2312', + label: 'Mistral Tiny (2312)', + description: 'Aka open-mistral-7b. Used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], + isLegacy: true, + hidden: true, + }, + { + idPrefix: 'mistral-tiny-2407', + label: 'Mistral Tiny (2407)', + description: 'Aka open-mistral-7b. 
Used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], + isLegacy: true, + hidden: true, + }, + { + idPrefix: 'mistral-tiny-latest', + label: '🔗 Mistral Tiny (latest) → mistral-tiny-2407', + symLink: 'mistral-tiny-2407', + hidden: true, + // Copied details + description: 'Aka open-mistral-7b. Used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], + isLegacy: true, + }, + { + idPrefix: 'mistral-tiny', + label: '🔗 Mistral Tiny → mistral-tiny-2312', + symLink: 'mistral-tiny-2312', + hidden: true, + // Copied details + description: 'Aka open-mistral-7b. Used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], + isLegacy: true, + }, + + // Mistral 7B + { + idPrefix: 'open-mistral-7b', + label: 'Mistral 7B (v0.3)', + description: 'A 7B transformer model, fast-deployed and easily customizable.', + contextWindow: 32768, // 32K tokens + interfaces: [LLM_IF_OAI_Chat], + chatPrice: { input: 0.25, output: 0.25 }, + isLegacy: true, + hidden: true, + }, +]; + +const mistralModelFamilyOrder = [ + 'codestral', 'mistral-large', 'open-mixtral-8x22b', 'mistral-medium', 'open-mixtral-8x7b', 'mistral-small', 'open-mistral-7b', 'mistral-tiny', 'mistral-embed', '🔗', +]; + +export function mistralModelToModelDescription(_model: unknown): ModelDescriptionSchema { + const model = wireMistralModelsListOutputSchema.parse(_model); + return fromManualMapping(_knownMistralChatModels, model.id, model.created, undefined, { + idPrefix: model.id, + label: model.id.replaceAll(/[_-]/g, ' '), + description: 'New Mistral Model', + contextWindow: 32768, + interfaces: [LLM_IF_OAI_Chat], // assume.. 
+ hidden: true, + }); +} + +export function mistralModelsSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number { // Array.sort comparator: negative puts a first, positive puts b first + if (a.label.startsWith('🔗') && !b.label.startsWith('🔗')) return 1; // symlink (🔗) labels sort after regular entries + if (!a.label.startsWith('🔗') && b.label.startsWith('🔗')) return -1; + const aPrefixIndex = mistralModelFamilyOrder.findIndex(prefix => a.id.startsWith(prefix)); // -1 when the id matches no listed family + const bPrefixIndex = mistralModelFamilyOrder.findIndex(prefix => b.id.startsWith(prefix)); + if (aPrefixIndex !== -1 && bPrefixIndex !== -1 && aPrefixIndex !== bPrefixIndex) + return aPrefixIndex - bPrefixIndex; // two different listed families: follow mistralModelFamilyOrder + if ((aPrefixIndex === -1) === (bPrefixIndex === -1)) + return b.label.localeCompare(a.label); // same family, or both unlisted: descending label keeps the comparator consistent + // exactly one id is unlisted: the unlisted model sorts before the listed one + return aPrefixIndex !== -1 ? 1 : -1; +} diff --git a/src/modules/llms/server/openai/models.data.ts b/src/modules/llms/server/openai/models/models.data.ts similarity index 79% rename from src/modules/llms/server/openai/models.data.ts rename to src/modules/llms/server/openai/models/models.data.ts index 9c4cd8bb5..d1baf4600 100644 --- a/src/modules/llms/server/openai/models.data.ts +++ b/src/modules/llms/server/openai/models/models.data.ts @@ -2,13 +2,12 @@ import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/w import { LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types'; -import type { ModelDescriptionSchema } from '../llm.server.types'; +import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { wireGroqModelsListOutputSchema } from './groq.wiretypes'; -import { wireMistralModelsListOutputSchema } from './mistral.wiretypes'; -import { wireOpenPipeModelOutputSchema } from './openpipe.wiretypes'; -import { wireOpenrouterModelsListOutputSchema } from './openrouter.wiretypes'; -import { wireTogetherAIListOutputSchema } from './togetherai.wiretypes'; +import { wireGroqModelsListOutputSchema } from '../groq.wiretypes'; +import { wireOpenPipeModelOutputSchema } from '../openpipe.wiretypes'; +import { 
wireOpenrouterModelsListOutputSchema } from '../openrouter.wiretypes'; +import { wireTogetherAIListOutputSchema } from '../togetherai.wiretypes'; // [Azure] / [OpenAI] @@ -491,259 +490,7 @@ export function localAIModelToModelDescription(modelId: string): ModelDescriptio } -// [Mistral] -// updated from the models on: https://docs.mistral.ai/getting-started/models/ -// and the pricing available on: https://mistral.ai/technology/#pricing - -const _knownMistralChatModels: ManualMappings = [ - // Mistral Nemo - { - idPrefix: 'open-mistral-nemo-2407', - label: 'Mistral Nemo (2407)', - description: 'Mistral Nemo is a state-of-the-art 12B model developed with NVIDIA.', - contextWindow: 131072, // 128K tokens - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 0.3, output: 0.3 }, - }, - - // Codestral - { - idPrefix: 'codestral-2405', - label: 'Codestral (2405)', - description: 'State-of-the-art Mistral model trained specifically for code tasks.', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 1, output: 3 }, - }, - { - idPrefix: 'codestral-latest', - label: 'Mistral Large (latest)', - symLink: 'mistral-codestral-2405', - hidden: true, - // copied - description: 'State-of-the-art Mistral model trained specifically for code tasks.', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 1, output: 3 }, - }, - - // Large - { - idPrefix: 'mistral-large-2407', - label: 'Mistral Large 2 (2407)', - description: 'Top-tier reasoning for high-complexity tasks, for your most sophisticated needs.', - contextWindow: 131072, // 128K tokens - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 3, output: 9 }, - }, - { - idPrefix: 'mistral-large-2402', - label: 'Mistral Large (2402)', - description: 'Top-tier reasoning for high-complexity tasks.', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 4, output: 12 }, - benchmark: { cbaElo: 1159 }, - }, - { - idPrefix: 
'mistral-large-latest', - label: 'Mistral Large (latest)', - symLink: 'mistral-large-2407', - hidden: true, - // copied - description: 'Top-tier reasoning for high-complexity tasks, for your most sophisticated needs.', - contextWindow: 131072, // 128K tokens - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 3, output: 9 }, - benchmark: { cbaElo: 1159 }, - }, - - // Open Mixtral (8x22B) - { - idPrefix: 'open-mixtral-8x22b-2404', - label: 'Open Mixtral 8x22B (2404)', - description: 'Mixtral 8x22B model', - contextWindow: 65536, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 2, output: 6 }, - }, - { - idPrefix: 'open-mixtral-8x22b', - label: 'Open Mixtral 8x22B', - symLink: 'open-mixtral-8x22b-2404', - hidden: true, - // copied - description: 'Mixtral 8x22B model', - contextWindow: 65536, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 2, output: 6 }, - }, - // Medium (Deprecated) - { - idPrefix: 'mistral-medium-2312', - label: 'Mistral Medium (2312)', - description: 'Ideal for intermediate tasks that require moderate reasoning (Data extraction, Summarizing a Document, Writing emails, Writing a Job Description, or Writing Product Descriptions)', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 2.7, output: 8.1 }, - benchmark: { cbaElo: 1148 }, - isLegacy: true, - hidden: true, - }, - { - idPrefix: 'mistral-medium-latest', - label: 'Mistral Medium (latest)', - symLink: 'mistral-medium-2312', - // copied - description: 'Ideal for intermediate tasks that require moderate reasoning (Data extraction, Summarizing a Document, Writing emails, Writing a Job Description, or Writing Product Descriptions)', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 2.7, output: 8.1 }, - benchmark: { cbaElo: 1148 }, - isLegacy: true, - hidden: true, - }, - { - idPrefix: 'mistral-medium', - label: 'Mistral Medium', - symLink: 'mistral-medium-2312', - // copied - 
description: 'Ideal for intermediate tasks that require moderate reasoning (Data extraction, Summarizing a Document, Writing emails, Writing a Job Description, or Writing Product Descriptions)', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 2.7, output: 8.1 }, - benchmark: { cbaElo: 1148 }, - isLegacy: true, - hidden: true, - }, - - // Open Mixtral (8x7B) -> currently points to `mistral-small-2312` (as per the docs) - { - idPrefix: 'open-mixtral-8x7b', - label: 'Open Mixtral (8x7B)', - description: 'A sparse mixture of experts model. As such, it leverages up to 45B parameters but only uses about 12B during inference, leading to better inference throughput at the cost of more vRAM.', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 0.7, output: 0.7 }, - }, - // Small (deprecated) - { - idPrefix: 'mistral-small-2402', - label: 'Mistral Small (2402)', - description: 'Suitable for simple tasks that one can do in bulk (Classification, Customer Support, or Text Generation)', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 1, output: 3 }, - hidden: true, - isLegacy: true, - }, - { - idPrefix: 'mistral-small-latest', - label: 'Mistral Small (latest)', - symLink: 'mistral-small-2402', - // copied - description: 'Suitable for simple tasks that one can do in bulk (Classification, Customer Support, or Text Generation)', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 1, output: 3 }, - hidden: true, - isLegacy: true, - }, - { - idPrefix: 'mistral-small-2312', - label: 'Mistral Small (2312)', - description: 'Aka open-mixtral-8x7b. 
Suitable for simple tasks that one can do in bulk (Classification, Customer Support, or Text Generation)', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 1, output: 3 }, - hidden: true, - isLegacy: true, - }, - { - idPrefix: 'mistral-small', - label: 'Mistral Small', - symLink: 'mistral-small-2312', - // copied - description: 'Aka open-mixtral-8x7b. Suitable for simple tasks that one can do in bulk (Classification, Customer Support, or Text Generation)', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 1, output: 3 }, - hidden: true, - isLegacy: true, - }, - - - // Open Mistral (7B) -> currently points to mistral-tiny-2312 (as per the docs) - { - idPrefix: 'open-mistral-7b', - label: 'Open Mistral (7B)', - description: 'The first dense model released by Mistral AI, perfect for experimentation, customization, and quick iteration. At the time of the release, it matched the capabilities of models up to 30B parameters.', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - chatPrice: { input: 0.25, output: 0.25 }, - }, - // Tiny (deprecated) - { - idPrefix: 'mistral-tiny-2312', - label: 'Mistral Tiny (2312)', - description: 'Aka open-mistral-7b. Used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - hidden: true, - isLegacy: true, - }, - { - idPrefix: 'mistral-tiny', - label: 'Mistral Tiny', - symLink: 'mistral-tiny-2312', - // copied - description: 'Aka open-mistral-7b. Used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], - hidden: true, - isLegacy: true, - }, - - { - idPrefix: 'mistral-embed', - label: 'Mistral Embed', - description: 'A model that converts text into numerical vectors of embeddings in 1024 dimensions. 
Embedding models enable retrieval and retrieval-augmented generation applications.', - maxCompletionTokens: 1024, // HACK - it's 1024 dimensions, but those are not 'completion tokens' - contextWindow: 8192, // Updated context window - interfaces: [], - chatPrice: { input: 0.1, output: 0.1 }, - hidden: true, - }, -]; - -const mistralModelFamilyOrder = [ - 'codestral', 'mistral-large', 'open-mixtral-8x22b', 'mistral-medium', 'open-mixtral-8x7b', 'mistral-small', 'open-mistral-7b', 'mistral-tiny', 'mistral-embed', '🔗', -]; - -export function mistralModelToModelDescription(_model: unknown): ModelDescriptionSchema { - const model = wireMistralModelsListOutputSchema.parse(_model); - return fromManualMapping(_knownMistralChatModels, model.id, model.created, undefined, { - idPrefix: model.id, - label: model.id.replaceAll(/[_-]/g, ' '), - description: 'New Mistral Model', - contextWindow: 32768, - interfaces: [LLM_IF_OAI_Chat], // assume.. - hidden: true, - }); -} - -export function mistralModelsSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number { - if (a.label.startsWith('🔗') && !b.label.startsWith('🔗')) return 1; - if (!a.label.startsWith('🔗') && b.label.startsWith('🔗')) return -1; - const aPrefixIndex = mistralModelFamilyOrder.findIndex(prefix => a.id.startsWith(prefix)); - const bPrefixIndex = mistralModelFamilyOrder.findIndex(prefix => b.id.startsWith(prefix)); - if (aPrefixIndex !== -1 && bPrefixIndex !== -1) { - if (aPrefixIndex !== bPrefixIndex) - return aPrefixIndex - bPrefixIndex; - return b.label.localeCompare(a.label); - } - return aPrefixIndex !== -1 ? 
1 : -1; -} +// [Mistral] moved to own file // [OpenPipe] @@ -1262,7 +1009,7 @@ export function groqModelSortFn(a: ModelDescriptionSchema, b: ModelDescriptionSc // Helpers -type ManualMapping = ({ +export type ManualMapping = ({ idPrefix: string, isLatest?: boolean, isPreview?: boolean, @@ -1270,9 +1017,9 @@ type ManualMapping = ({ symLink?: string } & Omit); -type ManualMappings = ManualMapping[]; +export type ManualMappings = ManualMapping[]; -function fromManualMapping(mappings: ManualMappings, id: string, created?: number, updated?: number, fallback?: ManualMapping, disableSymLink?: boolean): ModelDescriptionSchema { +export function fromManualMapping(mappings: ManualMappings, id: string, created?: number, updated?: number, fallback?: ManualMapping, disableSymLink?: boolean): ModelDescriptionSchema { // find the closest known model, or fall back, or take the last const known = mappings.find(base => id === base.idPrefix) diff --git a/src/modules/llms/server/openai/openai.router.ts b/src/modules/llms/server/openai/openai.router.ts index c33de6fe2..8014c18c6 100644 --- a/src/modules/llms/server/openai/openai.router.ts +++ b/src/modules/llms/server/openai/openai.router.ts @@ -13,14 +13,11 @@ import { fixupHost } from '~/common/util/urlUtils'; import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes'; import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types'; -import { azureModelToModelDescription, deepseekModelToModelDescription, groqModelSortFn, groqModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, mistralModelsSort, mistralModelToModelDescription, openAIModelFilter, openAIModelToModelDescription, openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions, openRouterModelFamilySortFn, openRouterModelToModelDescription, perplexityAIModelDescriptions, 
perplexityAIModelSort, togetherAIModelsToModelDescriptions } from './models.data'; +import { azureModelToModelDescription, deepseekModelToModelDescription, groqModelSortFn, groqModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, openAIModelFilter, openAIModelToModelDescription, openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions, openRouterModelFamilySortFn, openRouterModelToModelDescription, perplexityAIModelDescriptions, perplexityAIModelSort, togetherAIModelsToModelDescriptions } from './models/models.data'; +import { mistralModelsSort, mistralModelToModelDescription } from './models/mistral.models'; import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes'; -// module configuration -const ABERRATION_FIXUP_SQUASH = '\n\n\n---\n\n\n'; - - const openAIDialects = z.enum([ 'azure', 'deepseek', 'groq', 'lmstudio', 'localai', 'mistral', 'openai', 'openpipe', 'openrouter', 'perplexity', 'togetherai', ]);