mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Merge branch 'enricoros:v2-dev' into v2-dev
This commit is contained in:
@@ -356,7 +356,13 @@ export function prettyShortChatModelName(model: string | undefined): string {
|
||||
if (model.includes(':'))
|
||||
return model.replace(':latest', '').replaceAll(':', ' ');
|
||||
// [xAI]
|
||||
if (model.includes('grok-beta')) return 'Grok Beta';
|
||||
if (model.includes('grok-')) {
|
||||
if (model.includes('grok-3')) return 'Grok 3';
|
||||
if (model.includes('grok-2-vision')) return 'Grok 2 Vision';
|
||||
if (model.includes('grok-2')) return 'Grok 2';
|
||||
if (model.includes('grok-beta')) return 'Grok Beta';
|
||||
if (model.includes('grok-vision-beta')) return 'Grok Vision Beta';
|
||||
}
|
||||
return model;
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ import { _knownOpenAIChatModels } from './openai.models';
|
||||
import { wireGroqModelsListOutputSchema } from '../groq.wiretypes';
|
||||
import { wireOpenPipeModelOutputSchema } from '../openpipe.wiretypes';
|
||||
import { wireOpenrouterModelsListOutputSchema } from '../openrouter.wiretypes';
|
||||
import { wireTogetherAIListOutputSchema } from '../togetherai.wiretypes';
|
||||
|
||||
|
||||
export function azureModelToModelDescription(azureDeploymentRef: string, openAIModelIdBase: string, modelCreated: number, modelUpdated?: number): ModelDescriptionSchema {
|
||||
@@ -338,78 +337,6 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
|
||||
}
|
||||
|
||||
|
||||
// [Together AI]
|
||||
|
||||
const _knownTogetherAIChatModels: ManualMappings = [
|
||||
{
|
||||
idPrefix: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO',
|
||||
label: 'Nous Hermes 2 - Mixtral 8x7B-DPO',
|
||||
description: 'Nous Hermes 2 Mixtral 7bx8 DPO is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT',
|
||||
label: 'Nous Hermes 2 - Mixtral 8x7B-SFT',
|
||||
description: 'Nous Hermes 2 Mixtral 7bx8 SFT is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'mistralai/Mixtral-8x7B-Instruct-v0.1',
|
||||
label: 'Mixtral-8x7B Instruct',
|
||||
description: 'The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'mistralai/Mistral-7B-Instruct-v0.2',
|
||||
label: 'Mistral (7B) Instruct v0.2',
|
||||
description: 'The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'NousResearch/Nous-Hermes-2-Yi-34B',
|
||||
label: 'Nous Hermes-2 Yi (34B)',
|
||||
description: 'Nous Hermes 2 - Yi-34B is a state of the art Yi Fine-tune',
|
||||
contextWindow: 4097,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
] as const;
|
||||
|
||||
/**
 * Converts the raw Together AI models-list payload into sorted model descriptions.
 *
 * The payload is validated with `wireTogetherAIListOutputSchema.parse`, then every entry is
 * resolved through `fromManualMapping` against `_knownTogetherAIChatModels`. A hidden,
 * generically-labelled mapping derived from the model id is passed as the fallback argument.
 *
 * @param wireModels - unvalidated models-list response
 * @returns descriptions ordered: visible before hidden, then higher `created` first, then by id
 */
export function togetherAIModelsToModelDescriptions(wireModels: unknown): ModelDescriptionSchema[] {

  function togetherAIModelToModelDescription(model: { id: string, created: number }) {
    return fromManualMapping(_knownTogetherAIChatModels, model.id, model.created, undefined, {
      idPrefix: model.id,
      // e.g. 'org/some-model_v1' -> 'org · some model v1'
      label: model.id.replaceAll('/', ' · ').replaceAll(/[_-]/g, ' '),
      description: 'New Together AI Model', // fixed typo: was 'Togehter'
      contextWindow: null, // unknown
      interfaces: [LLM_IF_OAI_Chat], // assume..
      hidden: true,
    });
  }

  function togetherAIModelsSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number {
    // visible models first
    if (a.hidden && !b.hidden)
      return 1;
    if (!a.hidden && b.hidden)
      return -1;
    // then by creation value, descending (missing values count as 0)
    if (a.created !== b.created)
      return (b.created || 0) - (a.created || 0);
    // finally a stable tie-break on the id
    return a.id.localeCompare(b.id);
  }

  return wireTogetherAIListOutputSchema.parse(wireModels)
    .map(togetherAIModelToModelDescription)
    .sort(togetherAIModelsSort);
}
|
||||
|
||||
|
||||
// Perplexity
|
||||
|
||||
|
||||
// Groq - https://console.groq.com/docs/models
|
||||
|
||||
const _knownGroqModels: ManualMappings = [
|
||||
|
||||
@@ -0,0 +1,283 @@
|
||||
import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { wireTogetherAIListOutputSchema } from '../togetherai.wiretypes';
|
||||
|
||||
|
||||
const _knownTogetherAIChatModels: ManualMappings = [
|
||||
{
|
||||
idPrefix: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
|
||||
label: 'Llama 3.3 70B Instruct Turbo',
|
||||
description: 'Llama 3.3 70B Instruct Turbo is an advanced model from Meta with a context length of 131072 tokens, using FP8 quantization.',
|
||||
contextWindow: 131072,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo',
|
||||
label: 'Llama 3.1 8B Instruct Turbo',
|
||||
description: 'Llama 3.1 8B Instruct Turbo is an advanced model from Meta with a context length of 131072 tokens, using FP8 quantization.',
|
||||
contextWindow: 131072,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo',
|
||||
label: 'Llama 3.1 70B Instruct Turbo',
|
||||
description: 'Llama 3.1 70B Instruct Turbo is an advanced model from Meta with a context length of 131072 tokens, using FP8 quantization.',
|
||||
contextWindow: 131072,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo',
|
||||
label: 'Llama 3.1 405B Instruct Turbo',
|
||||
description: 'Llama 3.1 405B Instruct Turbo is a large-scale model from Meta with a context length of 130815 tokens, using FP8 quantization.',
|
||||
contextWindow: 130815,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Meta-Llama-3-8B-Instruct-Turbo',
|
||||
label: 'Llama 3 8B Instruct Turbo',
|
||||
description: 'Llama 3 8B Instruct Turbo is an advanced model from Meta with a context length of 8192 tokens, using FP8 quantization.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Meta-Llama-3-70B-Instruct-Turbo',
|
||||
label: 'Llama 3 70B Instruct Turbo',
|
||||
description: 'Llama 3 70B Instruct Turbo is an advanced model from Meta with a context length of 8192 tokens, using FP8 quantization.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Llama-3.2-3B-Instruct-Turbo',
|
||||
label: 'Llama 3.2 3B Instruct Turbo',
|
||||
description: 'Llama 3.2 3B Instruct Turbo is a model from Meta with a context length of 131072 tokens, using FP16 precision.',
|
||||
contextWindow: 131072,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Meta-Llama-3-8B-Instruct-Lite',
|
||||
label: 'Llama 3 8B Instruct Lite',
|
||||
description: 'Llama 3 8B Instruct Lite is an efficient model from Meta with a context length of 8192 tokens, using INT4 quantization.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Meta-Llama-3-70B-Instruct-Lite',
|
||||
label: 'Llama 3 70B Instruct Lite',
|
||||
description: 'Llama 3 70B Instruct Lite is an efficient model from Meta with a context length of 8192 tokens, using INT4 quantization.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Llama-3-8b-chat-hf',
|
||||
label: 'Llama 3 8B Instruct Reference',
|
||||
description: 'Llama 3 8B Instruct Reference is a base model from Meta with a context length of 8192 tokens.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Llama-3-70b-chat-hf',
|
||||
label: 'Llama 3 70B Instruct Reference',
|
||||
description: 'Llama 3 70B Instruct Reference is a base model from Meta with a context length of 8192 tokens.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
|
||||
label: 'Llama 3.1 Nemotron 70B',
|
||||
description: 'Llama 3.1 Nemotron 70B is a model from Nvidia with a context length of 32768 tokens, using FP16 precision.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'Qwen/Qwen2.5-Coder-32B-Instruct',
|
||||
label: 'Qwen 2.5 Coder 32B Instruct',
|
||||
description: 'Qwen 2.5 Coder 32B Instruct is a model from Qwen optimized for code tasks, with a context length of 32768 tokens.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'Qwen/QwQ-32B-Preview',
|
||||
label: 'QwQ-32B-Preview',
|
||||
description: 'QwQ-32B-Preview is a preview model from Qwen with a context length of 32768 tokens.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'microsoft/WizardLM-2-8x22B',
|
||||
label: 'WizardLM-2 8x22B',
|
||||
description: 'WizardLM-2 8x22B is a model from Microsoft with a context length of 65536 tokens.',
|
||||
contextWindow: 65536,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'google/gemma-2-27b-it',
|
||||
label: 'Gemma 2 27B',
|
||||
description: 'Gemma 2 27B is a model from Google with a context length of 8192 tokens.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'google/gemma-2-9b-it',
|
||||
label: 'Gemma 2 9B',
|
||||
description: 'Gemma 2 9B is a model from Google with a context length of 8192 tokens.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'databricks/dbrx-instruct',
|
||||
label: 'DBRX Instruct',
|
||||
description: 'DBRX Instruct is a model from Databricks with a context length of 32768 tokens.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'deepseek-ai/deepseek-llm-67b-chat',
|
||||
label: 'DeepSeek LLM Chat (67B)',
|
||||
description: 'DeepSeek LLM Chat (67B) is a model from DeepSeek AI with a context length of 4096 tokens.',
|
||||
contextWindow: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'deepseek-ai/DeepSeek-V3',
|
||||
label: 'DeepSeek-V3',
|
||||
description: 'DeepSeek-V3 is a model from DeepSeek AI with a context length of 131072 tokens, using FP8 quantization.',
|
||||
contextWindow: 131072,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'google/gemma-2b-it',
|
||||
label: 'Gemma Instruct (2B)',
|
||||
description: 'Gemma Instruct (2B) is a model from Google with a context length of 8192 tokens.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'Gryphe/MythoMax-L2-13b',
|
||||
label: 'MythoMax-L2 (13B)',
|
||||
description: 'MythoMax-L2 (13B) is a model from Gryphe with a context length of 4096 tokens.',
|
||||
contextWindow: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'meta-llama/Llama-2-13b-chat-hf',
|
||||
label: 'LLaMA-2 Chat (13B)',
|
||||
description: 'LLaMA-2 Chat (13B) is a model from Meta with a context length of 4096 tokens.',
|
||||
contextWindow: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'mistralai/Mistral-7B-Instruct-v0.1',
|
||||
label: 'Mistral (7B) Instruct',
|
||||
description: 'Mistral (7B) Instruct v0.1 is the initial instruct fine-tuned version of the Mistral 7B model.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'mistralai/Mistral-7B-Instruct-v0.2',
|
||||
label: 'Mistral (7B) Instruct v0.2',
|
||||
description: 'Mistral (7B) Instruct v0.2 is an improved version with extended context length.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'mistralai/Mistral-7B-Instruct-v0.3',
|
||||
label: 'Mistral (7B) Instruct v0.3',
|
||||
description: 'Mistral (7B) Instruct v0.3 is the latest version of Mistral 7B Instruct.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'mistralai/Mixtral-8x7B-Instruct-v0.1',
|
||||
label: 'Mixtral-8x7B Instruct (46.7B)',
|
||||
description: 'Mixtral-8x7B Instruct is a model from Mistral AI with a context length of 32768 tokens.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'mistralai/Mixtral-8x22B-Instruct-v0.1',
|
||||
label: 'Mixtral-8x22B Instruct (141B)',
|
||||
description: 'Mixtral-8x22B Instruct is a larger model from Mistral AI with a context length of 65536 tokens.',
|
||||
contextWindow: 65536,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO',
|
||||
label: 'Nous Hermes 2 - Mixtral 8x7B-DPO (46.7B)',
|
||||
description: 'Nous Hermes 2 - Mixtral 8x7B-DPO is a model from NousResearch with a context length of 32768 tokens.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT',
|
||||
label: 'Nous Hermes 2 - Mixtral 8x7B-SFT',
|
||||
description: 'Nous Hermes 2 Mixtral 8x7B-SFT is a model from Nous Research trained over the Mixtral 8x7B MoE LLM.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'NousResearch/Nous-Hermes-2-Yi-34B',
|
||||
label: 'Nous Hermes-2 Yi (34B)',
|
||||
description: 'Nous Hermes 2 - Yi-34B is a state-of-the-art model from Nous Research.',
|
||||
contextWindow: 4097,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'Qwen/Qwen2.5-7B-Instruct-Turbo',
|
||||
label: 'Qwen 2.5 7B Instruct Turbo',
|
||||
description: 'Qwen 2.5 7B Instruct Turbo is a model from Qwen with a context length of 32768 tokens, using FP8 quantization.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'Qwen/Qwen2.5-72B-Instruct-Turbo',
|
||||
label: 'Qwen 2.5 72B Instruct Turbo',
|
||||
description: 'Qwen 2.5 72B Instruct Turbo is a model from Qwen with a context length of 32768 tokens, using FP8 quantization.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'Qwen/Qwen2-72B-Instruct',
|
||||
label: 'Qwen 2 Instruct (72B)',
|
||||
description: 'Qwen 2 Instruct (72B) is a model from Qwen with a context length of 32768 tokens.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'upstage/SOLAR-10.7B-Instruct-v1.0',
|
||||
label: 'Upstage SOLAR Instruct v1 (11B)',
|
||||
description: 'Upstage SOLAR Instruct v1 is a model from Upstage with a context length of 4096 tokens.',
|
||||
contextWindow: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
] as const;
|
||||
|
||||
|
||||
/**
 * Builds the Together AI model descriptions from the wire models-list response.
 *
 * Steps: validate the payload with `wireTogetherAIListOutputSchema.parse`, resolve each entry
 * via `fromManualMapping` (known mappings first, a hidden generic entry as the fallback
 * argument), then sort the resulting list.
 *
 * @param wireModels - unvalidated models-list response
 * @returns descriptions ordered: visible before hidden, then higher `created` first, then by id
 */
export function togetherAIModelsToModelDescriptions(wireModels: unknown): ModelDescriptionSchema[] {

  // Resolve a single wire model into its description
  const describeModel = (wire: { id: string, created: number }) => {
    // e.g. 'org/some-model_v1' -> 'org · some model v1'
    const readableId = wire.id.replaceAll('/', ' · ').replaceAll(/[_-]/g, ' ');
    return fromManualMapping(_knownTogetherAIChatModels, wire.id, wire.created, undefined, {
      idPrefix: wire.id,
      label: readableId,
      description: 'New Together AI Model',
      contextWindow: null, // unknown
      interfaces: [LLM_IF_OAI_Chat], // assume
      hidden: true,
    });
  };

  // Comparator: visibility, then creation value (descending), then id
  const compareDescriptions = (a: ModelDescriptionSchema, b: ModelDescriptionSchema): number => {
    const aHidden = !!a.hidden;
    const bHidden = !!b.hidden;
    if (aHidden !== bHidden)
      return aHidden ? 1 : -1;
    if (a.created === b.created)
      return a.id.localeCompare(b.id);
    // missing creation values count as 0
    return (b.created || 0) - (a.created || 0);
  };

  const parsedModels = wireTogetherAIListOutputSchema.parse(wireModels);
  const descriptions = parsedModels.map((wire) => describeModel(wire));
  descriptions.sort(compareDescriptions);
  return descriptions;
}
|
||||
@@ -5,18 +5,47 @@ import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { fromManualMapping, ManualMapping, ManualMappings } from './models.data';
|
||||
import { openAIAccess, OpenAIAccessSchema } from '../openai.router';
|
||||
|
||||
|
||||
// Known xAI Models - Manual Mappings
|
||||
// List on: https://console.x.ai/team/_TEAM_ID_/models
|
||||
const _knownXAIChatModels: ManualMappings = [
|
||||
|
||||
{
|
||||
idPrefix: 'grok-2-vision-1212',
|
||||
label: `Grok 2 Vision (1212)`,
|
||||
description: 'xAI model grok-2-vision-1212 with image and text input capabilities. Supports text generation with an 8,192 token context window.',
|
||||
contextWindow: 8192,
|
||||
maxCompletionTokens: undefined,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision],
|
||||
chatPrice: { input: 2, output: 10 },
|
||||
},
|
||||
{
|
||||
idPrefix: 'grok-2-1212',
|
||||
label: `Grok 2 (1212)`,
|
||||
description: 'xAI model grok-2-1212 with text input capabilities. Supports text generation with a 131,072 token context window.',
|
||||
contextWindow: 131072,
|
||||
maxCompletionTokens: undefined,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
chatPrice: { input: 2, output: 10 },
|
||||
},
|
||||
|
||||
{
|
||||
idPrefix: 'grok-vision-beta',
|
||||
label: `Grok Vision Beta`,
|
||||
description: 'xAI model grok-vision-beta with image and text input capabilities. Supports text generation with an 8,192 token context window.',
|
||||
contextWindow: 8192,
|
||||
maxCompletionTokens: undefined,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision],
|
||||
chatPrice: { input: 5, output: 15 },
|
||||
},
|
||||
{
|
||||
idPrefix: 'grok-beta',
|
||||
label: `Grok Beta`,
|
||||
description: 'xAI\'s flagship model with real-time knowledge from the X platform. Supports text generation with a 131K token context window.',
|
||||
contextWindow: 131072, // 131,072 tokens as shown in the Context column
|
||||
description: 'xAI\'s flagship model with real-time knowledge from the X platform. Supports text generation with a 131,072 token context window.',
|
||||
contextWindow: 131072,
|
||||
maxCompletionTokens: 16384,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
chatPrice: { input: 5, output: 15 },
|
||||
@@ -33,23 +62,62 @@ export async function xaiModelDescriptions(access: OpenAIAccessSchema): Promise<
|
||||
|
||||
const xaiModels = wireXAIModelsListSchema.parse(modelsResponse);
|
||||
|
||||
return xaiModels.models.map(model => fromManualMapping(_knownXAIChatModels, model.id, model.created, undefined, {
|
||||
idPrefix: model.id,
|
||||
label: `${model.id} ${model.version || ''}`, // {{Created}}`,
|
||||
description: `xAI model ${model.id}`,
|
||||
contextWindow: 16384,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, ...(model.input_modalities?.includes('image') ? [LLM_IF_OAI_Vision] : [])],
|
||||
...(model.prompt_text_token_price && model.completion_text_token_price && {
|
||||
chatPrice: {
|
||||
input: model.prompt_text_token_price / 10000, // FIXME: SCALE UNKNOWN for now
|
||||
output: model.completion_text_token_price / 10000,
|
||||
},
|
||||
}),
|
||||
}));
|
||||
return xaiModels.models.reduce((acc, xm) => {
|
||||
|
||||
// Fallback for unknown models
|
||||
const unknownModelFallback: ManualMapping = {
|
||||
idPrefix: xm.id,
|
||||
label: `${xm.id}${xm.version ? ' ' + xm.version : ''}`,
|
||||
description: `xAI model ${xm.id}`,
|
||||
contextWindow: 16384,
|
||||
interfaces: [
|
||||
LLM_IF_OAI_Chat,
|
||||
LLM_IF_OAI_Fn,
|
||||
...(xm.input_modalities?.includes('image') ? [LLM_IF_OAI_Vision] : []),
|
||||
],
|
||||
...(xm.prompt_text_token_price != null && xm.completion_text_token_price != null && {
|
||||
chatPrice: {
|
||||
input: xm.prompt_text_token_price / 10000, // Scaling factor applied as per API data
|
||||
output: xm.completion_text_token_price / 10000,
|
||||
},
|
||||
}),
|
||||
};
|
||||
|
||||
// xAI model description
|
||||
const modelDescription = fromManualMapping(_knownXAIChatModels, xm.id, xm.created, undefined, unknownModelFallback);
|
||||
acc.push(modelDescription);
|
||||
|
||||
// NOTE: disabled, as this is not useful
|
||||
// if there are aliases, add them as 'symlinked' models
|
||||
// if (xm.aliases?.length) {
|
||||
// xm.aliases.forEach((alias) => {
|
||||
// const aliasedModel = fromManualMapping([{
|
||||
// idPrefix: alias,
|
||||
// label: alias,
|
||||
// symLink: xm.id,
|
||||
// description: `xAI model ${alias}`,
|
||||
// contextWindow: 16384,
|
||||
// interfaces: unknownModelFallback.interfaces,
|
||||
// }], alias, xm.created, xm.updated, unknownModelFallback);
|
||||
// acc.push(aliasedModel);
|
||||
// });
|
||||
// }
|
||||
|
||||
return acc;
|
||||
}, [] as ModelDescriptionSchema[]);
|
||||
}
|
||||
|
||||
// Manual sort order: a label is ranked by the first prefix in this list that it starts with,
// so more specific prefixes must precede the catch-all 'Grok'.
const _xaiLabelStartsWithOrder = ['Grok 3', 'Grok 2', 'Grok'];

/**
 * Sort comparator for xAI model descriptions.
 *
 * Models are grouped by the position of their label's prefix in `_xaiLabelStartsWithOrder`;
 * within the same group they are ordered by label (`localeCompare`, ascending).
 * Labels that match no prefix are ranked after every listed group, instead of letting the
 * `-1` returned by `findIndex` place them in front of the preferred models.
 *
 * @param a - first model description
 * @param b - second model description
 * @returns negative when `a` sorts first, positive when `b` sorts first, 0 when equal
 */
export function xaiModelSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number {
  // findIndex yields -1 when nothing matches: map that to "after all known prefixes"
  const rankOf = (label: string): number => {
    const index = _xaiLabelStartsWithOrder.findIndex((prefix) => label.startsWith(prefix));
    return index === -1 ? _xaiLabelStartsWithOrder.length : index;
  };

  const aRank = rankOf(a.label);
  const bRank = rankOf(b.label);

  if (aRank !== bRank)
    return aRank - bRank;

  return a.label.localeCompare(b.label);
}
|
||||
|
||||
|
||||
@@ -71,15 +139,16 @@ export const wireXAIModelSchema = z.object({
|
||||
version: z.string().optional(),
|
||||
|
||||
// modalities
|
||||
input_modalities: z.array(z.string()), // relaxing it
|
||||
output_modalities: z.array(z.string()), // relaxing it
|
||||
// input_modalities: z.array(z.enum(['text'])),
|
||||
// output_modalities: z.array(z.enum(['text'])),
|
||||
input_modalities: z.array(z.string()), // 'text', 'image', etc.
|
||||
output_modalities: z.array(z.string()), // 'text', 'image', etc.
|
||||
|
||||
// pricing - FIXME: SCALE UNKNOWN for now
|
||||
prompt_text_token_price: z.number().optional(),
|
||||
prompt_image_token_price: z.number().optional(),
|
||||
completion_text_token_price: z.number().optional(),
|
||||
|
||||
// Aliases for models
|
||||
aliases: z.array(z.string()).optional(),
|
||||
});
|
||||
|
||||
export const wireXAIModelsListSchema = z.object({
|
||||
|
||||
@@ -13,11 +13,12 @@ import { fixupHost } from '~/common/util/urlUtils';
|
||||
import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
|
||||
|
||||
import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types';
|
||||
import { azureModelToModelDescription, groqModelSortFn, groqModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions, openRouterModelFamilySortFn, openRouterModelToModelDescription, togetherAIModelsToModelDescriptions } from './models/models.data';
|
||||
import { azureModelToModelDescription, groqModelSortFn, groqModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/models.data';
|
||||
import { deepseekModelFilter, deepseekModelToModelDescription } from './models/deepseek.models';
|
||||
import { mistralModelsSort, mistralModelToModelDescription } from './models/mistral.models';
|
||||
import { openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
|
||||
import { perplexityAIModelDescriptions, perplexityAIModelSort } from './models/perplexity.models';
|
||||
import { togetherAIModelsToModelDescriptions } from './models/together.models';
|
||||
import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
|
||||
import { xaiModelDescriptions, xaiModelSort } from './models/xai.models';
|
||||
|
||||
|
||||
Reference in New Issue
Block a user