diff --git a/src/modules/llms/server/openai/groq.wiretypes.ts b/src/modules/llms/server/openai/groq.wiretypes.ts
index 1ac6f168d..752d78d21 100644
--- a/src/modules/llms/server/openai/groq.wiretypes.ts
+++ b/src/modules/llms/server/openai/groq.wiretypes.ts
@@ -9,5 +9,7 @@ export const wireGroqModelsListOutputSchema = z.object({
   created: z.number(),
   owned_by: z.string(),
   active: z.boolean(),
+  context_window: z.number(),
+  // public_apps: z.any(),
 });
diff --git a/src/modules/llms/server/openai/models/groq.models.ts b/src/modules/llms/server/openai/models/groq.models.ts
index eefd3e59c..df47bafdb 100644
--- a/src/modules/llms/server/openai/models/groq.models.ts
+++ b/src/modules/llms/server/openai/models/groq.models.ts
@@ -1,4 +1,4 @@
-import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn } from '~/common/stores/llms/llms.types';
+import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
 
 import type { ModelDescriptionSchema } from '../../llm.server.types';
 import { fromManualMapping, ManualMappings } from './models.data';
@@ -12,95 +12,146 @@ import { wireGroqModelsListOutputSchema } from '../groq.wiretypes';
  */
 const _knownGroqModels: ManualMappings = [
   {
-    isLatest: true,
-    idPrefix: 'llama-3.1-405b-reasoning',
-    label: 'Llama 3.1 · 405B',
-    description: 'LLaMA 3.1 405B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
+    isPreview: true,
+    idPrefix: 'deepseek-r1-distill-llama-70b',
+    label: 'DeepSeek R1 Distill Llama 70B (Preview)',
+    description: 'DeepSeek R1 Distill Llama 70B with a context window of 131,072 tokens. Preview model.',
     contextWindow: 131072,
-    maxCompletionTokens: 8000,
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
-  },
-  {
-    isLatest: true,
-    idPrefix: 'llama-3.1-70b-versatile',
-    label: 'Llama 3.1 · 70B',
-    description: 'LLaMA 3.1 70B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
-    contextWindow: 131072,
-    maxCompletionTokens: 8000,
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
-  },
-  {
-    isLatest: true,
-    idPrefix: 'llama-3.1-8b-instant',
-    label: 'Llama 3.1 · 8B',
-    description: 'LLaMA 3.1 8B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
-    contextWindow: 131072,
-    maxCompletionTokens: 8000,
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
-  },
-  {
-    idPrefix: 'llama3-groq-70b-8192-tool-use-preview',
-    label: 'Llama 3 Groq · 70B Tool Use',
-    description: 'LLaMA 3 70B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
-    contextWindow: 8192,
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
-  },
-  {
-    idPrefix: 'llama3-groq-8b-8192-tool-use-preview',
-    label: 'Llama 3 Groq · 8B Tool Use',
-    description: 'LLaMA 3 8B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
-    contextWindow: 8192,
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
-  },
-  {
-    idPrefix: 'llama3-70b-8192',
-    label: 'Llama 3 · 70B',
-    description: 'LLaMA3 70B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
-    contextWindow: 8192,
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
-    // isLegacy: true,
-    hidden: true,
-  },
-  {
-    idPrefix: 'llama3-8b-8192',
-    label: 'Llama 3 · 8B',
-    description: 'LLaMA3 8B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
-    contextWindow: 8192,
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
-    // isLegacy: true,
-    hidden: true,
-  },
-  {
-    idPrefix: 'mixtral-8x7b-32768',
-    label: 'Mixtral 8x7B',
-    description: 'Mixtral 8x7B developed by Mistral with a context window of 32,768 tokens. Supports tool use.',
-    contextWindow: 32768,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
   },
   {
     idPrefix: 'gemma2-9b-it',
     label: 'Gemma 2 · 9B Instruct',
-    description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Supports tool use.',
+    description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Production model.',
     contextWindow: 8192,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.20, output: 0.20 },
   },
   {
-    idPrefix: 'gemma-7b-it',
-    label: 'Gemma 1.1 · 7B Instruct',
-    description: 'Gemma 7B developed by Google with a context window of 8,192 tokens. Supports tool use.',
+    idPrefix: 'llama-3.3-70b-versatile',
+    label: 'Llama 3.3 · 70B Versatile',
+    description: 'LLaMA 3.3 70B developed by Meta with a context window of 131,072 tokens. Production model.',
+    contextWindow: 131072,
+    maxCompletionTokens: 32768,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.59, output: 0.79 },
+  },
+  {
+    idPrefix: 'llama-3.1-8b-instant',
+    label: 'Llama 3.1 · 8B Instant',
+    description: 'LLaMA 3.1 8B developed by Meta with a context window of 131,072 tokens. Production model.',
+    contextWindow: 131072,
+    maxCompletionTokens: 8192,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.05, output: 0.08 },
+  },
+  {
+    idPrefix: 'llama-guard-3-8b',
+    label: 'Llama Guard 3 · 8B',
+    description: 'LLaMA Guard 3 8B developed by Meta with a context window of 8,192 tokens.',
     contextWindow: 8192,
-    interfaces: [LLM_IF_OAI_Chat],
-    hidden: true,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.20, output: 0.20 },
+  },
+  {
+    idPrefix: 'llama3-70b-8192',
+    label: 'Llama 3 · 70B',
+    description: 'LLaMA 3 70B developed by Meta with a context window of 8,192 tokens. Production model.',
+    contextWindow: 8192,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.59, output: 0.79 },
+  },
+  {
+    idPrefix: 'llama3-8b-8192',
+    label: 'Llama 3 · 8B',
+    description: 'LLaMA 3 8B developed by Meta with a context window of 8,192 tokens. Production model.',
+    contextWindow: 8192,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.05, output: 0.08 },
+  },
+  {
+    idPrefix: 'mixtral-8x7b-32768',
+    label: 'Mixtral 8x7B Instruct 32k',
+    description: 'Mixtral 8x7B developed by Mistral with a context window of 32,768 tokens. Production model.',
+    contextWindow: 32768,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.24, output: 0.24 },
+  },
+
+  // Preview Models
+  {
+    isPreview: true,
+    idPrefix: 'llama-3.3-70b-specdec',
+    label: 'Llama 3.3 · 70B SpecDec (Preview)',
+    description: 'LLaMA 3.3 70B SpecDec with a context window of 8,192 tokens. Preview model.',
+    contextWindow: 8192,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.59, output: 0.99 },
+  },
+  {
+    isPreview: true,
+    idPrefix: 'llama-3.2-1b-preview',
+    label: 'Llama 3.2 · 1B (Preview)',
+    description: 'LLaMA 3.2 1B with a context window of 131,072 tokens. Preview model.',
+    contextWindow: 131072,
+    maxCompletionTokens: 8192,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.04, output: 0.04 },
+  },
+  {
+    isPreview: true,
+    idPrefix: 'llama-3.2-3b-preview',
+    label: 'Llama 3.2 · 3B (Preview)',
+    description: 'LLaMA 3.2 3B with a context window of 131,072 tokens. Preview model.',
+    contextWindow: 131072,
+    maxCompletionTokens: 8192,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
+    chatPrice: { input: 0.06, output: 0.06 },
+  },
+  {
+    isPreview: true,
+    idPrefix: 'llama-3.2-11b-vision-preview',
+    label: 'Llama 3.2 · 11B Vision (Preview)',
+    description: 'LLaMA 3.2 11B Vision with a context window of 8,192 tokens. Preview model.',
+    contextWindow: 8192,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
+    chatPrice: { input: 0.18, output: 0.18 },
+  },
+  {
+    isPreview: true,
+    idPrefix: 'llama-3.2-90b-vision-preview',
+    label: 'Llama 3.2 · 90B Vision (Preview)',
+    description: 'LLaMA 3.2 90B Vision with a context window of 8,192 tokens. Preview model.',
+    contextWindow: 8192,
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
+    chatPrice: { input: 0.90, output: 0.90 },
   },
 ];
+
+const groqDenyList: string[] = [
+  'whisper-',
+];
+
+export function groqModelFilter(model: { id: string }): boolean {
+  return !groqDenyList.some(prefix => model.id.includes(prefix));
+}
+
+
 export function groqModelToModelDescription(_model: unknown): ModelDescriptionSchema {
   const model = wireGroqModelsListOutputSchema.parse(_model);
+
+  // sanity checks: log unknown models, and warn when the parsed context window differs from the manual mapping
+  const knownModel = _knownGroqModels.find(base => model.id.startsWith(base.idPrefix));
+  if (!knownModel)
+    console.log(`groq.models: unknown model ${model.id}`, model);
+  if (knownModel && model.context_window !== knownModel.contextWindow)
+    console.warn(`groq.models: context window mismatch for ${model.id}: expected ${knownModel.contextWindow}, got ${model.context_window}`);
+
   return fromManualMapping(_knownGroqModels, model.id, model.created, undefined, {
     idPrefix: model.id,
     label: model.id.replaceAll(/[_-]/g, ' '),
     description: 'New Model',
-    contextWindow: 32768,
+    contextWindow: model.context_window || 32768,
     interfaces: [LLM_IF_OAI_Chat],
     hidden: true,
   });
diff --git a/src/modules/llms/server/openai/openai.router.ts b/src/modules/llms/server/openai/openai.router.ts
index 129a9714c..3a35244f9 100644
--- a/src/modules/llms/server/openai/openai.router.ts
+++ b/src/modules/llms/server/openai/openai.router.ts
@@ -15,7 +15,7 @@ import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWi
 import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types';
 import { azureModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/models.data';
 import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
-import { groqModelSortFn, groqModelToModelDescription } from './models/groq.models';
+import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './models/groq.models';
 import { mistralModelsSort, mistralModelToModelDescription } from './models/mistral.models';
 import { openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
 import { perplexityAIModelDescriptions, perplexityAIModelSort } from './models/perplexity.models';
@@ -162,6 +162,7 @@ export const llmOpenAIRouter = createTRPCRouter({
       case 'groq':
         models = openAIModels
+          .filter(groqModelFilter)
          .map(groqModelToModelDescription)
          .sort(groqModelSortFn);
        break;
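
Not part of the patch: a minimal TypeScript sketch of how the new deny-list filter and the context_window fallback are expected to behave. The payload shape is assumed from the groq.wiretypes.ts change, and the model ids shown are illustrative only; the filter logic mirrors the groqModelFilter added above.

// Illustration only: re-implements the deny-list logic from groq.models.ts.
const groqDenyList: string[] = ['whisper-'];
const groqModelFilter = (model: { id: string }): boolean =>
  !groqDenyList.some(prefix => model.id.includes(prefix));

// Assumed example entries from Groq's /v1/models, shaped after wireGroqModelsListOutputSchema.
const wireModels = [
  { id: 'llama-3.3-70b-versatile', context_window: 131072 },
  { id: 'whisper-large-v3', context_window: 448 },
  { id: 'some-future-model', context_window: 65536 },
];

// Audio models are dropped before mapping; unmapped chat models fall back to the reported context window.
const kept = wireModels.filter(groqModelFilter);
console.log(kept.map(m => `${m.id}: ${m.context_window || 32768}`));
// -> ['llama-3.3-70b-versatile: 131072', 'some-future-model: 65536']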