Groq: update models

Enrico Ros
2025-01-28 22:19:49 -08:00
parent e1d8dabd3d
commit 57f2ca6460
3 changed files with 125 additions and 71 deletions
@@ -9,5 +9,7 @@ export const wireGroqModelsListOutputSchema = z.object({
created: z.number(),
owned_by: z.string(),
active: z.boolean(),
+ context_window: z.number(),
+ // public_apps: z.any(),
});
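The new context_window field is what the mapper below relies on. A minimal parse sketch, assuming the schema's full shape (the fields above this hunk, such as id and object, are not visible here and are assumed), with an illustrative payload rather than real API output:

import { z } from 'zod';

// assumed full shape; only the last fields appear in the hunk above
const wireGroqModelsListOutputSchema = z.object({
  id: z.string(),
  object: z.literal('model'),
  created: z.number(),
  owned_by: z.string(),
  active: z.boolean(),
  context_window: z.number(),
  // public_apps: z.any(),
});

// illustrative wire entry
const entry = wireGroqModelsListOutputSchema.parse({
  id: 'llama-3.3-70b-versatile',
  object: 'model',
  created: 1733447754,
  owned_by: 'Meta',
  active: true,
  context_window: 131072,
});
console.log(entry.context_window); // 131072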
@@ -1,4 +1,4 @@
- import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn } from '~/common/stores/llms/llms.types';
+ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
@@ -12,95 +12,146 @@ import { wireGroqModelsListOutputSchema } from '../groq.wiretypes';
*/
const _knownGroqModels: ManualMappings = [
{
isLatest: true,
idPrefix: 'llama-3.1-405b-reasoning',
label: 'Llama 3.1 · 405B',
description: 'LLaMA 3.1 405B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
isPreview: true,
idPrefix: 'deepseek-r1-distill-llama-70b',
label: 'DeepSeek R1 Distill Llama 70B (Preview)',
description: 'DeepSeek R1 Distill Llama 70B with a context window of 131,072 tokens. Preview model.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
isLatest: true,
idPrefix: 'llama-3.1-70b-versatile',
label: 'Llama 3.1 · 70B',
description: 'LLaMA 3.1 70B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
isLatest: true,
idPrefix: 'llama-3.1-8b-instant',
label: 'Llama 3.1 · 8B',
description: 'LLaMA 3.1 8B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-groq-70b-8192-tool-use-preview',
label: 'Llama 3 Groq · 70B Tool Use',
description: 'LLaMA 3 70B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-groq-8b-8192-tool-use-preview',
label: 'Llama 3 Groq · 8B Tool Use',
description: 'LLaMA 3 8B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-70b-8192',
label: 'Llama 3 · 70B',
description: 'LLaMA3 70B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
// isLegacy: true,
hidden: true,
},
{
idPrefix: 'llama3-8b-8192',
label: 'Llama 3 · 8B',
description: 'LLaMA3 8B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
// isLegacy: true,
hidden: true,
},
{
idPrefix: 'mixtral-8x7b-32768',
label: 'Mixtral 8x7B',
description: 'Mixtral 8x7B developed by Mistral with a context window of 32,768 tokens. Supports tool use.',
contextWindow: 32768,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'gemma2-9b-it',
label: 'Gemma 2 · 9B Instruct',
description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Supports tool use.',
description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Production model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.20, output: 0.20 },
},
{
idPrefix: 'gemma-7b-it',
label: 'Gemma 1.1 · 7B Instruct',
description: 'Gemma 7B developed by Google with a context window of 8,192 tokens. Supports tool use.',
idPrefix: 'llama-3.3-70b-versatile',
label: 'Llama 3.3 · 70B Versatile',
description: 'LLaMA 3.3 70B developed by Meta with a context window of 131,072 tokens. Production model.',
contextWindow: 131072,
maxCompletionTokens: 32768,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.59, output: 0.79 },
},
{
idPrefix: 'llama-3.1-8b-instant',
label: 'Llama 3.1 · 8B Instant',
description: 'LLaMA 3.1 8B developed by Meta with a context window of 131,072 tokens. Production model.',
contextWindow: 131072,
maxCompletionTokens: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.05, output: 0.08 },
},
{
idPrefix: 'llama-guard-3-8b',
label: 'Llama Guard 3 · 8B',
description: 'LLaMA Guard 3 8B developed by Meta with a context window of 8,192 tokens.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.20, output: 0.20 },
},
{
idPrefix: 'llama3-70b-8192',
label: 'Llama 3 · 70B',
description: 'LLaMA 3 70B developed by Meta with a context window of 8,192 tokens. Production model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.59, output: 0.79 },
},
{
idPrefix: 'llama3-8b-8192',
label: 'Llama 3 · 8B',
description: 'LLaMA 3 8B developed by Meta with a context window of 8,192 tokens. Production model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.05, output: 0.08 },
},
{
idPrefix: 'mixtral-8x7b-32768',
label: 'Mixtral 8x7B Instruct 32k',
description: 'Mixtral 8x7B developed by Mistral with a context window of 32,768 tokens. Production model.',
contextWindow: 32768,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.24, output: 0.24 },
},
// Preview Models
{
isPreview: true,
idPrefix: 'llama-3.3-70b-specdec',
label: 'Llama 3.3 · 70B SpecDec (Preview)',
description: 'LLaMA 3.3 70B SpecDec with a context window of 8,192 tokens. Preview model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.59, output: 0.99 },
},
{
isPreview: true,
idPrefix: 'llama-3.2-1b-preview',
label: 'Llama 3.2 · 1B (Preview)',
description: 'LLaMA 3.2 1B with a context window of 131,072 tokens. Preview model.',
contextWindow: 131072,
maxCompletionTokens: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.04, output: 0.04 },
},
{
isPreview: true,
idPrefix: 'llama-3.2-3b-preview',
label: 'Llama 3.2 · 3B (Preview)',
description: 'LLaMA 3.2 3B with a context window of 131,072 tokens. Preview model.',
contextWindow: 131072,
maxCompletionTokens: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.06, output: 0.06 },
},
{
isPreview: true,
idPrefix: 'llama-3.2-11b-vision-preview',
label: 'Llama 3.2 · 11B Vision (Preview)',
description: 'LLaMA 3.2 11B Vision with a context window of 8,192 tokens. Preview model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
chatPrice: { input: 0.18, output: 0.18 },
},
{
isPreview: true,
idPrefix: 'llama-3.2-90b-vision-preview',
label: 'Llama 3.2 · 90B Vision (Preview)',
description: 'LLaMA 3.2 90B Vision with a context window of 8,192 tokens. Preview model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
chatPrice: { input: 0.90, output: 0.90 },
},
];
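Mappings are resolved by id prefix (the startsWith check in the mapper below), so a dated or suffixed variant of a known id still picks up its base entry. A one-line sketch with an example id:

// e.g. 'llama-3.2-11b-vision-preview' (or a suffixed variant of it) resolves to its mapping
const known = _knownGroqModels.find(base => 'llama-3.2-11b-vision-preview'.startsWith(base.idPrefix));
// known?.label === 'Llama 3.2 · 11B Vision (Preview)'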
const groqDenyList: string[] = [
'whisper-',
];
export function groqModelFilter(model: { id: string }): boolean {
// substring match rather than a prefix match, so ids such as 'distil-whisper-large-v3-en' are excluded too
return !groqDenyList.some(deny => model.id.includes(deny));
}
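A small usage sketch of the filter (model ids are illustrative):

const fetched = [
  { id: 'whisper-large-v3' },
  { id: 'distil-whisper-large-v3-en' }, // caught by the substring match
  { id: 'llama-3.3-70b-versatile' },
];
const kept = fetched.filter(groqModelFilter);
// kept: [{ id: 'llama-3.3-70b-versatile' }]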
export function groqModelToModelDescription(_model: unknown): ModelDescriptionSchema {
const model = wireGroqModelsListOutputSchema.parse(_model);
// warn if the parsed context window differs from the mapped value
const knownModel = _knownGroqModels.find(base => model.id.startsWith(base.idPrefix));
if (!knownModel)
console.log(`groq.models: unknown model ${model.id}`, model);
if (knownModel && model.context_window !== knownModel.contextWindow)
console.warn(`groq.models: context window mismatch for ${model.id}: expected ${knownModel.contextWindow}, got ${model.context_window}`);
return fromManualMapping(_knownGroqModels, model.id, model.created, undefined, {
idPrefix: model.id,
label: model.id.replaceAll(/[_-]/g, ' '),
description: 'New Model',
contextWindow: 32768,
contextWindow: model.context_window || 32768,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
});
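Taken together: a known id resolves through the manual mapping, while an unknown id is logged and falls back to a hidden 'New Model' entry that still honors the API-reported context window (assuming fromManualMapping applies the fallback when no prefix matches). A sketch of the unknown path, with a hypothetical wire object:

// hypothetical future model id: logged as unknown, mapped via the fallback,
// with contextWindow taken from the wire's context_window (65,536 here)
const desc = groqModelToModelDescription({
  id: 'some-future-model',
  object: 'model',
  created: 1738108789,
  owned_by: 'Groq',
  active: true,
  context_window: 65536,
});
// desc.hidden === true, desc.contextWindow === 65536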
@@ -15,7 +15,7 @@ import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWi
import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types';
import { azureModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/models.data';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
import { groqModelSortFn, groqModelToModelDescription } from './models/groq.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './models/groq.models';
import { mistralModelsSort, mistralModelToModelDescription } from './models/mistral.models';
import { openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
import { perplexityAIModelDescriptions, perplexityAIModelSort } from './models/perplexity.models';
@@ -162,6 +162,7 @@ export const llmOpenAIRouter = createTRPCRouter({
case 'groq':
models = openAIModels
.filter(groqModelFilter)
.map(groqModelToModelDescription)
.sort(groqModelSortFn);
break;