Groq: update models

Enrico Ros
2025-01-28 22:19:49 -08:00
parent e1d8dabd3d
commit 57f2ca6460
3 changed files with 125 additions and 71 deletions
@@ -9,5 +9,7 @@ export const wireGroqModelsListOutputSchema = z.object({
created: z.number(),
owned_by: z.string(),
active: z.boolean(),
+ context_window: z.number(),
+ // public_apps: z.any(),
});
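The new context_window field is what the mapper below relies on. A minimal parse sketch, assuming the schema's full shape (the fields above this hunk, such as id and object, are not visible here and are assumed), with an illustrative payload rather than real API output:

import { z } from 'zod';

// assumed full shape; only the last fields appear in the hunk above
const wireGroqModelsListOutputSchema = z.object({
  id: z.string(),
  object: z.literal('model'),
  created: z.number(),
  owned_by: z.string(),
  active: z.boolean(),
  context_window: z.number(),
  // public_apps: z.any(),
});

// illustrative wire entry
const entry = wireGroqModelsListOutputSchema.parse({
  id: 'llama-3.3-70b-versatile',
  object: 'model',
  created: 1733447754,
  owned_by: 'Meta',
  active: true,
  context_window: 131072,
});
console.log(entry.context_window); // 131072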
@@ -1,4 +1,4 @@
- import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn } from '~/common/stores/llms/llms.types';
+ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
@@ -12,95 +12,146 @@ import { wireGroqModelsListOutputSchema } from '../groq.wiretypes';
*/
const _knownGroqModels: ManualMappings = [
{
isLatest: true,
idPrefix: 'llama-3.1-405b-reasoning',
label: 'Llama 3.1 · 405B',
description: 'LLaMA 3.1 405B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
isPreview: true,
idPrefix: 'deepseek-r1-distill-llama-70b',
label: 'DeepSeek R1 Distill Llama 70B (Preview)',
description: 'DeepSeek R1 Distill Llama 70B with a context window of 131,072 tokens. Preview model.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
isLatest: true,
idPrefix: 'llama-3.1-70b-versatile',
label: 'Llama 3.1 · 70B',
description: 'LLaMA 3.1 70B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
isLatest: true,
idPrefix: 'llama-3.1-8b-instant',
label: 'Llama 3.1 · 8B',
description: 'LLaMA 3.1 8B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-groq-70b-8192-tool-use-preview',
label: 'Llama 3 Groq · 70B Tool Use',
description: 'LLaMA 3 70B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-groq-8b-8192-tool-use-preview',
label: 'Llama 3 Groq · 8B Tool Use',
description: 'LLaMA 3 8B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-70b-8192',
label: 'Llama 3 · 70B',
description: 'LLaMA3 70B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
// isLegacy: true,
hidden: true,
},
{
idPrefix: 'llama3-8b-8192',
label: 'Llama 3 · 8B',
description: 'LLaMA3 8B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
// isLegacy: true,
hidden: true,
},
{
idPrefix: 'mixtral-8x7b-32768',
label: 'Mixtral 8x7B',
description: 'Mixtral 8x7B developed by Mistral with a context window of 32,768 tokens. Supports tool use.',
contextWindow: 32768,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'gemma2-9b-it',
label: 'Gemma 2 · 9B Instruct',
description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Supports tool use.',
description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Production model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.20, output: 0.20 },
},
{
idPrefix: 'gemma-7b-it',
label: 'Gemma 1.1 · 7B Instruct',
description: 'Gemma 7B developed by Google with a context window of 8,192 tokens. Supports tool use.',
idPrefix: 'llama-3.3-70b-versatile',
label: 'Llama 3.3 · 70B Versatile',
description: 'LLaMA 3.3 70B developed by Meta with a context window of 131,072 tokens. Production model.',
contextWindow: 131072,
maxCompletionTokens: 32768,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.59, output: 0.79 },
},
{
idPrefix: 'llama-3.1-8b-instant',
label: 'Llama 3.1 · 8B Instant',
description: 'LLaMA 3.1 8B developed by Meta with a context window of 131,072 tokens. Production model.',
contextWindow: 131072,
maxCompletionTokens: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.05, output: 0.08 },
},
{
idPrefix: 'llama-guard-3-8b',
label: 'Llama Guard 3 · 8B',
description: 'LLaMA Guard 3 8B developed by Meta with a context window of 8,192 tokens.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.20, output: 0.20 },
},
{
idPrefix: 'llama3-70b-8192',
label: 'Llama 3 · 70B',
description: 'LLaMA 3 70B developed by Meta with a context window of 8,192 tokens. Production model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.59, output: 0.79 },
},
{
idPrefix: 'llama3-8b-8192',
label: 'Llama 3 · 8B',
description: 'LLaMA 3 8B developed by Meta with a context window of 8,192 tokens. Production model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.05, output: 0.08 },
},
{
idPrefix: 'mixtral-8x7b-32768',
label: 'Mixtral 8x7B Instruct 32k',
description: 'Mixtral 8x7B developed by Mistral with a context window of 32,768 tokens. Production model.',
contextWindow: 32768,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.24, output: 0.24 },
},
// Preview Models
{
isPreview: true,
idPrefix: 'llama-3.3-70b-specdec',
label: 'Llama 3.3 · 70B SpecDec (Preview)',
description: 'LLaMA 3.3 70B SpecDec with a context window of 8,192 tokens. Preview model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.59, output: 0.99 },
},
{
isPreview: true,
idPrefix: 'llama-3.2-1b-preview',
label: 'Llama 3.2 · 1B (Preview)',
description: 'LLaMA 3.2 1B with a context window of 131,072 tokens. Preview model.',
contextWindow: 131072,
maxCompletionTokens: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.04, output: 0.04 },
},
{
isPreview: true,
idPrefix: 'llama-3.2-3b-preview',
label: 'Llama 3.2 · 3B (Preview)',
description: 'LLaMA 3.2 3B with a context window of 131,072 tokens. Preview model.',
contextWindow: 131072,
maxCompletionTokens: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
chatPrice: { input: 0.06, output: 0.06 },
},
{
isPreview: true,
idPrefix: 'llama-3.2-11b-vision-preview',
label: 'Llama 3.2 · 11B Vision (Preview)',
description: 'LLaMA 3.2 11B Vision with a context window of 8,192 tokens. Preview model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
chatPrice: { input: 0.18, output: 0.18 },
},
{
isPreview: true,
idPrefix: 'llama-3.2-90b-vision-preview',
label: 'Llama 3.2 · 90B Vision (Preview)',
description: 'LLaMA 3.2 90B Vision with a context window of 8,192 tokens. Preview model.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
chatPrice: { input: 0.90, output: 0.90 },
},
];
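Mappings are resolved by id prefix (the startsWith check in the mapper below), so a dated or suffixed variant of a known id still picks up its base entry. A one-line sketch with an example id:

// e.g. 'llama-3.2-11b-vision-preview' (or a suffixed variant of it) resolves to its mapping
const known = _knownGroqModels.find(base => 'llama-3.2-11b-vision-preview'.startsWith(base.idPrefix));
// known?.label === 'Llama 3.2 · 11B Vision (Preview)'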
const groqDenyList: string[] = [
'whisper-',
];
export function groqModelFilter(model: { id: string }): boolean {
// substring match rather than a prefix match, so ids such as 'distil-whisper-large-v3-en' are excluded too
return !groqDenyList.some(deny => model.id.includes(deny));
}
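A small usage sketch of the filter (model ids are illustrative):

const fetched = [
  { id: 'whisper-large-v3' },
  { id: 'distil-whisper-large-v3-en' }, // caught by the substring match
  { id: 'llama-3.3-70b-versatile' },
];
const kept = fetched.filter(groqModelFilter);
// kept: [{ id: 'llama-3.3-70b-versatile' }]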
export function groqModelToModelDescription(_model: unknown): ModelDescriptionSchema {
const model = wireGroqModelsListOutputSchema.parse(_model);
// warn if the parsed context window differs from the mapped value
const knownModel = _knownGroqModels.find(base => model.id.startsWith(base.idPrefix));
if (!knownModel)
console.log(`groq.models: unknown model ${model.id}`, model);
if (knownModel && model.context_window !== knownModel.contextWindow)
console.warn(`groq.models: context window mismatch for ${model.id}: expected ${knownModel.contextWindow}, got ${model.context_window}`);
return fromManualMapping(_knownGroqModels, model.id, model.created, undefined, {
idPrefix: model.id,
label: model.id.replaceAll(/[_-]/g, ' '),
description: 'New Model',
contextWindow: 32768,
contextWindow: model.context_window || 32768,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
});
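Taken together: a known id resolves through the manual mapping, while an unknown id is logged and falls back to a hidden 'New Model' entry that still honors the API-reported context window (assuming fromManualMapping applies the fallback when no prefix matches). A sketch of the unknown path, with a hypothetical wire object:

// hypothetical future model id: logged as unknown, mapped via the fallback,
// with contextWindow taken from the wire's context_window (65,536 here)
const desc = groqModelToModelDescription({
  id: 'some-future-model',
  object: 'model',
  created: 1738108789,
  owned_by: 'Groq',
  active: true,
  context_window: 65536,
});
// desc.hidden === true, desc.contextWindow === 65536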
@@ -15,7 +15,7 @@ import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWi
import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types';
import { azureModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/models.data';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
import { groqModelSortFn, groqModelToModelDescription } from './models/groq.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './models/groq.models';
import { mistralModelsSort, mistralModelToModelDescription } from './models/mistral.models';
import { openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
import { perplexityAIModelDescriptions, perplexityAIModelSort } from './models/perplexity.models';
@@ -162,6 +162,7 @@ export const llmOpenAIRouter = createTRPCRouter({
case 'groq':
models = openAIModels
.filter(groqModelFilter)
.map(groqModelToModelDescription)
.sort(groqModelSortFn);
break;