From ff06f6f04c2853e15fc55452f4dba77ce58fbd70 Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Wed, 15 Oct 2025 15:51:18 -0700 Subject: [PATCH] LLMs: Groq: update --- .../llms/server/openai/models/groq.models.ts | 141 +++++++++--------- 1 file changed, 72 insertions(+), 69 deletions(-) diff --git a/src/modules/llms/server/openai/models/groq.models.ts b/src/modules/llms/server/openai/models/groq.models.ts index 6b5964ce8..41709013b 100644 --- a/src/modules/llms/server/openai/models/groq.models.ts +++ b/src/modules/llms/server/openai/models/groq.models.ts @@ -9,6 +9,7 @@ import { wireGroqModelsListOutputSchema } from '../groq.wiretypes'; * Groq models. * - models list: https://console.groq.com/docs/models * - pricing: https://groq.com/pricing/ + * - updated: 2025-10-15 */ const _knownGroqModels: ManualMappings = [ @@ -35,68 +36,93 @@ const _knownGroqModels: ManualMappings = [ }, { isPreview: true, - idPrefix: 'deepseek-r1-distill-llama-70b', - label: 'DeepSeek R1 Distill Llama 70B (Preview)', - description: 'DeepSeek R1 Distill Llama 70B with a context window of 128K tokens. Preview model.', + idPrefix: 'qwen/qwen3-32b', + label: 'Qwen 3 · 32B (Preview)', + description: 'Qwen3 32B developed by Alibaba Cloud with a 131K token context window. Preview model.', contextWindow: 131072, interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 0.75, output: 0.99 }, + chatPrice: { input: 0.29, output: 0.59 }, }, { isPreview: true, - idPrefix: 'qwen-qwq-32b', - label: 'Qwen QwQ 32B (Preview)', - description: 'Qwen QwQ 32B developed by Alibaba Cloud with a context window of 128K tokens. Preview model.', + idPrefix: 'moonshotai/kimi-k2-instruct-0905', + label: 'Kimi K2 Instruct 0905 (Preview)', + description: 'Kimi K2 Instruct 0905 1T model with a 262K token context window, up to 16,384 completion tokens. 
Preview model.', + contextWindow: 262144, + maxCompletionTokens: 16384, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + chatPrice: { input: 1.00, output: 3.00 }, + }, + { + idPrefix: 'moonshotai/kimi-k2-instruct', + label: 'Kimi K2 Instruct (Deprecated)', + description: 'Kimi K2 Instruct 1T model with a 131K token context window, up to 16,384 completion tokens. Deprecated - redirects to 0905 version. Production model.', contextWindow: 131072, + maxCompletionTokens: 16384, interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 0.29, output: 0.39 }, + chatPrice: { input: 1.00, output: 3.00 }, + hidden: true, // Deprecated + }, + + + // Production Models - Compound Systems + { + idPrefix: 'groq/compound', + label: 'Compound (Production System)', + description: 'Groq\'s agentic system with web search and code execution capabilities. 131,072 token context window, up to 8,192 completion tokens. Production system.', + contextWindow: 131072, + maxCompletionTokens: 8192, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + hidden: true, // Pricing unknown }, { - isPreview: true, - idPrefix: 'mistral-saba-24b', - label: 'Mistral Saba 24B (Preview)', - description: 'Mistral Saba 24B with a context window of 32K tokens. Preview model.', - contextWindow: 32768, + idPrefix: 'groq/compound-mini', + label: 'Compound Mini (Production System)', + description: 'Lighter version of Groq\'s agentic system with web search and code execution capabilities. 131,072 token context window, up to 8,192 completion tokens. Production system.', + contextWindow: 131072, + maxCompletionTokens: 8192, interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 0.79, output: 0.79 }, + hidden: true, // Pricing unknown + }, + + // Production Models - OpenAI + { + idPrefix: 'openai/gpt-oss-120b', + label: 'GPT OSS 120B', + description: 'OpenAI GPT-OSS 120B with reasoning, browser search, and code execution capabilities. 131,072 token context window, up to 65,536 completion tokens. 
Production model.', + contextWindow: 131072, + maxCompletionTokens: 65536, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + chatPrice: { input: 0.15, output: 0.75 }, }, { - isPreview: true, + idPrefix: 'openai/gpt-oss-20b', + label: 'GPT OSS 20B', + description: 'OpenAI GPT-OSS 20B with a 131,072 token context window and up to 65,536 completion tokens. Production model.', + contextWindow: 131072, + maxCompletionTokens: 65536, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], + chatPrice: { input: 0.10, output: 0.50 }, + }, + + // Production Models - SDAIA + { idPrefix: 'allam-2-7b', - label: 'ALLaM 2 7B (Preview)', - description: 'ALLaM 2 7B developed by Saudi Data and AI Authority (SDAIA) with a context window of 4,096 tokens. Preview model.', + label: 'ALLaM 2 · 7B', + description: 'ALLaM 2 7B bilingual Arabic-English model developed by SDAIA with a 4,096 token context window. Production model.', contextWindow: 4096, + maxCompletionTokens: 4096, interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - hidden: true, // Pricing unknown - }, - { - isPreview: true, - idPrefix: 'compound-beta', - label: 'Compound Beta (Preview System)', - description: 'Groq\'s agentic system with web search and code execution capabilities. Preview system with a context window of 128K tokens, up to 8,192 completion tokens.', - contextWindow: 8192, - // maxCompletionTokens: 8192, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - hidden: true, // Pricing unknown - }, - { - isPreview: true, - idPrefix: 'compound-beta-mini', - label: 'Compound Beta Mini (Preview System)', - description: 'Lighter version of Groq\'s agentic system with web search and code execution capabilities. 
Preview system with a context window of 128K tokens, up to 8,192 completion tokens.', - contextWindow: 8192, - // maxCompletionTokens: 8192, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - hidden: true, // Pricing unknown + hidden: true, // Pricing pending }, - - // Production Models + // Production Models - Meta { - idPrefix: 'gemma2-9b-it', - label: 'Gemma 2 · 9B Instruct', - description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Production model.', - contextWindow: 8192, + idPrefix: 'meta-llama/llama-guard-4-12b', + label: 'Llama Guard 4 · 12B', + description: 'LLaMA Guard 4 12B developed by Meta with a 128K token context window, up to 1,024 completion tokens. Production model.', + contextWindow: 131072, + maxCompletionTokens: 1024, interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], chatPrice: { input: 0.20, output: 0.20 }, }, @@ -112,36 +138,12 @@ const _knownGroqModels: ManualMappings = [ { idPrefix: 'llama-3.1-8b-instant', label: 'Llama 3.1 · 8B Instant', - description: 'LLaMA 3.1 8B developed by Meta with a context window of 128K tokens, up to 8,192 completion tokens. Production model.', + description: 'LLaMA 3.1 8B developed by Meta with a context window of 128K tokens, up to 131,072 completion tokens. Production model.', contextWindow: 131072, maxCompletionTokens: 131072, interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], chatPrice: { input: 0.05, output: 0.08 }, }, - { - idPrefix: 'llama-guard-3-8b', - label: 'Llama Guard 3 · 8B', - description: 'LLaMA Guard 3 8B developed by Meta with a context window of 8,192 tokens. Production model.', - contextWindow: 8192, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 0.20, output: 0.20 }, - }, - { - idPrefix: 'llama3-70b-8192', - label: 'Llama 3 · 70B', - description: 'LLaMA 3 70B developed by Meta with a context window of 8,192 tokens. 
Production model.', - contextWindow: 8192, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 0.59, output: 0.79 }, - }, - { - idPrefix: 'llama3-8b-8192', - label: 'Llama 3 · 8B', - description: 'LLaMA 3 8B developed by Meta with a context window of 8,192 tokens. Production model.', - contextWindow: 8192, - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn], - chatPrice: { input: 0.05, output: 0.08 }, - }, ]; @@ -150,6 +152,7 @@ const groqDenyList: string[] = [ 'whisper-', 'playai-tts', 'distil-whisper', + 'llama-prompt-guard', // Text classification models ]; export function groqModelFilter(model: { id: string }): boolean {