mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
LLMs: sync scores
This commit is contained in:
@@ -77,7 +77,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high', 'xhigh', 'max'] },
|
||||
...ANT_TOOLS_DYNAMIC,
|
||||
],
|
||||
// benchmark: { cbaElo: ... }, // TBD
|
||||
benchmark: { cbaElo: 1504 }, // claude-opus-4-7-thinking
|
||||
},
|
||||
|
||||
// Claude 4.6 models with thinking variants
|
||||
@@ -92,7 +92,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
{ paramId: 'llmVndAntInfSpeed' },
|
||||
...ANT_TOOLS_DYNAMIC,
|
||||
],
|
||||
// benchmark: { cbaElo: ... }, // TBD
|
||||
benchmark: { cbaElo: 1502 }, // claude-opus-4-6-thinking
|
||||
},
|
||||
|
||||
'claude-sonnet-4-6': {
|
||||
@@ -105,7 +105,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high', 'max'] },
|
||||
...ANT_TOOLS_DYNAMIC,
|
||||
],
|
||||
// benchmark: { cbaElo: ... }, // TBD
|
||||
benchmark: { cbaElo: 1463 + 1 }, // 1 (thinking) + claude-sonnet-4-6
|
||||
},
|
||||
|
||||
// Claude 4.5 models with thinking variants
|
||||
@@ -119,7 +119,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high'] },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
benchmark: { cbaElo: 1468 }, // claude-opus-4-5-20251101-thinking-32k
|
||||
benchmark: { cbaElo: 1473 }, // claude-opus-4-5-20251101-thinking-32k
|
||||
maxCompletionTokens: 32000,
|
||||
},
|
||||
|
||||
@@ -134,7 +134,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
{ paramId: 'llmVndAnt1MContext' },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
benchmark: { cbaElo: 1450 }, // claude-sonnet-4-5-20250929-thinking-32k
|
||||
benchmark: { cbaElo: 1452 }, // claude-sonnet-4-5-20250929-thinking-32k
|
||||
},
|
||||
|
||||
'claude-haiku-4-5-20251001': {
|
||||
@@ -147,6 +147,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
{ paramId: 'llmVndAntThinkingBudget' },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
benchmark: { cbaElo: 1408 + 1 }, // 1 (thinking) + claude-haiku-4-5-20251001
|
||||
},
|
||||
|
||||
// Claude 4.1 models with thinking variants
|
||||
@@ -160,7 +161,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
{ paramId: 'llmVndAntThinkingBudget' },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
benchmark: { cbaElo: 1448 }, // claude-opus-4-1-20250805-thinking-16k
|
||||
benchmark: { cbaElo: 1449 }, // claude-opus-4-1-20250805-thinking-16k
|
||||
},
|
||||
|
||||
// Claude 4 models with thinking variants
|
||||
@@ -189,7 +190,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
{ paramId: 'llmVndAnt1MContext' },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
benchmark: { cbaElo: 1400 }, // claude-sonnet-4-20250514-thinking-32k
|
||||
benchmark: { cbaElo: 1399 }, // claude-sonnet-4-20250514-thinking-32k
|
||||
},
|
||||
|
||||
// Changes to the thinking variant (same model ID) for the Claude Sonnet 3.7 model
|
||||
@@ -203,7 +204,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
{ paramId: 'llmVndAntThinkingBudget' },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
benchmark: { cbaElo: 1389 }, // claude-3-7-sonnet-20250219-thinking-32k
|
||||
benchmark: { cbaElo: 1387 }, // claude-3-7-sonnet-20250219-thinking-32k
|
||||
},
|
||||
|
||||
} as const;
|
||||
@@ -231,7 +232,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
// Breaking changes vs 4.6: extended thinking budgets removed (adaptive-only), temperature/top_p/top_k rejected,
|
||||
// thinking content omitted by default, new tokenizer (~1x to 1.35x tokens for same text), no prefill.
|
||||
chatPrice: { input: 5, output: 25, cache: { cType: 'ant-bp', read: 0.50, write: 6.25, duration: 300 } },
|
||||
// benchmark: { cbaElo: ... }, // TBD
|
||||
benchmark: { cbaElo: 1497 }, // claude-opus-4-7
|
||||
},
|
||||
|
||||
// Claude 4.6 models
|
||||
@@ -249,7 +250,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
],
|
||||
// Opus 4.6: flat $5/$25 pricing (1M context GA at standard pricing since 2026-03-13, no opt-in required)
|
||||
chatPrice: { input: 5, output: 25, cache: { cType: 'ant-bp', read: 0.50, write: 6.25, duration: 300 } },
|
||||
// benchmark: { cbaElo: ... }, // TBD
|
||||
benchmark: { cbaElo: 1496 }, // claude-opus-4-6
|
||||
},
|
||||
{
|
||||
id: 'claude-sonnet-4-6', // Active
|
||||
@@ -264,7 +265,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
],
|
||||
// Sonnet 4.6: flat $3/$15 pricing (1M context GA at standard pricing since 2026-03-13, no opt-in required)
|
||||
chatPrice: { input: 3, output: 15, cache: { cType: 'ant-bp', read: 0.30, write: 3.75, duration: 300 } },
|
||||
// benchmark: { cbaElo: ... }, // TBD
|
||||
benchmark: { cbaElo: 1463 }, // claude-sonnet-4-6
|
||||
},
|
||||
|
||||
// Claude 4.5 models
|
||||
@@ -280,7 +281,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
chatPrice: { input: 5, output: 25, cache: { cType: 'ant-bp', read: 0.50, write: 6.25, duration: 300 } },
|
||||
benchmark: { cbaElo: 1466 }, // claude-opus-4-5-20251101
|
||||
benchmark: { cbaElo: 1469 }, // claude-opus-4-5-20251101
|
||||
},
|
||||
{
|
||||
id: 'claude-sonnet-4-5-20250929', // Active
|
||||
@@ -305,7 +306,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
duration: 300,
|
||||
},
|
||||
},
|
||||
benchmark: { cbaElo: 1450 }, // claude-sonnet-4-5-20250929
|
||||
benchmark: { cbaElo: 1452 }, // claude-sonnet-4-5-20250929
|
||||
},
|
||||
{
|
||||
id: 'claude-haiku-4-5-20251001', // Active
|
||||
@@ -316,7 +317,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
interfaces: IF_4,
|
||||
parameterSpecs: ANT_TOOLS,
|
||||
chatPrice: { input: 1, output: 5, cache: { cType: 'ant-bp', read: 0.10, write: 1.25, duration: 300 } },
|
||||
benchmark: { cbaElo: 1403 }, // claude-haiku-4-5-20251001
|
||||
benchmark: { cbaElo: 1408 }, // claude-haiku-4-5-20251001
|
||||
},
|
||||
|
||||
// Claude 4.1 models
|
||||
@@ -329,7 +330,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
interfaces: IF_4,
|
||||
parameterSpecs: ANT_TOOLS,
|
||||
chatPrice: { input: 15, output: 75, cache: { cType: 'ant-bp', read: 1.50, write: 18.75, duration: 300 } },
|
||||
benchmark: { cbaElo: 1445 }, // claude-opus-4-1-20250805
|
||||
benchmark: { cbaElo: 1447 }, // claude-opus-4-1-20250805
|
||||
},
|
||||
|
||||
// Claude 4 models
|
||||
@@ -343,7 +344,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
interfaces: IF_4,
|
||||
parameterSpecs: ANT_TOOLS,
|
||||
chatPrice: { input: 15, output: 75, cache: { cType: 'ant-bp', read: 1.50, write: 18.75, duration: 300 } },
|
||||
benchmark: { cbaElo: 1414 }, // claude-opus-4-20250514
|
||||
benchmark: { cbaElo: 1412 }, // claude-opus-4-20250514
|
||||
isLegacy: true,
|
||||
},
|
||||
{
|
||||
@@ -370,7 +371,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
duration: 300,
|
||||
},
|
||||
},
|
||||
benchmark: { cbaElo: 1390 }, // claude-sonnet-4-20250514
|
||||
benchmark: { cbaElo: 1389 }, // claude-sonnet-4-20250514
|
||||
isLegacy: true,
|
||||
},
|
||||
|
||||
@@ -384,7 +385,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
interfaces: IF_4,
|
||||
parameterSpecs: ANT_TOOLS,
|
||||
chatPrice: { input: 3, output: 15, cache: { cType: 'ant-bp', read: 0.30, write: 3.75, duration: 300 } },
|
||||
benchmark: { cbaElo: 1372 }, // claude-3-7-sonnet-20250219
|
||||
benchmark: { cbaElo: 1371 }, // claude-3-7-sonnet-20250219
|
||||
hidden: true, // retired
|
||||
isLegacy: true,
|
||||
},
|
||||
@@ -401,7 +402,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
interfaces: IF_4,
|
||||
parameterSpecs: ANT_TOOLS,
|
||||
chatPrice: { input: 0.80, output: 4.00, cache: { cType: 'ant-bp', read: 0.08, write: 1.00, duration: 300 } },
|
||||
benchmark: { cbaElo: 1324 }, // claude-3-5-haiku-20241022
|
||||
benchmark: { cbaElo: 1323 }, // claude-3-5-haiku-20241022
|
||||
hidden: true, // retired
|
||||
isLegacy: true,
|
||||
},
|
||||
@@ -417,7 +418,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
maxCompletionTokens: 4096,
|
||||
interfaces: IF_4,
|
||||
chatPrice: { input: 0.25, output: 1.25, cache: { cType: 'ant-bp', read: 0.03, write: 0.30, duration: 300 } },
|
||||
benchmark: { cbaElo: 1262 }, // claude-3-haiku-20240307
|
||||
benchmark: { cbaElo: 1260 }, // claude-3-haiku-20240307
|
||||
isLegacy: true,
|
||||
},
|
||||
|
||||
|
||||
@@ -208,7 +208,7 @@ const _knownGeminiModels: ({
|
||||
{ paramId: 'llmVndGeminiGoogleSearch' },
|
||||
// { paramId: 'llmVndGeminiComputerUse' }, // we don't have the logic to handle this yet
|
||||
],
|
||||
benchmark: undefined, // too new for CBA ELO (released Feb 19, 2026)
|
||||
benchmark: { cbaElo: 1493 }, // gemini-3.1-pro-preview
|
||||
},
|
||||
// 3.1 Pro (Preview) - Custom Tools variant - Released February 19, 2026
|
||||
// Better at prioritizing custom tools for users building with a mix of bash and tools
|
||||
@@ -225,7 +225,7 @@ const _knownGeminiModels: ({
|
||||
{ paramId: 'llmVndGeminiCodeExecution' },
|
||||
{ paramId: 'llmVndGeminiGoogleSearch' },
|
||||
],
|
||||
benchmark: undefined,
|
||||
benchmark: { cbaElo: 1493 - 1 }, // -1 (deprio this variant) + gemini-3.1-pro-preview
|
||||
},
|
||||
|
||||
// 3.1 Flash Image Preview - Released February 26, 2026
|
||||
@@ -259,7 +259,7 @@ const _knownGeminiModels: ({
|
||||
{ paramId: 'llmVndGeminiCodeExecution' },
|
||||
{ paramId: 'llmVndGeminiGoogleSearch' },
|
||||
],
|
||||
benchmark: undefined, // too new (released March 3, 2026)
|
||||
benchmark: { cbaElo: 1438 }, // gemini-3.1-flash-lite-preview
|
||||
},
|
||||
|
||||
|
||||
@@ -280,7 +280,7 @@ const _knownGeminiModels: ({
|
||||
{ paramId: 'llmVndGeminiGoogleSearch' },
|
||||
// { paramId: 'llmVndGeminiComputerUse' }, // we don't have the logic to handle this yet
|
||||
],
|
||||
benchmark: { cbaElo: 1487 }, // gemini-3-pro
|
||||
benchmark: { cbaElo: 1486 }, // gemini-3-pro
|
||||
},
|
||||
|
||||
// 3.0 Pro Image Preview - Released November 20, 2025
|
||||
@@ -331,7 +331,7 @@ const _knownGeminiModels: ({
|
||||
{ paramId: 'llmVndGeminiGoogleSearch' },
|
||||
// { paramId: 'llmVndGeminiComputerUse' }, // we don't have the logic to handle this yet
|
||||
],
|
||||
benchmark: { cbaElo: 1471 }, // gemini-3-flash
|
||||
benchmark: { cbaElo: 1474 }, // gemini-3-flash
|
||||
},
|
||||
|
||||
/// Generation 2.5
|
||||
@@ -350,7 +350,7 @@ const _knownGeminiModels: ({
|
||||
},
|
||||
{ paramId: 'llmVndGeminiGoogleSearch' },
|
||||
],
|
||||
benchmark: { cbaElo: 1450 }, // gemini-2.5-pro
|
||||
benchmark: { cbaElo: 1448 }, // gemini-2.5-pro
|
||||
},
|
||||
|
||||
// REMOVED MODELS (no longer returned by API as of Jan 8, 2026):
|
||||
@@ -403,7 +403,7 @@ const _knownGeminiModels: ({
|
||||
{ paramId: 'llmVndGeminiThinkingBudget' },
|
||||
{ paramId: 'llmVndGeminiGoogleSearch' },
|
||||
],
|
||||
benchmark: { cbaElo: 1409 }, // gemini-2.5-flash
|
||||
benchmark: { cbaElo: 1411 }, // gemini-2.5-flash
|
||||
},
|
||||
|
||||
// REMOVED MODELS (no longer returned by API as of Nov 20, 2025):
|
||||
@@ -559,7 +559,7 @@ const _knownGeminiModels: ({
|
||||
deprecated: '2026-06-01',
|
||||
chatPrice: gemini20FlashPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_GEM_CodeExecution],
|
||||
benchmark: { cbaElo: 1361 }, // gemini-2.0-flash-001
|
||||
benchmark: { cbaElo: 1360 }, // gemini-2.0-flash-001
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-2.0-flash',
|
||||
@@ -568,7 +568,7 @@ const _knownGeminiModels: ({
|
||||
// copied from symlink
|
||||
chatPrice: gemini20FlashPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_GEM_CodeExecution],
|
||||
benchmark: { cbaElo: 1361 }, // gemini-2.0-flash
|
||||
benchmark: { cbaElo: 1360 }, // gemini-2.0-flash
|
||||
},
|
||||
|
||||
// 2.0 Flash Lite - DEPRECATED: shutdown June 1, 2026 (announced Feb 18, 2026)
|
||||
@@ -625,6 +625,7 @@ const _knownGeminiModels: ({
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_Sys0ToUsr0],
|
||||
parameterSpecs: [{ paramId: 'llmVndGemEffort', enumValues: ['minimal', 'high'] }],
|
||||
chatPrice: geminiExpFree, // Free tier only according to pricing page
|
||||
benchmark: { cbaElo: 1451 }, // gemma-4-31b
|
||||
},
|
||||
{
|
||||
hidden: true, // smaller MoE variant
|
||||
@@ -633,6 +634,7 @@ const _knownGeminiModels: ({
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_Sys0ToUsr0],
|
||||
parameterSpecs: [{ paramId: 'llmVndGemEffort', enumValues: ['minimal', 'high'] }],
|
||||
chatPrice: geminiExpFree, // Free tier only according to pricing page
|
||||
benchmark: { cbaElo: 1439 }, // gemma-4-26b-a4b
|
||||
},
|
||||
|
||||
// Gemma 3n Model (newer than 3, first seen on the May 2025 update)
|
||||
@@ -641,7 +643,7 @@ const _knownGeminiModels: ({
|
||||
isPreview: true,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_Sys0ToUsr0],
|
||||
chatPrice: geminiExpFree, // Free tier only according to pricing page
|
||||
benchmark: { cbaElo: 1319 }, // gemma-3n-e4b-it
|
||||
benchmark: { cbaElo: 1318 }, // gemma-3n-e4b-it
|
||||
},
|
||||
{
|
||||
id: 'models/gemma-3n-e2b-it',
|
||||
@@ -659,7 +661,7 @@ const _knownGeminiModels: ({
|
||||
isPreview: true,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_Sys0ToUsr0],
|
||||
chatPrice: geminiExpFree, // Pricing page indicates free tier only
|
||||
benchmark: { cbaElo: 1365 }, // gemma-3-27b-it
|
||||
benchmark: { cbaElo: 1366 }, // gemma-3-27b-it
|
||||
// hidden: true, // Keep visible if it's a distinct offering
|
||||
},
|
||||
{
|
||||
|
||||
@@ -22,7 +22,7 @@ const _knownDeepseekChatModels: ManualMappings = [
|
||||
// ],
|
||||
maxCompletionTokens: 32768, // default, max: 65536
|
||||
chatPrice: { input: 0.28, output: 0.42, cache: { cType: 'oai-ac', read: 0.028 } },
|
||||
benchmark: { cbaElo: 1412 }, //deepseek-v3.2-exp-thinking
|
||||
benchmark: { cbaElo: 1425 }, // deepseek-v3.2-exp-thinking
|
||||
},
|
||||
{
|
||||
idPrefix: 'deepseek-chat',
|
||||
@@ -32,7 +32,7 @@ const _knownDeepseekChatModels: ManualMappings = [
|
||||
interfaces: IF_3,
|
||||
maxCompletionTokens: 8192, // default is 4096, max is 8192
|
||||
chatPrice: { input: 0.28, output: 0.42, cache: { cType: 'oai-ac', read: 0.028 } },
|
||||
benchmark: { cbaElo: 1420 }, // deepseek-v3.2
|
||||
benchmark: { cbaElo: 1424 }, // deepseek-v3.2
|
||||
},
|
||||
];
|
||||
|
||||
|
||||
@@ -25,16 +25,16 @@ const _knownMistralModelDetails: Record<string, {
|
||||
}> = {
|
||||
|
||||
// Premier models - Mistral 3 (Dec 2025)
|
||||
'mistral-large-2512': { chatPrice: { input: 0.5, output: 1.5 }, benchmark: { cbaElo: 1414 } }, // Mistral Large 3 - MoE 41B active / 675B total
|
||||
'mistral-large-2512': { chatPrice: { input: 0.5, output: 1.5 }, benchmark: { cbaElo: 1415 } }, // Mistral Large 3 - MoE 41B active / 675B total
|
||||
'mistral-large-2411': { chatPrice: { input: 2, output: 6 }, benchmark: { cbaElo: 1305 }, hidden: true }, // older version
|
||||
'mistral-large-latest': { chatPrice: { input: 0.5, output: 1.5 }, hidden: true }, // → 2512
|
||||
|
||||
'mistral-medium-2508': { chatPrice: { input: 0.4, output: 2 }, benchmark: { cbaElo: 1412 } }, // Mistral Medium 3
|
||||
'mistral-medium-2505': { chatPrice: { input: 0.4, output: 2 }, benchmark: { cbaElo: 1384 }, hidden: true }, // older version
|
||||
'mistral-medium-2508': { chatPrice: { input: 0.4, output: 2 }, benchmark: { cbaElo: 1410 } }, // Mistral Medium 3
|
||||
'mistral-medium-2505': { chatPrice: { input: 0.4, output: 2 }, benchmark: { cbaElo: 1387 }, hidden: true }, // older version
|
||||
'mistral-medium-latest': { chatPrice: { input: 0.4, output: 2 }, hidden: true }, // → 2508
|
||||
'mistral-medium': { chatPrice: { input: 0.4, output: 2 }, hidden: true }, // symlink
|
||||
|
||||
'magistral-medium-2509': { chatPrice: { input: 2, output: 5 }, benchmark: { cbaElo: 1305 } }, // reasoning
|
||||
'magistral-medium-2509': { chatPrice: { input: 2, output: 5 }, benchmark: { cbaElo: 1304 } }, // reasoning (leaderboard: magistral-medium-2506 = 1304)
|
||||
'magistral-medium-latest': { chatPrice: { input: 2, output: 5 }, hidden: true }, // symlink
|
||||
|
||||
'devstral-2512': { label: 'Devstral 2 (2512)', chatPrice: { input: 0.4, output: 2 } }, // Devstral 2 - 123B coding agents (API returns "Mistral Vibe Cli")
|
||||
@@ -70,7 +70,7 @@ const _knownMistralModelDetails: Record<string, {
|
||||
|
||||
// Open models
|
||||
'mistral-small-2603': { chatPrice: { input: 0.15, output: 0.6 } }, // Mistral Small 4 - 119B hybrid (instruct+reasoning+coding), 256k ctx
|
||||
'mistral-small-2506': { chatPrice: { input: 0.1, output: 0.3 }, benchmark: { cbaElo: 1356 }, hidden: true }, // Mistral Small 3.2
|
||||
'mistral-small-2506': { chatPrice: { input: 0.1, output: 0.3 }, benchmark: { cbaElo: 1357 }, hidden: true }, // Mistral Small 3.2
|
||||
'mistral-small-latest': { chatPrice: { input: 0.15, output: 0.6 }, hidden: true }, // → 2603
|
||||
|
||||
'labs-mistral-small-creative': { label: 'Mistral Small Creative', chatPrice: { input: 0.1, output: 0.3 } }, // creative writing, roleplay (Labs)
|
||||
|
||||
@@ -42,7 +42,7 @@ const _knownMoonshotModels: ManualMappings = [
|
||||
interfaces: IF_K2_5,
|
||||
parameterSpecs: _PS_Reasoning,
|
||||
chatPrice: { input: 0.60, output: 3.00, cache: { cType: 'oai-ac', read: 0.10 } },
|
||||
benchmark: { cbaElo: 1450 }, // kimi-k2.5-thinking
|
||||
benchmark: { cbaElo: 1451 }, // kimi-k2.5-thinking
|
||||
},
|
||||
|
||||
// Kimi K2 Series - Latest Models
|
||||
@@ -57,7 +57,7 @@ const _knownMoonshotModels: ManualMappings = [
|
||||
interfaces: IF_K2_REASON,
|
||||
// parameterSpecs: [{ paramId: 'llmVndMoonshotWebSearch' }], // NOT WORKING YET
|
||||
chatPrice: { input: 1.15, output: 8.00, cache: { cType: 'oai-ac', read: 0.15 } },
|
||||
benchmark: { cbaElo: 1429 }, // kimi-k2-thinking-turbo
|
||||
benchmark: { cbaElo: 1430 }, // kimi-k2-thinking-turbo
|
||||
},
|
||||
// Thinking
|
||||
{
|
||||
|
||||
@@ -107,7 +107,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
chatPrice: { input: 2.5, cache: { cType: 'oai-ac', read: 0.25 }, output: 15 },
|
||||
benchmark: { cbaElo: 1481 }, // gpt-5.4-high (preliminary)
|
||||
benchmark: { cbaElo: 1482 }, // gpt-5.4-high
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-5.4',
|
||||
@@ -156,7 +156,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
chatPrice: { input: 0.75, cache: { cType: 'oai-ac', read: 0.075 }, output: 4.5 },
|
||||
// benchmark: TBD
|
||||
benchmark: { cbaElo: 1458 }, // gpt-5.4-mini-high
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-5.4-mini',
|
||||
@@ -181,7 +181,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
chatPrice: { input: 0.2, cache: { cType: 'oai-ac', read: 0.02 }, output: 1.25 },
|
||||
// benchmark: TBD
|
||||
benchmark: { cbaElo: 1404 }, // gpt-5.4-nano-high
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-5.4-nano',
|
||||
@@ -242,7 +242,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
{ paramId: 'llmVndOaiCodeInterpreter' },
|
||||
],
|
||||
chatPrice: { input: 1.75, cache: { cType: 'oai-ac', read: 0.175 }, output: 14 },
|
||||
// benchmark: TBD
|
||||
benchmark: { cbaElo: 1451 }, // gpt-5.3-chat-latest
|
||||
},
|
||||
|
||||
|
||||
@@ -265,7 +265,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
chatPrice: { input: 1.75, cache: { cType: 'oai-ac', read: 0.175 }, output: 14 },
|
||||
benchmark: { cbaElo: 1440 }, // gpt-5.2-high
|
||||
benchmark: { cbaElo: 1441 }, // gpt-5.2-high
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-5.2',
|
||||
@@ -306,7 +306,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
{ paramId: 'llmVndOaiCodeInterpreter' },
|
||||
],
|
||||
chatPrice: { input: 1.75, cache: { cType: 'oai-ac', read: 0.175 }, output: 14 },
|
||||
// benchmark: TBD
|
||||
benchmark: { cbaElo: 1477 }, // gpt-5.2-chat-latest-20260210
|
||||
},
|
||||
|
||||
// GPT-5.2 Pro
|
||||
@@ -352,7 +352,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
chatPrice: { input: 1.25, cache: { cType: 'oai-ac', read: 0.125 }, output: 10 },
|
||||
benchmark: { cbaElo: 1459 }, // gpt-5.1-high
|
||||
benchmark: { cbaElo: 1455 }, // gpt-5.1-high
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-5.1',
|
||||
@@ -450,7 +450,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
{ paramId: 'llmForceNoStream' }, // non-streaming option for unverified organizations
|
||||
],
|
||||
chatPrice: { input: 1.25, cache: { cType: 'oai-ac', read: 0.125 }, output: 10 },
|
||||
benchmark: { cbaElo: 1435 }, // gpt-5-high
|
||||
benchmark: { cbaElo: 1433 }, // gpt-5-high
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-5',
|
||||
@@ -553,7 +553,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }],
|
||||
chatPrice: { input: 0.05, cache: { cType: 'oai-ac', read: 0.005 }, output: 0.4 },
|
||||
benchmark: { cbaElo: 1338 }, // gpt-5-nano-high
|
||||
benchmark: { cbaElo: 1337 }, // gpt-5-nano-high
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-5-nano',
|
||||
@@ -575,7 +575,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
// chatPrice: TBD - unknown pricing
|
||||
benchmark: { cbaElo: 1354 }, // gpt-oss-120b
|
||||
benchmark: { cbaElo: 1353 }, // gpt-oss-120b
|
||||
},
|
||||
|
||||
|
||||
@@ -630,7 +630,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
interfaces: IFS_CHAT_CACHE_REASON,
|
||||
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }],
|
||||
chatPrice: { input: 1.1, cache: { cType: 'oai-ac', read: 0.275 }, output: 4.4 },
|
||||
benchmark: { cbaElo: 1391 }, // o4-mini-2025-04-16
|
||||
benchmark: { cbaElo: 1390 }, // o4-mini-2025-04-16
|
||||
},
|
||||
{
|
||||
idPrefix: 'o4-mini',
|
||||
@@ -683,7 +683,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
interfaces: IFS_CHAT_CACHE_REASON,
|
||||
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmForceNoStream' }],
|
||||
chatPrice: { input: 2, cache: { cType: 'oai-ac', read: 0.5 }, output: 8 },
|
||||
benchmark: { cbaElo: 1433 }, // o3-2025-04-16
|
||||
benchmark: { cbaElo: 1431 }, // o3-2025-04-16
|
||||
},
|
||||
{
|
||||
idPrefix: 'o3',
|
||||
@@ -892,7 +892,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 4096,
|
||||
interfaces: IFS_CHAT_MIN,
|
||||
chatPrice: { input: 5, output: 15 },
|
||||
benchmark: { cbaElo: 1346 }, // gpt-4o-2024-05-13
|
||||
benchmark: { cbaElo: 1345 }, // gpt-4o-2024-05-13
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4o',
|
||||
@@ -961,7 +961,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 16384,
|
||||
interfaces: IFS_CHAT_CACHE,
|
||||
chatPrice: { input: 0.15, cache: { cType: 'oai-ac', read: 0.075 }, output: 0.6 },
|
||||
benchmark: { cbaElo: 1318 }, // gpt-4o-mini-2024-07-18
|
||||
benchmark: { cbaElo: 1317 }, // gpt-4o-mini-2024-07-18
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4o-mini',
|
||||
@@ -1015,7 +1015,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 4096,
|
||||
interfaces: IFS_CHAT_MIN,
|
||||
chatPrice: { input: 10, output: 30 },
|
||||
benchmark: { cbaElo: 1325 }, // gpt-4-turbo-2024-04-09
|
||||
benchmark: { cbaElo: 1324 }, // gpt-4-turbo-2024-04-09
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4-turbo',
|
||||
@@ -1031,7 +1031,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
chatPrice: { input: 10, output: 30 },
|
||||
benchmark: { cbaElo: 1314 }, // gpt-4-0125-preview
|
||||
benchmark: { cbaElo: 1313 }, // gpt-4-0125-preview
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4-1106-preview', // GPT-4 Turbo preview model
|
||||
@@ -1042,7 +1042,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
chatPrice: { input: 10, output: 30 },
|
||||
benchmark: { cbaElo: 1314 }, // gpt-4-1106-preview
|
||||
benchmark: { cbaElo: 1312 }, // gpt-4-1106-preview
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4-turbo-preview',
|
||||
@@ -1060,7 +1060,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
chatPrice: { input: 30, output: 60 },
|
||||
benchmark: { cbaElo: 1276 }, // gpt-4-0613
|
||||
benchmark: { cbaElo: 1274 }, // gpt-4-0613
|
||||
isLegacy: true,
|
||||
},
|
||||
{
|
||||
@@ -1071,7 +1071,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
chatPrice: { input: 30, output: 60 },
|
||||
benchmark: { cbaElo: 1288 }, // gpt-4-0314
|
||||
benchmark: { cbaElo: 1286 }, // gpt-4-0314
|
||||
isLegacy: true,
|
||||
},
|
||||
{
|
||||
@@ -1094,7 +1094,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
chatPrice: { input: 0.5, output: 1.5 },
|
||||
benchmark: { cbaElo: 1225 }, // gpt-3.5-turbo-0125
|
||||
benchmark: { cbaElo: 1223 }, // gpt-3.5-turbo-0125
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-3.5-turbo-1106',
|
||||
@@ -1105,7 +1105,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
chatPrice: { input: 1, output: 2 },
|
||||
benchmark: { cbaElo: 1204 }, // gpt-3.5-turbo-1106
|
||||
benchmark: { cbaElo: 1202 }, // gpt-3.5-turbo-1106
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-3.5-turbo',
|
||||
|
||||
@@ -92,7 +92,7 @@ const _knownXAIChatModels: ManualMappings = [
|
||||
interfaces: [...XAI_IF_Vision, LLM_IF_OAI_Reasoning],
|
||||
parameterSpecs: XAI_PAR_Reasoning,
|
||||
chatPrice: PRICE_420,
|
||||
benchmark: { cbaElo: 1481 }, // grok-4.20-beta-0309-reasoning (CBA name)
|
||||
benchmark: { cbaElo: 1480 }, // grok-4.20-beta-0309-reasoning (CBA name)
|
||||
},
|
||||
{
|
||||
idPrefix: 'grok-4.20-0309-non-reasoning',
|
||||
@@ -103,7 +103,7 @@ const _knownXAIChatModels: ManualMappings = [
|
||||
interfaces: XAI_IF_Vision,
|
||||
parameterSpecs: XAI_PAR,
|
||||
chatPrice: PRICE_420,
|
||||
benchmark: { cbaElo: 1492 }, // grok-4.20-beta1 (CBA name, preliminary)
|
||||
benchmark: { cbaElo: 1482 }, // grok-4.20-beta1 (CBA name)
|
||||
},
|
||||
{
|
||||
idPrefix: 'grok-4.20-multi-agent-0309',
|
||||
@@ -118,6 +118,7 @@ const _knownXAIChatModels: ManualMappings = [
|
||||
...XAI_PAR_Reasoning,
|
||||
],
|
||||
chatPrice: PRICE_420,
|
||||
benchmark: { cbaElo: 1474 }, // grok-4.20-multi-agent-beta-0309
|
||||
},
|
||||
|
||||
// Grok 4.1
|
||||
@@ -130,7 +131,7 @@ const _knownXAIChatModels: ManualMappings = [
|
||||
interfaces: [...XAI_IF_Vision, LLM_IF_OAI_Reasoning],
|
||||
parameterSpecs: XAI_PAR_Reasoning,
|
||||
chatPrice: PRICE_41,
|
||||
benchmark: { cbaElo: 1430 }, // grok-4-1-fast-reasoning
|
||||
benchmark: { cbaElo: 1432 }, // grok-4-1-fast-reasoning
|
||||
},
|
||||
{
|
||||
idPrefix: 'grok-4-1-fast-non-reasoning',
|
||||
@@ -141,7 +142,7 @@ const _knownXAIChatModels: ManualMappings = [
|
||||
interfaces: XAI_IF_Vision,
|
||||
parameterSpecs: XAI_PAR,
|
||||
chatPrice: PRICE_41,
|
||||
benchmark: { cbaElo: 1466 }, // grok-4.1
|
||||
benchmark: { cbaElo: 1461 }, // grok-4.1
|
||||
},
|
||||
|
||||
// Grok 4
|
||||
@@ -167,6 +168,7 @@ const _knownXAIChatModels: ManualMappings = [
|
||||
interfaces: XAI_IF_Vision,
|
||||
parameterSpecs: XAI_PAR,
|
||||
chatPrice: PRICE_40,
|
||||
benchmark: { cbaElo: 1421 }, // grok-4-fast-chat
|
||||
},
|
||||
{
|
||||
hidden: true, // yield to 4.20
|
||||
@@ -191,7 +193,7 @@ const _knownXAIChatModels: ManualMappings = [
|
||||
interfaces: XAI_IF_Pre4,
|
||||
parameterSpecs: XAI_PAR_Pre4,
|
||||
chatPrice: { input: 3, output: 15, cache: { cType: 'oai-ac', read: 0.75 } },
|
||||
benchmark: { cbaElo: 1411 }, // grok-3-preview-02-24
|
||||
benchmark: { cbaElo: 1412 }, // grok-3-preview-02-24
|
||||
},
|
||||
{
|
||||
idPrefix: 'grok-3-mini',
|
||||
|
||||
Reference in New Issue
Block a user