LLMs: Anthropic: compress

This commit is contained in:
Enrico Ros
2025-10-18 14:18:24 -07:00
parent fbaff3bde3
commit 560a07b4fe
@@ -3,6 +3,17 @@ import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Re
import type { ModelDescriptionSchema } from '../llm.server.types';
/**
 * Shared `parameterSpecs` list containing the `llmVndAntWebSearch` and
 * `llmVndAntWebFetch` parameter ids.
 *
 * Declared once at module level so that model entries in this file can point
 * their `parameterSpecs` at this constant instead of repeating the same list.
 */
// NOTE(review): `as const` produces a readonly tuple while the annotation is the schema's `parameterSpecs` type - confirm the two are meant to be combined.
const ANT_PAR_WEB: ModelDescriptionSchema['parameterSpecs'] = [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
] as const;
/**
 * Shared `parameterSpecs` list for the '(Thinking)' variant entries in this file:
 * the `llmVndAntThinkingBudget` parameter (marked `required: true`, `hidden: false`)
 * followed by every entry of `ANT_PAR_WEB`.
 */
// NOTE(review): same `as const` + explicit annotation combination as `ANT_PAR_WEB` - confirm it is intended.
const ANT_PAR_WEB_THINKING: ModelDescriptionSchema['parameterSpecs'] = [
{ paramId: 'llmVndAntThinkingBudget', required: true, hidden: false },
// reuse the shared list so both constants stay in sync
...ANT_PAR_WEB,
] as const;
export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescriptionSchema> } = {
// Claude 4.5 models with thinking variants
@@ -10,11 +21,7 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Sonnet 4.5 (Thinking)',
description: 'Claude Sonnet 4.5 with extended thinking mode enabled for complex reasoning',
parameterSpecs: [
{ paramId: 'llmVndAntThinkingBudget', required: true, hidden: false },
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB_THINKING,
maxCompletionTokens: 64000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
benchmark: { cbaElo: 1451 + 1 }, // FALLBACK-UNTIL-AVAILABLE: claude-opus-4-1-20250805-thinking-16k + 1
@@ -24,11 +31,7 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Haiku 4.5 (Thinking)',
description: 'Claude Haiku 4.5 with extended thinking mode - first Haiku model with reasoning capabilities',
parameterSpecs: [
{ paramId: 'llmVndAntThinkingBudget', required: true, hidden: false },
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB_THINKING,
maxCompletionTokens: 64000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
},
@@ -38,11 +41,7 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Opus 4.1 (Thinking)',
description: 'Claude Opus 4.1 with extended thinking mode enabled for complex reasoning',
parameterSpecs: [
{ paramId: 'llmVndAntThinkingBudget', required: true, hidden: false },
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB_THINKING,
maxCompletionTokens: 32000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
benchmark: { cbaElo: 1451 }, // claude-opus-4-1-20250805-thinking-16k
@@ -54,11 +53,7 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Opus 4 (Thinking)',
description: 'Claude Opus 4 with extended thinking mode enabled for complex reasoning',
parameterSpecs: [
{ paramId: 'llmVndAntThinkingBudget', required: true, hidden: false },
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB_THINKING,
maxCompletionTokens: 32000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
benchmark: { cbaElo: 1420 }, // claude-opus-4-20250514-thinking-16k
@@ -68,11 +63,7 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Sonnet 4 (Thinking)',
description: 'Claude Sonnet 4 with extended thinking mode enabled for complex reasoning',
parameterSpecs: [
{ paramId: 'llmVndAntThinkingBudget', required: true, hidden: false },
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB_THINKING,
maxCompletionTokens: 64000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
benchmark: { cbaElo: 1400 }, // claude-sonnet-4-20250514-thinking-32k
@@ -83,11 +74,7 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Sonnet 3.7 (Thinking)',
description: 'Claude 3.7 with extended thinking mode enabled for complex reasoning',
parameterSpecs: [
{ paramId: 'llmVndAntThinkingBudget', required: true, hidden: false },
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB_THINKING,
maxCompletionTokens: 64000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
benchmark: { cbaElo: 1385 }, // claude-3-7-sonnet-20250219-thinking-32k
@@ -107,10 +94,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 64000,
trainingDataCutoff: 'Jul 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
parameterSpecs: [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB,
// Note: Tiered pricing - ≤200K: $3/$15, >200K: $6/$22.50. Using lower tier as base.
chatPrice: { input: 3, output: 15, cache: { cType: 'ant-bp', read: 0.30, write: 3.75, duration: 300 } },
benchmark: { cbaElo: 1438 + 1 }, // FALLBACK-UNTIL-AVAILABLE: claude-opus-4-1-20250805 + 1
@@ -123,10 +107,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 64000,
trainingDataCutoff: 'Jul 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
parameterSpecs: [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 1, output: 5, cache: { cType: 'ant-bp', read: 0.10, write: 1.25, duration: 300 } },
},
@@ -139,10 +120,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 32000,
trainingDataCutoff: 'Mar 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
parameterSpecs: [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 15, output: 75, cache: { cType: 'ant-bp', read: 1.50, write: 18.75, duration: 300 } },
benchmark: { cbaElo: 1438 }, // claude-opus-4-1-20250805
},
@@ -157,10 +135,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 32000,
trainingDataCutoff: 'Mar 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
parameterSpecs: [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 15, output: 75, cache: { cType: 'ant-bp', read: 1.50, write: 18.75, duration: 300 } },
benchmark: { cbaElo: 1411 }, // claude-opus-4-20250514
},
@@ -172,10 +147,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 64000,
trainingDataCutoff: 'Mar 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
parameterSpecs: [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 3, output: 15, cache: { cType: 'ant-bp', read: 0.30, write: 3.75, duration: 300 } },
benchmark: { cbaElo: 1386 }, // claude-sonnet-4-20250514
},
@@ -189,10 +161,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 64000,
trainingDataCutoff: 'Nov 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
parameterSpecs: [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 3, output: 15, cache: { cType: 'ant-bp', read: 0.30, write: 3.75, duration: 300 } },
benchmark: { cbaElo: 1369 }, // claude-3-7-sonnet-20250219
},
@@ -232,10 +201,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 8192,
trainingDataCutoff: 'Jul 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
parameterSpecs: [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
],
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 0.80, output: 4.00, cache: { cType: 'ant-bp', read: 0.08, write: 1.00, duration: 300 } },
benchmark: { cbaElo: 1319, cbaMmlu: 75.2 }, // claude-3-5-haiku-20241022
},