LLMs: Anthropic: Fast mode research preview

This commit is contained in:
Enrico Ros
2026-02-10 13:22:47 -08:00
parent 3b13580613
commit 4912a03250
10 changed files with 48 additions and 5 deletions
@@ -159,6 +159,15 @@ export const DModelParameterRegistry = {
// No initialValue - undefined means high effort (default, equivalent to omitting the parameter)
}),
// Anthropic inference speed ("Fast Mode"): an opt-in enum parameter whose only selectable value is 'fast'.
llmVndAntInfSpeed: _enumDef({
label: 'Fast Mode',
type: 'enum',
description: 'Accelerated inference (~2.5x faster output) at 6x pricing. Preview access required.',
values: ['fast'],
// Multiplier keyed by enum value; 6 mirrors the "6x pricing" stated in the description above.
// NOTE(review): how/where this multiplier is applied to cost computation is not visible here - confirm.
enumPriceMultiplier: { fast: 6 },
// No initialValue - undefined means standard speed (omitted from request)
}),
llmVndAntSkills: {
label: 'Document Skills',
type: 'string',
+2 -1
View File
@@ -50,7 +50,7 @@ export function aixCreateModelFromLLMOptions(
// destructure input with the overrides
const {
llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream,
llmVndAnt1MContext, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndAntEffort, llmVndAntEffortMax,
llmVndAnt1MContext, llmVndAntInfSpeed, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndAntEffort, llmVndAntEffortMax,
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiShowThoughts, llmVndGeminiThinkingBudget, llmVndGeminiThinkingLevel, llmVndGeminiThinkingLevel4,
llmVndMoonReasoningEffort, // -> mapped to vndOaiReasoningEffort below
// llmVndMoonshotWebSearch,
@@ -108,6 +108,7 @@ export function aixCreateModelFromLLMOptions(
...(llmForceNoStream ? { forceNoStream: true } : {}),
...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget === -1 ? 'adaptive' as const : llmVndAntThinkingBudget } : {}),
...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}),
...(llmVndAntInfSpeed === 'fast' ? { vndAntInfSpeed: 'fast' } : {}),
...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}),
...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}),
...(llmVndAntWebSearch === 'auto' ? { vndAntWebSearch: llmVndAntWebSearch } : {}),
@@ -452,6 +452,7 @@ export namespace AixWire_API {
// Anthropic
vndAnt1MContext: z.boolean().optional(),
vndAntEffort: z.enum(['low', 'medium', 'high', 'max']).optional(),
vndAntInfSpeed: z.enum(['fast']).optional(),
vndAntSkills: z.string().optional(),
vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(),
vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant
@@ -193,6 +193,10 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
console.warn('[Anthropic] Structured output_config.format may conflict with web_fetch citations');
}
// [Anthropic, fast-mode-2026-02-01] Fast inference mode (preview/waitlist)
if (model.vndAntInfSpeed === 'fast')
payload.speed = 'fast';
// --- Tools ---
// Allow/deny auto-adding hosted tools when custom tools are present
@@ -65,6 +65,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
vndAnt1MContext: model.vndAnt1MContext === true,
vndAntEffort: !!model.vndAntEffort,
enableSkills: !!model.vndAntSkills,
enableFastMode: model.vndAntInfSpeed === 'fast',
enableStrictOutputs: !!model.strictJsonOutput || !!model.strictToolInvocations, // [Anthropic, 2025-11-13] for both JSON output and grammar-constrained tool invocations inputs
enableToolSearch: !!model.vndAntToolSearch,
enableProgrammaticToolCalling: usesProgrammaticToolCalling,
@@ -896,6 +896,12 @@ export namespace AnthropicWire_API_Message_Create {
* */
top_p: z.number().optional(),
/**
* [Anthropic, fast-mode-2026-02-01] Accelerated inference mode.
* Preview/waitlist. Only supported on Claude Opus 4.6.
*/
speed: z.enum(['fast']).optional(),
/**
* [Anthropic, 2026-02-01] Geographic region for model inference.
* - "global": default, inference may run in any available geography
@@ -250,6 +250,7 @@ export function LLMParametersEditor(props: {
llmVndAnt1MContext,
llmVndAntEffort,
llmVndAntEffortMax,
llmVndAntInfSpeed,
llmVndAntSkills,
llmVndAntThinkingBudget,
llmVndAntWebFetch,
@@ -467,6 +468,20 @@ export function LLMParametersEditor(props: {
/>
)}
{/* Anthropic Fast Mode toggle - rendered only when showParam('llmVndAntInfSpeed') is true.
NOTE(review): whether the spec is additionally marked hidden (parameterSpec.hidden) is not visible here;
the Claude Opus 4.6 parameterSpecs in this change list the param without `hidden` - confirm intended visibility. */}
{showParam('llmVndAntInfSpeed') && (
<FormSwitchControl
title='Fast Mode (Preview)'
description={llmVndAntInfSpeed === 'fast' ? 'Fast - 6x pricing ⚠️' : 'Standard (default)'}
tooltip='Accelerated inference (~2.5x faster output) at 6x pricing. Preview access required.'
checked={llmVndAntInfSpeed === 'fast'}
onChange={(checked) => {
// Off: remove the parameter entirely (unset is shown as 'Standard (default)') rather than storing a falsy value.
// On: store 'fast', the only value this control ever writes.
if (!checked) onRemoveParameter('llmVndAntInfSpeed');
else onChangeParameter({ llmVndAntInfSpeed: 'fast' });
}}
/>
)}
{isExtra && showParam('llmVndAntSkills') && (
<AnthropicSkillsConfig llmVndAntSkills={llmVndAntSkills} onChangeParameter={onChangeParameter} onRemoveParameter={onRemoveParameter} />
)}
@@ -85,6 +85,7 @@ export type AnthropicHeaderOptions = {
vndAntEffort?: boolean; // [Anthropic, effort-2025-11-24]
enableSkills?: boolean;
enableCodeExecution?: boolean;
enableFastMode?: boolean; // [Anthropic, fast-mode-2026-02-01]
enableStrictOutputs?: boolean; // [Anthropic, 2025-11-13] Structured Outputs (JSON outputs & strict tool use)
enableToolSearch?: boolean; // [Anthropic, 2025-11-24] Tool Search Tool
enableProgrammaticToolCalling?: boolean; // [Anthropic, 2025-11-24] Programmatic Tool Calling (allowed_callers, input_examples)
@@ -165,9 +166,12 @@ function _anthropicHeaders(options?: AnthropicHeaderOptions): Record<string, str
}
// Add beta feature for code execution (required for Skills)
if (options?.enableCodeExecution || options?.enableSkills) {
if (options?.enableCodeExecution || options?.enableSkills)
betaFeatures.push('code-execution-2025-08-25');
}
// [Anthropic, fast-mode-2026-02-01] Fast inference mode
if (options?.enableFastMode)
betaFeatures.push('fast-mode-2026-02-01');
// [Anthropic, 2025-11-24] Add beta feature for effort parameter (Claude Opus 4.5+)
if (options?.vndAntEffort)
@@ -41,7 +41,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
label: 'Claude Opus 4.6 (Adaptive)',
description: 'Claude Opus 4.6 with adaptive thinking mode for the most complex reasoning and agentic workflows',
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* adaptive */ }, { paramId: 'llmVndAntEffortMax' }, { paramId: 'llmVndAnt1MContext' }],
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* adaptive */ }, { paramId: 'llmVndAntEffortMax' }, { paramId: 'llmVndAnt1MContext' }, { paramId: 'llmVndAntInfSpeed' }],
// benchmark: { cbaElo: ... }, // TBD
},
@@ -136,7 +136,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
contextWindow: 200000,
maxCompletionTokens: 128000,
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntEffortMax' }, { paramId: 'llmVndAnt1MContext' }],
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntEffortMax' }, { paramId: 'llmVndAnt1MContext' }, { paramId: 'llmVndAntInfSpeed' }],
// Note: Tiered pricing - ≤200K: $5/$25, >200K: $10/$37.50 (with 1M context enabled)
// Cache pricing also tiered: write 1.25× input, read 0.10× input
chatPrice: {
@@ -359,6 +359,7 @@ export function llmsAntCreatePlaceholderModel(model: AnthropicWire_API_Models_Li
const _ORT_ANT_IF_ALLOWLIST: ReadonlySet<string> = new Set([
LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning,
] as const);
// NOTE: llmVndAntInfSpeed intentionally NOT included - fast mode not available through OpenRouter
const _ORT_ANT_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
'llmVndAntEffort', 'llmVndAntEffortMax',
'llmVndAntThinkingBudget',
@@ -80,6 +80,7 @@ const ModelParameterSpec_schema = z.object({
'llmVndAnt1MContext',
'llmVndAntEffort',
'llmVndAntEffortMax',
'llmVndAntInfSpeed',
'llmVndAntSkills',
'llmVndAntThinkingBudget',
'llmVndAntWebFetch',