From 5b9c6a2d0efb240dc188e43d32d4da1e008a2dfb Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Fri, 6 Feb 2026 14:49:36 -0800 Subject: [PATCH] LLMs: Anthropic: support adaptive thinking correctly. #962 --- src/common/stores/llms/llms.parameters.ts | 4 ++-- src/common/stores/llms/store-llms.ts | 4 ++++ src/modules/aix/client/aix.client.ts | 2 +- src/modules/aix/server/api/aix.wiretypes.ts | 2 +- .../chatGenerate/adapters/anthropic.messageCreate.ts | 6 ++++-- .../chatGenerate/adapters/openai.chatCompletions.ts | 7 ++++++- .../aix/server/dispatch/wiretypes/anthropic.wiretypes.ts | 7 +++---- src/modules/llms/server/anthropic/anthropic.models.ts | 2 +- 8 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/common/stores/llms/llms.parameters.ts b/src/common/stores/llms/llms.parameters.ts index 42261dd7d..cb6484b1d 100644 --- a/src/common/stores/llms/llms.parameters.ts +++ b/src/common/stores/llms/llms.parameters.ts @@ -156,8 +156,8 @@ export const DModelParameterRegistry = { type: 'integer', description: 'Budget for extended thinking', range: [1024, 65536] as const, - initialValue: 16384, - nullable: { + initialValue: 16384, // special: '-1' is an out-of-range sentinel for 'adaptive' thinking (hidden, used for 4.6+) + nullable: { // null means to not turn on thinking at all, and it's the user-overridden equivalent to the param missing meaning: 'Disable extended thinking', }, }, diff --git a/src/common/stores/llms/store-llms.ts b/src/common/stores/llms/store-llms.ts index eb9f1717a..f2eaeb7d2 100644 --- a/src/common/stores/llms/store-llms.ts +++ b/src/common/stores/llms/store-llms.ts @@ -137,6 +137,10 @@ export const useModelsStore = create()(persist( if (currentValue && typeof currentValue === 'string' && !(regDef.values as readonly string[]).includes(currentValue)) delete result.userParameters[paramId]; // reset to default (undefined) } + + // NOTE: no range validation for integer/float types yet. If added, be aware that + // llmVndAntThinkingBudget uses initialValue: -1 (out of range [1024, 65536]) as a + // sentinel for adaptive thinking mode on hidden params — range checks must skip hidden params. } } diff --git a/src/modules/aix/client/aix.client.ts b/src/modules/aix/client/aix.client.ts index 3c741a5e4..c3bfdd800 100644 --- a/src/modules/aix/client/aix.client.ts +++ b/src/modules/aix/client/aix.client.ts @@ -105,7 +105,7 @@ export function aixCreateModelFromLLMOptions( ...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}), ...(llmTopP !== undefined ? { topP: llmTopP } : {}), ...(llmForceNoStream ? { forceNoStream: true } : {}), - ...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget } : {}), + ...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget === -1 ? 'adaptive' as const : llmVndAntThinkingBudget } : {}), ...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}), ...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}), ...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}), diff --git a/src/modules/aix/server/api/aix.wiretypes.ts b/src/modules/aix/server/api/aix.wiretypes.ts index ea2e7e51f..accff9d6a 100644 --- a/src/modules/aix/server/api/aix.wiretypes.ts +++ b/src/modules/aix/server/api/aix.wiretypes.ts @@ -453,7 +453,7 @@ export namespace AixWire_API { vndAnt1MContext: z.boolean().optional(), vndAntEffort: z.enum(['low', 'medium', 'high']).optional(), vndAntSkills: z.string().optional(), - vndAntThinkingBudget: z.number().nullable().optional(), + vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(), vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant vndAntWebFetch: z.enum(['auto']).optional(), vndAntWebSearch: z.enum(['auto']).optional(), diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts index ba99b0c76..e1c5348bf 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts @@ -132,11 +132,13 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate: delete payload.temperature; } - // [Anthropic] Thinking Budget + // [Anthropic] Thinking: adaptive (4.6+), enabled with budget (≤4.5), or disabled const areToolCallsRequired = payload.tool_choice && typeof payload.tool_choice === 'object' && (payload.tool_choice.type === 'any' || payload.tool_choice.type === 'tool'); const canUseThinking = !areToolCallsRequired || !hotFixDisableThinkingWhenToolsForced; if (model.vndAntThinkingBudget !== undefined && canUseThinking) { - payload.thinking = model.vndAntThinkingBudget !== null ? { + payload.thinking = model.vndAntThinkingBudget === 'adaptive' ? { + type: 'adaptive', + } : model.vndAntThinkingBudget !== null ? { type: 'enabled', budget_tokens: model.vndAntThinkingBudget < payload.max_tokens ? model.vndAntThinkingBudget : payload.max_tokens - 1, } : { diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts index e72f35d84..b28c3ec27 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts @@ -233,9 +233,14 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: // Anthropic via OpenRouter if (model.vndAntThinkingBudget !== undefined) { // vndAntThinkingBudget's presence indicates a user preference: + // - 'adaptive': adaptive thinking (4.6+) - skip, let effort handle it via OpenRouter // - a number: explicit token budget (1024-32000) // - null: disable thinking (don't set reasoning field) - if (model.vndAntThinkingBudget === null) { + if (model.vndAntThinkingBudget === 'adaptive') { + // Adaptive thinking on OpenRouter: no explicit budget, effort controls depth + // TODO: verify if this is a good assumption - new guide: + // https://openrouter.ai/docs/guides/guides/model-migrations/claude-4-6-opus#verbosity-vs-reasoning-effort + } else if (model.vndAntThinkingBudget === null) { // If null, don't set reasoning field at all (disables thinking) } else payload.reasoning = { max_tokens: model.vndAntThinkingBudget || 8192 }; diff --git a/src/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes.ts index a44cf213e..c48eef7a4 100644 --- a/src/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes.ts @@ -834,11 +834,10 @@ export namespace AnthropicWire_API_Message_Create { * When enabled, responses include thinking content blocks showing Claude's thinking process before the final answer. */ thinking: z.union([ + // [Anthropic, 4.6+] Adaptive thinking - Claude decides when and how much to think + z.object({ type: z.literal('adaptive') }), // Requires a minimum budget of 1,024 tokens and counts towards your max_tokens limit. - z.object({ - type: z.literal('enabled'), - budget_tokens: z.number(), - }), + z.object({ type: z.literal('enabled'), budget_tokens: z.number() }), // having this for completeness, but seems like it's not needed / can be omitted z.object({ type: z.literal('disabled') }), ]).optional(), diff --git a/src/modules/llms/server/anthropic/anthropic.models.ts b/src/modules/llms/server/anthropic/anthropic.models.ts index 5ac5dfa41..677a24bb4 100644 --- a/src/modules/llms/server/anthropic/anthropic.models.ts +++ b/src/modules/llms/server/anthropic/anthropic.models.ts @@ -40,7 +40,7 @@ const _hardcodedAnthropicVariants: ModelVariantMap = { label: 'Claude Opus 4.6 (Thinking)', description: 'Claude Opus 4.6 with adaptive thinking mode for the most complex reasoning and agentic workflows', interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch], - parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget' }, { paramId: 'llmVndAntEffort' }, { paramId: 'llmVndAnt1MContext' }], + parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* adaptive */ }, { paramId: 'llmVndAntEffort' }, { paramId: 'llmVndAnt1MContext' }], // benchmark: { cbaElo: ... }, // TBD maxCompletionTokens: 32000, },