LLMs: Anthropic: support adaptive thinking correctly. #962

2026-05-10 21:50:14 -07:00 · 2026-02-06 14:49:36 -08:00
parent 4c7f50ab98
commit 5b9c6a2d0e
8 changed files with 22 additions and 12 deletions
@@ -156,8 +156,8 @@ export const DModelParameterRegistry = {
    type: 'integer',
    description: 'Budget for extended thinking',
    range: [1024, 65536] as const,
-    initialValue: 16384,
-    nullable: {
+    initialValue: 16384, // special: '-1' is an out-of-range sentinel for 'adaptive' thinking (hidden, used for 4.6+)
+    nullable: { // null means to not turn on thinking at all, and it's the user-overridden equivalent to the param missing
      meaning: 'Disable extended thinking',
    },
  },
@@ -137,6 +137,10 @@ export const useModelsStore = create<LlmsStore>()(persist(
                if (currentValue && typeof currentValue === 'string' && !(regDef.values as readonly string[]).includes(currentValue))
                  delete result.userParameters[paramId]; // reset to default (undefined)
              }
+
+              // NOTE: no range validation for integer/float types yet. If added, be aware that
+              // llmVndAntThinkingBudget uses initialValue: -1 (out of range [1024, 65536]) as a
+              // sentinel for adaptive thinking mode on hidden params — range checks must skip hidden params.
            }
          }

@@ -105,7 +105,7 @@ export function aixCreateModelFromLLMOptions(
    ...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
    ...(llmTopP !== undefined ? { topP: llmTopP } : {}),
    ...(llmForceNoStream ? { forceNoStream: true } : {}),
-    ...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget } : {}),
+    ...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget === -1 ? 'adaptive' as const : llmVndAntThinkingBudget } : {}),
    ...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}),
    ...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}),
    ...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}),
@@ -453,7 +453,7 @@ export namespace AixWire_API {
    vndAnt1MContext: z.boolean().optional(),
    vndAntEffort: z.enum(['low', 'medium', 'high']).optional(),
    vndAntSkills: z.string().optional(),
-    vndAntThinkingBudget: z.number().nullable().optional(),
+    vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(),
    vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant
    vndAntWebFetch: z.enum(['auto']).optional(),
    vndAntWebSearch: z.enum(['auto']).optional(),
@@ -132,11 +132,13 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
    delete payload.temperature;
  }

-  // [Anthropic] Thinking Budget
+  // [Anthropic] Thinking: adaptive (4.6+), enabled with budget (≤4.5), or disabled
  const areToolCallsRequired = payload.tool_choice && typeof payload.tool_choice === 'object' && (payload.tool_choice.type === 'any' || payload.tool_choice.type === 'tool');
  const canUseThinking = !areToolCallsRequired || !hotFixDisableThinkingWhenToolsForced;
  if (model.vndAntThinkingBudget !== undefined && canUseThinking) {
-    payload.thinking = model.vndAntThinkingBudget !== null ? {
+    payload.thinking = model.vndAntThinkingBudget === 'adaptive' ? {
+      type: 'adaptive',
+    } : model.vndAntThinkingBudget !== null ? {
      type: 'enabled',
      budget_tokens: model.vndAntThinkingBudget < payload.max_tokens ? model.vndAntThinkingBudget : payload.max_tokens - 1,
    } : {
@@ -233,9 +233,14 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
    // Anthropic via OpenRouter
    if (model.vndAntThinkingBudget !== undefined) {
      // vndAntThinkingBudget's presence indicates a user preference:
+      // - 'adaptive': adaptive thinking (4.6+) - skip, let effort handle it via OpenRouter
      // - a number: explicit token budget (1024-32000)
      // - null: disable thinking (don't set reasoning field)
-      if (model.vndAntThinkingBudget === null) {
+      if (model.vndAntThinkingBudget === 'adaptive') {
+        // Adaptive thinking on OpenRouter: no explicit budget, effort controls depth
+        // TODO: verify if this is a good assumption - new guide:
+        // https://openrouter.ai/docs/guides/guides/model-migrations/claude-4-6-opus#verbosity-vs-reasoning-effort
+      } else if (model.vndAntThinkingBudget === null) {
        // If null, don't set reasoning field at all (disables thinking)
      } else
        payload.reasoning = { max_tokens: model.vndAntThinkingBudget || 8192 };
@@ -834,11 +834,10 @@ export namespace AnthropicWire_API_Message_Create {
     * When enabled, responses include thinking content blocks showing Claude's thinking process before the final answer.
     */
    thinking: z.union([
+      // [Anthropic, 4.6+] Adaptive thinking - Claude decides when and how much to think
+      z.object({ type: z.literal('adaptive') }),
      // Requires a minimum budget of 1,024 tokens and counts towards your max_tokens limit.
-      z.object({
-        type: z.literal('enabled'),
-        budget_tokens: z.number(),
-      }),
+      z.object({ type: z.literal('enabled'), budget_tokens: z.number() }),
      // having this for completeness, but seems like it's not needed / can be omitted
      z.object({ type: z.literal('disabled') }),
    ]).optional(),
@@ -40,7 +40,7 @@ const _hardcodedAnthropicVariants: ModelVariantMap = {
    label: 'Claude Opus 4.6 (Thinking)',
    description: 'Claude Opus 4.6 with adaptive thinking mode for the most complex reasoning and agentic workflows',
    interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
-    parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget' }, { paramId: 'llmVndAntEffort' }, { paramId: 'llmVndAnt1MContext' }],
+    parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* adaptive */ }, { paramId: 'llmVndAntEffort' }, { paramId: 'llmVndAnt1MContext' }],
    // benchmark: { cbaElo: ... }, // TBD
    maxCompletionTokens: 32000,
  },