mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
LLMs: Anthropic: support adaptive thinking correctly. #962
This commit is contained in:
@@ -156,8 +156,8 @@ export const DModelParameterRegistry = {
|
||||
type: 'integer',
|
||||
description: 'Budget for extended thinking',
|
||||
range: [1024, 65536] as const,
|
||||
initialValue: 16384,
|
||||
nullable: {
|
||||
initialValue: 16384, // special: '-1' is an out-of-range sentinel for 'adaptive' thinking (hidden, used for 4.6+)
|
||||
nullable: { // null means to not turn on thinking at all, and it's the user-overridden equivalent to the param missing
|
||||
meaning: 'Disable extended thinking',
|
||||
},
|
||||
},
|
||||
|
||||
@@ -137,6 +137,10 @@ export const useModelsStore = create<LlmsStore>()(persist(
|
||||
if (currentValue && typeof currentValue === 'string' && !(regDef.values as readonly string[]).includes(currentValue))
|
||||
delete result.userParameters[paramId]; // reset to default (undefined)
|
||||
}
|
||||
|
||||
// NOTE: no range validation for integer/float types yet. If added, be aware that
|
||||
// llmVndAntThinkingBudget uses initialValue: -1 (out of range [1024, 65536]) as a
|
||||
// sentinel for adaptive thinking mode on hidden params — range checks must skip hidden params.
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -105,7 +105,7 @@ export function aixCreateModelFromLLMOptions(
|
||||
...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
|
||||
...(llmTopP !== undefined ? { topP: llmTopP } : {}),
|
||||
...(llmForceNoStream ? { forceNoStream: true } : {}),
|
||||
...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget } : {}),
|
||||
...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget === -1 ? 'adaptive' as const : llmVndAntThinkingBudget } : {}),
|
||||
...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}),
|
||||
...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}),
|
||||
...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}),
|
||||
|
||||
@@ -453,7 +453,7 @@ export namespace AixWire_API {
|
||||
vndAnt1MContext: z.boolean().optional(),
|
||||
vndAntEffort: z.enum(['low', 'medium', 'high']).optional(),
|
||||
vndAntSkills: z.string().optional(),
|
||||
vndAntThinkingBudget: z.number().nullable().optional(),
|
||||
vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(),
|
||||
vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant
|
||||
vndAntWebFetch: z.enum(['auto']).optional(),
|
||||
vndAntWebSearch: z.enum(['auto']).optional(),
|
||||
|
||||
@@ -132,11 +132,13 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
|
||||
delete payload.temperature;
|
||||
}
|
||||
|
||||
// [Anthropic] Thinking Budget
|
||||
// [Anthropic] Thinking: adaptive (4.6+), enabled with budget (≤4.5), or disabled
|
||||
const areToolCallsRequired = payload.tool_choice && typeof payload.tool_choice === 'object' && (payload.tool_choice.type === 'any' || payload.tool_choice.type === 'tool');
|
||||
const canUseThinking = !areToolCallsRequired || !hotFixDisableThinkingWhenToolsForced;
|
||||
if (model.vndAntThinkingBudget !== undefined && canUseThinking) {
|
||||
payload.thinking = model.vndAntThinkingBudget !== null ? {
|
||||
payload.thinking = model.vndAntThinkingBudget === 'adaptive' ? {
|
||||
type: 'adaptive',
|
||||
} : model.vndAntThinkingBudget !== null ? {
|
||||
type: 'enabled',
|
||||
budget_tokens: model.vndAntThinkingBudget < payload.max_tokens ? model.vndAntThinkingBudget : payload.max_tokens - 1,
|
||||
} : {
|
||||
|
||||
@@ -233,9 +233,14 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
|
||||
// Anthropic via OpenRouter
|
||||
if (model.vndAntThinkingBudget !== undefined) {
|
||||
// vndAntThinkingBudget's presence indicates a user preference:
|
||||
// - 'adaptive': adaptive thinking (4.6+) - skip, let effort handle it via OpenRouter
|
||||
// - a number: explicit token budget (1024-32000)
|
||||
// - null: disable thinking (don't set reasoning field)
|
||||
if (model.vndAntThinkingBudget === null) {
|
||||
if (model.vndAntThinkingBudget === 'adaptive') {
|
||||
// Adaptive thinking on OpenRouter: no explicit budget, effort controls depth
|
||||
// TODO: verify if this is a good assumption - new guide:
|
||||
// https://openrouter.ai/docs/guides/guides/model-migrations/claude-4-6-opus#verbosity-vs-reasoning-effort
|
||||
} else if (model.vndAntThinkingBudget === null) {
|
||||
// If null, don't set reasoning field at all (disables thinking)
|
||||
} else
|
||||
payload.reasoning = { max_tokens: model.vndAntThinkingBudget || 8192 };
|
||||
|
||||
@@ -834,11 +834,10 @@ export namespace AnthropicWire_API_Message_Create {
|
||||
* When enabled, responses include thinking content blocks showing Claude's thinking process before the final answer.
|
||||
*/
|
||||
thinking: z.union([
|
||||
// [Anthropic, 4.6+] Adaptive thinking - Claude decides when and how much to think
|
||||
z.object({ type: z.literal('adaptive') }),
|
||||
// Requires a minimum budget of 1,024 tokens and counts towards your max_tokens limit.
|
||||
z.object({
|
||||
type: z.literal('enabled'),
|
||||
budget_tokens: z.number(),
|
||||
}),
|
||||
z.object({ type: z.literal('enabled'), budget_tokens: z.number() }),
|
||||
// having this for completeness, but seems like it's not needed / can be omitted
|
||||
z.object({ type: z.literal('disabled') }),
|
||||
]).optional(),
|
||||
|
||||
@@ -40,7 +40,7 @@ const _hardcodedAnthropicVariants: ModelVariantMap = {
|
||||
label: 'Claude Opus 4.6 (Thinking)',
|
||||
description: 'Claude Opus 4.6 with adaptive thinking mode for the most complex reasoning and agentic workflows',
|
||||
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
|
||||
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget' }, { paramId: 'llmVndAntEffort' }, { paramId: 'llmVndAnt1MContext' }],
|
||||
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* adaptive */ }, { paramId: 'llmVndAntEffort' }, { paramId: 'llmVndAnt1MContext' }],
|
||||
// benchmark: { cbaElo: ... }, // TBD
|
||||
maxCompletionTokens: 32000,
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user