mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
LLMs: Anthropic: Fast mode research preview
This commit is contained in:
@@ -159,6 +159,15 @@ export const DModelParameterRegistry = {
|
||||
// No initialValue - undefined means high effort (default, equivalent to omitting the parameter)
|
||||
}),
|
||||
|
||||
llmVndAntInfSpeed: _enumDef({
|
||||
label: 'Fast Mode',
|
||||
type: 'enum',
|
||||
description: 'Accelerated inference (~2.5x faster output) at 6x pricing. Preview access required.',
|
||||
values: ['fast'],
|
||||
enumPriceMultiplier: { fast: 6 },
|
||||
// No initialValue - undefined means standard speed (omitted from request)
|
||||
}),
|
||||
|
||||
llmVndAntSkills: {
|
||||
label: 'Document Skills',
|
||||
type: 'string',
|
||||
|
||||
@@ -50,7 +50,7 @@ export function aixCreateModelFromLLMOptions(
|
||||
// destructure input with the overrides
|
||||
const {
|
||||
llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream,
|
||||
llmVndAnt1MContext, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndAntEffort, llmVndAntEffortMax,
|
||||
llmVndAnt1MContext, llmVndAntInfSpeed, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndAntEffort, llmVndAntEffortMax,
|
||||
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiShowThoughts, llmVndGeminiThinkingBudget, llmVndGeminiThinkingLevel, llmVndGeminiThinkingLevel4,
|
||||
llmVndMoonReasoningEffort, // -> mapped to vndOaiReasoningEffort below
|
||||
// llmVndMoonshotWebSearch,
|
||||
@@ -108,6 +108,7 @@ export function aixCreateModelFromLLMOptions(
|
||||
...(llmForceNoStream ? { forceNoStream: true } : {}),
|
||||
...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget === -1 ? 'adaptive' as const : llmVndAntThinkingBudget } : {}),
|
||||
...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}),
|
||||
...(llmVndAntInfSpeed === 'fast' ? { vndAntInfSpeed: 'fast' } : {}),
|
||||
...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}),
|
||||
...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}),
|
||||
...(llmVndAntWebSearch === 'auto' ? { vndAntWebSearch: llmVndAntWebSearch } : {}),
|
||||
|
||||
@@ -452,6 +452,7 @@ export namespace AixWire_API {
|
||||
// Anthropic
|
||||
vndAnt1MContext: z.boolean().optional(),
|
||||
vndAntEffort: z.enum(['low', 'medium', 'high', 'max']).optional(),
|
||||
vndAntInfSpeed: z.enum(['fast']).optional(),
|
||||
vndAntSkills: z.string().optional(),
|
||||
vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(),
|
||||
vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant
|
||||
|
||||
@@ -193,6 +193,10 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
|
||||
console.warn('[Anthropic] Structured output_config.format may conflict with web_fetch citations');
|
||||
}
|
||||
|
||||
// [Anthropic, fast-mode-2026-02-01] Fast inference mode (preview/waitlist)
|
||||
if (model.vndAntInfSpeed === 'fast')
|
||||
payload.speed = 'fast';
|
||||
|
||||
// --- Tools ---
|
||||
|
||||
// Allow/deny auto-adding hosted tools when custom tools are present
|
||||
|
||||
@@ -65,6 +65,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
|
||||
vndAnt1MContext: model.vndAnt1MContext === true,
|
||||
vndAntEffort: !!model.vndAntEffort,
|
||||
enableSkills: !!model.vndAntSkills,
|
||||
enableFastMode: model.vndAntInfSpeed === 'fast',
|
||||
enableStrictOutputs: !!model.strictJsonOutput || !!model.strictToolInvocations, // [Anthropic, 2025-11-13] for both JSON output and grammar-constrained tool invocations inputs
|
||||
enableToolSearch: !!model.vndAntToolSearch,
|
||||
enableProgrammaticToolCalling: usesProgrammaticToolCalling,
|
||||
|
||||
@@ -896,6 +896,12 @@ export namespace AnthropicWire_API_Message_Create {
|
||||
* */
|
||||
top_p: z.number().optional(),
|
||||
|
||||
/**
|
||||
* [Anthropic, fast-mode-2026-02-01] Accelerated inference mode.
|
||||
* Preview/waitlist. Only supported on Claude Opus 4.6.
|
||||
*/
|
||||
speed: z.enum(['fast']).optional(),
|
||||
|
||||
/**
|
||||
* [Anthropic, 2026-02-01] Geographic region for model inference.
|
||||
* - "global": default, inference may run in any available geography
|
||||
|
||||
@@ -250,6 +250,7 @@ export function LLMParametersEditor(props: {
|
||||
llmVndAnt1MContext,
|
||||
llmVndAntEffort,
|
||||
llmVndAntEffortMax,
|
||||
llmVndAntInfSpeed,
|
||||
llmVndAntSkills,
|
||||
llmVndAntThinkingBudget,
|
||||
llmVndAntWebFetch,
|
||||
@@ -467,6 +468,20 @@ export function LLMParametersEditor(props: {
|
||||
/>
|
||||
)}
|
||||
|
||||
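{/* Anthropic Fast Mode (preview) - rendered only when showParam('llmVndAntInfSpeed') passes. NOTE(review): the model parameterSpecs that add this param do not set `hidden` - confirm intended visibility */}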
|
||||
{showParam('llmVndAntInfSpeed') && (
|
||||
<FormSwitchControl
|
||||
title='Fast Mode (Preview)'
|
||||
description={llmVndAntInfSpeed === 'fast' ? 'Fast - 6x pricing ⚠️' : 'Standard (default)'}
|
||||
tooltip='Accelerated inference (~2.5x faster output) at 6x pricing. Preview access required.'
|
||||
checked={llmVndAntInfSpeed === 'fast'}
|
||||
onChange={(checked) => {
|
||||
if (!checked) onRemoveParameter('llmVndAntInfSpeed');
|
||||
else onChangeParameter({ llmVndAntInfSpeed: 'fast' });
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
|
||||
{isExtra && showParam('llmVndAntSkills') && (
|
||||
<AnthropicSkillsConfig llmVndAntSkills={llmVndAntSkills} onChangeParameter={onChangeParameter} onRemoveParameter={onRemoveParameter} />
|
||||
)}
|
||||
|
||||
@@ -85,6 +85,7 @@ export type AnthropicHeaderOptions = {
|
||||
vndAntEffort?: boolean; // [Anthropic, effort-2025-11-24]
|
||||
enableSkills?: boolean;
|
||||
enableCodeExecution?: boolean;
|
||||
enableFastMode?: boolean; // [Anthropic, fast-mode-2026-02-01]
|
||||
enableStrictOutputs?: boolean; // [Anthropic, 2025-11-13] Structured Outputs (JSON outputs & strict tool use)
|
||||
enableToolSearch?: boolean; // [Anthropic, 2025-11-24] Tool Search Tool
|
||||
enableProgrammaticToolCalling?: boolean; // [Anthropic, 2025-11-24] Programmatic Tool Calling (allowed_callers, input_examples)
|
||||
@@ -165,9 +166,12 @@ function _anthropicHeaders(options?: AnthropicHeaderOptions): Record<string, str
|
||||
}
|
||||
|
||||
// Add beta feature for code execution (required for Skills)
|
||||
if (options?.enableCodeExecution || options?.enableSkills) {
|
||||
if (options?.enableCodeExecution || options?.enableSkills)
|
||||
betaFeatures.push('code-execution-2025-08-25');
|
||||
}
|
||||
|
||||
// [Anthropic, fast-mode-2026-02-01] Fast inference mode
|
||||
if (options?.enableFastMode)
|
||||
betaFeatures.push('fast-mode-2026-02-01');
|
||||
|
||||
// [Anthropic, 2025-11-24] Add beta feature for effort parameter (Claude Opus 4.5+)
|
||||
if (options?.vndAntEffort)
|
||||
|
||||
@@ -41,7 +41,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
label: 'Claude Opus 4.6 (Adaptive)',
|
||||
description: 'Claude Opus 4.6 with adaptive thinking mode for the most complex reasoning and agentic workflows',
|
||||
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
|
||||
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* adaptive */ }, { paramId: 'llmVndAntEffortMax' }, { paramId: 'llmVndAnt1MContext' }],
|
||||
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* adaptive */ }, { paramId: 'llmVndAntEffortMax' }, { paramId: 'llmVndAnt1MContext' }, { paramId: 'llmVndAntInfSpeed' }],
|
||||
// benchmark: { cbaElo: ... }, // TBD
|
||||
},
|
||||
|
||||
@@ -136,7 +136,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
|
||||
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntEffortMax' }, { paramId: 'llmVndAnt1MContext' }],
|
||||
parameterSpecs: [...ANT_TOOLS, { paramId: 'llmVndAntEffortMax' }, { paramId: 'llmVndAnt1MContext' }, { paramId: 'llmVndAntInfSpeed' }],
|
||||
// Note: Tiered pricing - ≤200K: $5/$25, >200K: $10/$37.50 (with 1M context enabled)
|
||||
// Cache pricing also tiered: write 1.25× input, read 0.10× input
|
||||
chatPrice: {
|
||||
@@ -359,6 +359,7 @@ export function llmsAntCreatePlaceholderModel(model: AnthropicWire_API_Models_Li
|
||||
const _ORT_ANT_IF_ALLOWLIST: ReadonlySet<string> = new Set([
|
||||
LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning,
|
||||
] as const);
|
||||
// NOTE: llmVndAntInfSpeed intentionally NOT included - fast mode not available through OpenRouter
|
||||
const _ORT_ANT_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
|
||||
'llmVndAntEffort', 'llmVndAntEffortMax',
|
||||
'llmVndAntThinkingBudget',
|
||||
|
||||
@@ -80,6 +80,7 @@ const ModelParameterSpec_schema = z.object({
|
||||
'llmVndAnt1MContext',
|
||||
'llmVndAntEffort',
|
||||
'llmVndAntEffortMax',
|
||||
'llmVndAntInfSpeed',
|
||||
'llmVndAntSkills',
|
||||
'llmVndAntThinkingBudget',
|
||||
'llmVndAntWebFetch',
|
||||
|
||||
Reference in New Issue
Block a user