diff --git a/src/modules/aix/client/aix.client.ts b/src/modules/aix/client/aix.client.ts index 2319339b5..bd28ef795 100644 --- a/src/modules/aix/client/aix.client.ts +++ b/src/modules/aix/client/aix.client.ts @@ -172,6 +172,10 @@ interface AixClientOptions { // LLM parameter configuration layers: full replacement of user params and/or overrides of a set of individual params llmUserParametersReplacement?: DModelParameterValues; // can replace the 'global' llm user configuration with an alternate config (e.g. persona, or per-chat) llmOptionsOverride?: Omit; // overrides (sets/replaces) individual LLM parameters + + // -- Session State - extract? -- + // [Anthropic Container] Container ID from a prior turn (caller checks expiry before setting) + antContainerId?: string; } @@ -217,6 +221,20 @@ export async function aixChatGenerateContent_DMessage_FromConversation( chatSequence: await aixCGR_ChatSequence_FromDMessagesOrThrow(chatHistoryWithoutSystemMessages), }; + // [Anthropic Container] Extract container from the last assistant message (if valid, 15s margin for transit) + if (!clientOptions.antContainerId) + for (let i = chatHistoryWithoutSystemMessages.length - 1; i >= 0; i--) { + const uc = chatHistoryWithoutSystemMessages[i].generator?.upstreamContainer; + if (uc?.uct === 'vnd.ant.container') { + const remainingMs = Date.parse(uc.expiresAt) - Date.now(); + if (remainingMs <= 15_000) + console.log(`[DEV] AIX: Anthropic container ${uc.containerId} expired ${Math.round(-remainingMs / 1000)}s ago, not reusing.`); + else + clientOptions = { ...clientOptions, antContainerId: uc.containerId }; + break; + } + } + const { outcome, ...resultDMessage } = await aixChatGenerateContent_DMessage_orThrow( llmId, aixChatContentGenerateRequest, @@ -487,6 +505,10 @@ export async function aixChatGenerateContent_DMessage_orThrow t.type === 'function_call'); + const _hasAixToolRestrictivePolicy = chatGenerate.toolsPolicy?.type === 'any' || chatGenerate.toolsPolicy?.type === 'function_call'; + + // Dynamic web tools (20260209) require code execution for programmatic tool calling + const hasDynamicWebTools = model.vndAntWebDynamic === true && (model.vndAntWebSearch === 'auto' || model.vndAntWebFetch === 'auto'); + + // Programmatic Tool Calling - tools with allowed_callers or input_examples + const programmaticToolCalling = chatGenerate.tools?.some(tool => + tool.type === 'function_call' && ( + tool.function_call.allowed_callers?.includes('code_execution') || + (tool.function_call.input_examples && tool.function_call.input_examples.length > 0) + ), + ) ?? false; + + return { + disableAllHostedTools: !!(_hasAixCustomTools && _hasAixToolRestrictivePolicy), + enable1MContext: model.vndAnt1MContext === true, + enableCodeExecution: !!model.vndAntSkills || !!model.vndAntContainerId || hasDynamicWebTools || programmaticToolCalling, + enableFastMode: model.vndAntInfSpeed === 'fast', + enableSkills: !!model.vndAntSkills, + enableStrictOutputs: !!model.strictJsonOutput || !!model.strictToolInvocations, + enableToolAdvanced20251120: !!model.vndAntToolSearch || programmaticToolCalling, + modelIdForPerModelFeatures: model.id, + }; +} + +export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate: AixAPIChatGenerate_Request, streaming: boolean, hostedFeatures: ReturnType): TRequest { // Pre-process CGR - approximate spill of System to User message const chatGenerate = aixSpillSystemToUser(_chatGenerate); @@ -204,15 +240,14 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate: if (model.vndAntInfSpeed === 'fast') payload.speed = 'fast'; + // --- Tools --- - // Allow/deny auto-adding hosted tools when custom tools are present - const hasCustomTools = chatGenerate.tools?.some(t => t.type === 'function_call'); - const hasRestrictivePolicy = chatGenerate.toolsPolicy?.type === 'any' || chatGenerate.toolsPolicy?.type === 'function_call'; - const skipHostedToolsDueToCustomTools = hasCustomTools && hasRestrictivePolicy; + // Hosted capabilities - shared logic with dispatch for beta header correctness + const { disableAllHostedTools, enableCodeExecution } = hostedFeatures; // Hosted tools - if (!skipHostedToolsDueToCustomTools) { + if (!disableAllHostedTools) { const hostedTools: NonNullable = []; // Web Search Tool - dynamic filtering (20260209) uses internal code execution for better results @@ -249,38 +284,36 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate: name: 'tool_search_tool_bm25', }); + // Code Execution tool - required for dynamic filtering, Skills, etc. + if (enableCodeExecution) + hostedTools.push({ type: 'code_execution_20260120', name: 'code_execution' }); + // Merge hosted tools with custom tools if (hostedTools.length > 0) { payload.tools = payload.tools ? [...payload.tools, ...hostedTools] : hostedTools; } } - // --- Skills Container --- + // --- Container - for code execution (Skills, dynamic filtering, etc.) continuity between calls --- - // Add Skills container if enabled (non-empty string) - if (model.vndAntSkills) { + if (enableCodeExecution) { - // Parse comma-separated string and convert to Anthropic format - const skillIds = model.vndAntSkills.split(',').map((s: string) => s.trim()).filter((s: string) => s); + // Container ID from a previous turn (expiry already checked client-side) + const containerId = model.vndAntContainerId; - if (skillIds.length > 0) { - - // request a container with those selected skills + const skillIds = model.vndAntSkills?.split(',').map(s => s.trim()).filter(s => s); + if (skillIds?.length) { + // Reuse or create a container for the skills payload.container = { - skills: skillIds.map((skillId: string) => ({ - type: 'anthropic' as const, + ...(containerId ? { id: containerId } : {}), + skills: skillIds.map((skillId: string) => ({ + type: 'anthropic', skill_id: skillId, version: 'latest', })), }; - - // also require the code_execution tool (required by Skills) - if (!payload.tools?.length) - payload.tools = []; - - if (!payload.tools.some(t => t.type === 'code_execution_20260120' /* Beta */ || t.type === 'code_execution_20250825')) - payload.tools.push({ type: 'code_execution_20260120', name: 'code_execution' }); - } + } else if (containerId && enableCodeExecution) + payload.container = containerId; } diff --git a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts index f25af6c71..0404392ec 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts @@ -1,4 +1,4 @@ -import { ANTHROPIC_API_PATHS, anthropicAccess, anthropicBetaFeatures, AnthropicHeaderOptions } from '~/modules/llms/server/anthropic/anthropic.access'; +import { ANTHROPIC_API_PATHS, anthropicAccess, anthropicBetaFeatures } from '~/modules/llms/server/anthropic/anthropic.access'; import { OPENAI_API_PATHS, openAIAccess } from '~/modules/llms/server/openai/openai.access'; import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLMantle, bedrockURLRuntime } from '~/modules/llms/server/bedrock/bedrock.access'; import { geminiAccess } from '~/modules/llms/server/gemini/gemini.access'; @@ -9,7 +9,7 @@ import type { AixDemuxers } from '../stream.demuxers'; import { GeminiWire_API_Generate_Content } from '../wiretypes/gemini.wiretypes'; -import { aixToAnthropicMessageCreate } from './adapters/anthropic.messageCreate'; +import { aixAnthropicHostedFeatures, aixToAnthropicMessageCreate } from './adapters/anthropic.messageCreate'; import { aixToBedrockConverse } from './adapters/bedrock.converse'; import { aixToGeminiGenerateContent } from './adapters/gemini.generateContent'; import { aixToOpenAIChatCompletions } from './adapters/openai.chatCompletions'; @@ -55,20 +55,10 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A switch (dialect) { case 'anthropic': { - // [Anthropic, 2025-11-24] Detect if any tool uses Programmatic Tool Calling features (allowed_callers, input_examples) - const usesProgrammaticToolCalling = chatGenerate.tools?.some(tool => - tool.type === 'function_call' && ( - tool.function_call.allowed_callers?.includes('code_execution') || - (tool.function_call.input_examples && tool.function_call.input_examples.length > 0) - ), - ) ?? false; - - const anthropicRequest = anthropicAccess(access, ANTHROPIC_API_PATHS.messages, - _anthropicBetaOptionsFromModel(model, usesProgrammaticToolCalling), - ); + const hostedFeatures = aixAnthropicHostedFeatures(model, chatGenerate); // Build the request body from model + chat parameters - const anthropicBody = aixToAnthropicMessageCreate(model, chatGenerate, streaming); + const anthropicBody = aixToAnthropicMessageCreate(model, chatGenerate, streaming, hostedFeatures); // [Anthropic, 2026-02-01] Service-level inference geo routing (e.g. "us") if (access.anthropicInferenceGeo) @@ -76,7 +66,7 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A return { request: { - ...anthropicRequest, + ...anthropicAccess(access, ANTHROPIC_API_PATHS.messages, hostedFeatures), method: 'POST', body: anthropicBody, }, @@ -109,15 +99,14 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A const invokeUrl = bedrockURLRuntime(bedrockResolveRegion(access), model.id, 'invoke', streaming); // body - const bedrockAnthropicBody: Record = aixToAnthropicMessageCreate(model, chatGenerate, streaming); + const bedrockHostedFeatures = aixAnthropicHostedFeatures(model, chatGenerate); + const bedrockAnthropicBody: Record = aixToAnthropicMessageCreate(model, chatGenerate, streaming, bedrockHostedFeatures); delete bedrockAnthropicBody.model; // model in path delete bedrockAnthropicBody.stream; // streaming behavior in path // headers['anthropic-version'] -> body bedrockAnthropicBody.anthropic_version = 'bedrock-2023-05-31'; - // headers['anthropic-beta'] -> body - bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures( - _anthropicBetaOptionsFromModel(model /* note that .id won't match, and it's okay, we don't need per model customizations */, false /* hardcoded */), - ); + // headers['anthropic-beta'] -> body (note: model.id won't match PER_MODEL keys, and that's fine) + bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures(bedrockHostedFeatures); if (!bedrockAnthropicBody.anthropic_beta?.length) delete bedrockAnthropicBody.anthropic_beta; @@ -257,21 +246,6 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A } } -/** Used by both Anthropic direct and Bedrock dispatch paths. */ -function _anthropicBetaOptionsFromModel(model: AixAPI_Model, usesProgrammaticToolCalling: boolean): AnthropicHeaderOptions { - return { - modelIdForBetaFeatures: model.id, - vndAntWebFetch: model.vndAntWebFetch === 'auto', - vndAnt1MContext: model.vndAnt1MContext === true, - enableSkills: !!model.vndAntSkills, - enableFastMode: model.vndAntInfSpeed === 'fast', - enableStrictOutputs: !!model.strictJsonOutput || !!model.strictToolInvocations, // [Anthropic, 2025-11-13] for both JSON output and grammar-constrained tool invocations inputs - enableToolSearch: !!model.vndAntToolSearch, - enableProgrammaticToolCalling: usesProgrammaticToolCalling, - // enableCodeExecution: ... - }; -} - /** * Specializes to the correct vendor a request for resuming chat generation (OpenAI Responses API only). diff --git a/src/modules/llms/server/anthropic/anthropic.access.ts b/src/modules/llms/server/anthropic/anthropic.access.ts index ece2956ce..bdf13adc7 100644 --- a/src/modules/llms/server/anthropic/anthropic.access.ts +++ b/src/modules/llms/server/anthropic/anthropic.access.ts @@ -95,16 +95,15 @@ const PER_MODEL_BETA_FEATURES: { [modelId: string]: string[] } = { // --- Anthropic Access --- -export type AnthropicHeaderOptions = { - modelIdForBetaFeatures?: string; - vndAnt1MContext?: boolean; - enableSkills?: boolean; +export type AnthropicHostedFeatures = { + disableAllHostedTools?: boolean; + enable1MContext?: boolean; enableCodeExecution?: boolean; enableFastMode?: boolean; // [Anthropic, fast-mode-2026-02-01] + enableSkills?: boolean; enableStrictOutputs?: boolean; // [Anthropic, 2025-11-13] Structured Outputs (JSON outputs & strict tool use) - enableToolSearch?: boolean; // [Anthropic, 2025-11-24] Tool Search Tool - enableProgrammaticToolCalling?: boolean; // [Anthropic, 2025-11-24] Programmatic Tool Calling (allowed_callers, input_examples) - clientSideFetch?: boolean; // whether the request will be made from client-side (browser) - adds CORS header + enableToolAdvanced20251120?: boolean; // [Anthropic, 2025-11-24] Tool Search Tool + Programmatic Tool Calling (umbrella header) + modelIdForPerModelFeatures?: string; }; export type AnthropicAccessSchema = z.infer; @@ -117,7 +116,7 @@ export const anthropicAccessSchema = z.object({ anthropicInferenceGeo: z.string().trim().nullable().optional(), // [Anthropic, 2026-02-01] e.g. "us" for US-only inference, optional: for server backward-comp, and can be removed }); -export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHeaderOptions): { headers: HeadersInit, url: string } { +export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHostedFeatures): { headers: HeadersInit, url: string } { // API key const anthropicKey = access.anthropicKey || env.ANTHROPIC_API_KEY || ''; @@ -162,22 +161,21 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string, * Build the list of Anthropic beta feature strings from options. * Used by both the direct Anthropic path (as header) and Bedrock path (as body field). */ -export function anthropicBetaFeatures(options?: AnthropicHeaderOptions): string[] { +export function anthropicBetaFeatures(options?: AnthropicHostedFeatures): string[] { const bf = new Set(DEFAULT_ANTHROPIC_BETA_FEATURES); // Per-model beta features - if (options?.modelIdForBetaFeatures) { + if (options?.modelIdForPerModelFeatures) { // string search (.includes) within the keys, to be more resilient to modelId changes/prefixing for (const [key, value] of Object.entries(PER_MODEL_BETA_FEATURES)) - if (key.includes(options.modelIdForBetaFeatures)) + if (key.includes(options.modelIdForPerModelFeatures)) value.forEach(f => bf.add(f)); } // Add beta feature for 1M context window if enabled - if (options?.vndAnt1MContext) + if (options?.enable1MContext) bf.add('context-1m-2025-08-07'); - // Code execution (for dynamic web tools PFC, or Skills) + files API for container downloads // Note: SDK defines code-execution-2025-05-22; we use 2025-08-25 (newer iteration, not yet in SDK types). // Code execution may be GA now (most SDK examples skip the beta namespace), but keeping for safety. @@ -186,27 +184,26 @@ export function anthropicBetaFeatures(options?: AnthropicHeaderOptions): string[ bf.add('files-api-2025-04-14'); } - if (options?.enableSkills) { - bf.add('code-execution-2025-08-25'); - bf.add('files-api-2025-04-14'); + // [Anthropic, fast-mode-2026-02-01] Fast inference mode + if (options?.enableFastMode) + bf.add('fast-mode-2026-02-01'); + + // Skills also requires +enableCodeExecution + if (options?.enableSkills) bf.add('skills-2025-10-02'); - } // [Anthropic, 2025-11-13] Structured Outputs (JSON outputs & strict tool use) // GA on Claude 4.5+ via output_config.format (which we use). SDK auto-injects structured-outputs-2025-12-15. // Keeping older header as safety net for pre-4.5 models; harmless on newer ones. + // Bedrock / AWS may still require: https://platform.claude.com/docs/en/build-with-claude/structured-outputs if (options?.enableStrictOutputs) bf.add('structured-outputs-2025-11-13'); // [Anthropic, 2025-11-24] Advanced Tool Use (Tool Search Tool, Programmatic Tool Calling) // Same beta header covers both features: tool discovery and programmatic calling from code execution. // Note: advanced-tool-use-2025-11-20 is NOT in the SDK AnthropicBeta type union (possibly private/undocumented). - if (options?.enableToolSearch || options?.enableProgrammaticToolCalling) + if (options?.enableToolAdvanced20251120) bf.add('advanced-tool-use-2025-11-20'); - // [Anthropic, fast-mode-2026-02-01] Fast inference mode - if (options?.enableFastMode) - bf.add('fast-mode-2026-02-01'); - return [...bf]; } diff --git a/src/modules/llms/server/anthropic/anthropic.router.ts b/src/modules/llms/server/anthropic/anthropic.router.ts index 89a29eb87..0ad0ba754 100644 --- a/src/modules/llms/server/anthropic/anthropic.router.ts +++ b/src/modules/llms/server/anthropic/anthropic.router.ts @@ -6,12 +6,12 @@ import { fetchJsonOrTRPCThrow, fetchResponseOrTRPCThrow } from '~/server/trpc/tr import { ListModelsResponse_schema } from '../llm.server.types'; import { listModelsRunDispatch } from '../listModels.dispatch'; -import { anthropicAccess, anthropicAccessSchema, AnthropicAccessSchema, AnthropicHeaderOptions, ANTHROPIC_API_PATHS } from './anthropic.access'; +import { anthropicAccess, anthropicAccessSchema, AnthropicAccessSchema, AnthropicHostedFeatures, ANTHROPIC_API_PATHS } from './anthropic.access'; // Mappers -async function anthropicGETOrThrow(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHeaderOptions, signal?: AbortSignal): Promise { +async function anthropicGETOrThrow(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHostedFeatures, signal?: AbortSignal): Promise { const { headers, url } = anthropicAccess(access, apiPath, options); return await fetchJsonOrTRPCThrow({ url, headers, name: 'Anthropic', signal }); }