AIX: Anthropic: Containers support (1h) - allows continuing Skills, bash sessions, dynamic filtering, temp files, etc. across turns

This commit is contained in:
Enrico Ros
2026-04-07 03:05:19 -07:00
parent 14c5c83f91
commit 7929d4eb30
6 changed files with 110 additions and 83 deletions
+22
View File
@@ -172,6 +172,10 @@ interface AixClientOptions {
// LLM parameter configuration layers: full replacement of user params and/or overrides of a set of individual params
llmUserParametersReplacement?: DModelParameterValues; // can replace the 'global' llm user configuration with an alternate config (e.g. persona, or per-chat)
llmOptionsOverride?: Omit<DModelParameterValues, 'llmRef'>; // overrides (sets/replaces) individual LLM parameters
// -- Session State - extract? --
// [Anthropic Container] Container ID from a prior turn (caller checks expiry before setting)
antContainerId?: string;
}
@@ -217,6 +221,20 @@ export async function aixChatGenerateContent_DMessage_FromConversation(
chatSequence: await aixCGR_ChatSequence_FromDMessagesOrThrow(chatHistoryWithoutSystemMessages),
};
// [Anthropic Container] Extract container from the last assistant message (if valid, 15s margin for transit)
// Only runs when the caller did not already provide a container id. The newest message that carries an
// Anthropic container decides the outcome: it is either reused or rejected, and older messages are not consulted.
if (!clientOptions.antContainerId)
  for (let i = chatHistoryWithoutSystemMessages.length - 1; i >= 0; i--) {
    const uc = chatHistoryWithoutSystemMessages[i].generator?.upstreamContainer;
    if (uc?.uct === 'vnd.ant.container') {
      // Date.parse() yields NaN for a missing/malformed timestamp, and every comparison with NaN is false:
      // test for "enough time left" positively, so that an unknown expiry is never treated as valid.
      const remainingMs = Date.parse(uc.expiresAt) - Date.now();
      if (remainingMs > 15_000)
        clientOptions = { ...clientOptions, antContainerId: uc.containerId }; // copy: do not mutate the caller's options object
      else if (Number.isNaN(remainingMs))
        console.log(`[DEV] AIX: Anthropic container ${uc.containerId} has an unparsable expiry, not reusing.`);
      else if (remainingMs > 0)
        console.log(`[DEV] AIX: Anthropic container ${uc.containerId} expires in ${Math.round(remainingMs / 1000)}s (within the 15s margin), not reusing.`);
      else
        console.log(`[DEV] AIX: Anthropic container ${uc.containerId} expired ${Math.round(-remainingMs / 1000)}s ago, not reusing.`);
      break;
    }
  }
const { outcome, ...resultDMessage } = await aixChatGenerateContent_DMessage_orThrow(
llmId,
aixChatContentGenerateRequest,
@@ -487,6 +505,10 @@ export async function aixChatGenerateContent_DMessage_orThrow<TServiceSettings e
const llmParameters = getAllModelParameterValues(llm.initialParameters, clientOptions?.llmUserParametersReplacement ?? llm.userParameters);
const aixModel = aixCreateModelFromLLMOptions(llm.interfaces, llmParameters, clientOptions?.llmOptionsOverride, llmId);
// [Anthropic Container] Inject session state: Anthropic container from a prior turn (must be unexpired)
if (clientOptions?.antContainerId)
aixModel.vndAntContainerId = clientOptions.antContainerId;
// Client-side late stage model HotFixes
const { shallDisableStreaming } = await clientHotFixGenerateRequest_ApplyAll(llm.interfaces, aixChatGenerate, llmParameters.llmRef || llm.id);
if (shallDisableStreaming || aixModel.forceNoStream)
@@ -473,6 +473,7 @@ export namespace AixWire_API {
// Anthropic
vndAnt1MContext: z.boolean().optional(),
vndAntContainerId: z.string().optional(),
vndAntInfSpeed: z.enum(['fast']).optional(),
vndAntSkills: z.string().optional(),
vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(),
@@ -1,5 +1,7 @@
import * as z from 'zod/v4';
import type { AnthropicHostedFeatures } from '~/modules/llms/server/anthropic/anthropic.access';
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../api/aix.wiretypes';
import { AnthropicWire_API_Message_Create, AnthropicWire_Blocks } from '../../wiretypes/anthropic.wiretypes';
@@ -21,7 +23,41 @@ const hotFixAntSeparateContiguousThinkingBlocks = true; // Interleave continuous
type TRequest = AnthropicWire_API_Message_Create.Request;
export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): TRequest {
/**
 * Computes the set of Anthropic hosted features a request will activate.
 *
 * The result is meant to be computed once per request and handed to both the request-body
 * builder (hosted tools, container) and the access/beta-header builder, so the two stay in sync.
 *
 * @param model the AIX model configuration (vendor-specific `vndAnt*` switches are read from here)
 * @param chatGenerate the AIX chat request (only `tools` and `toolsPolicy` are inspected)
 * @returns the feature flags, plus the model id used for per-model feature lookups
 */
export function aixAnthropicHostedFeatures(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request): AnthropicHostedFeatures {
  const { tools, toolsPolicy } = chatGenerate;

  // Hosted tools are not auto-added when custom tools come with a restrictive policy
  const customToolsPresent = tools?.some(t => t.type === 'function_call');
  const policyType = toolsPolicy?.type;
  const policyIsRestrictive = policyType === 'any' || policyType === 'function_call';

  // Dynamic web tools (20260209) require code execution for programmatic tool calling
  const anyWebToolOnAuto = model.vndAntWebSearch === 'auto' || model.vndAntWebFetch === 'auto';
  const dynamicWebTools = model.vndAntWebDynamic === true && anyWebToolOnAuto;

  // Programmatic Tool Calling - function tools callable from code execution, or carrying input examples
  const usesProgrammaticCalling = tools?.some(tool => {
    if (tool.type !== 'function_call')
      return false;
    const { allowed_callers, input_examples } = tool.function_call;
    return allowed_callers?.includes('code_execution') || (input_examples && input_examples.length > 0);
  }) ?? false;

  const skillsRequested = !!model.vndAntSkills;
  const containerProvided = !!model.vndAntContainerId;

  return {
    disableAllHostedTools: Boolean(customToolsPresent && policyIsRestrictive),
    enable1MContext: model.vndAnt1MContext === true,
    // any of: Skills, a container to continue, dynamic web filtering, programmatic tool calling
    enableCodeExecution: skillsRequested || containerProvided || dynamicWebTools || usesProgrammaticCalling,
    enableFastMode: model.vndAntInfSpeed === 'fast',
    enableSkills: skillsRequested,
    enableStrictOutputs: Boolean(model.strictJsonOutput || model.strictToolInvocations),
    enableToolAdvanced20251120: !!model.vndAntToolSearch || usesProgrammaticCalling,
    modelIdForPerModelFeatures: model.id,
  };
}
export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate: AixAPIChatGenerate_Request, streaming: boolean, hostedFeatures: ReturnType<typeof aixAnthropicHostedFeatures>): TRequest {
// Pre-process CGR - approximate spill of System to User message
const chatGenerate = aixSpillSystemToUser(_chatGenerate);
@@ -204,15 +240,14 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
if (model.vndAntInfSpeed === 'fast')
payload.speed = 'fast';
// --- Tools ---
// Allow/deny auto-adding hosted tools when custom tools are present
const hasCustomTools = chatGenerate.tools?.some(t => t.type === 'function_call');
const hasRestrictivePolicy = chatGenerate.toolsPolicy?.type === 'any' || chatGenerate.toolsPolicy?.type === 'function_call';
const skipHostedToolsDueToCustomTools = hasCustomTools && hasRestrictivePolicy;
// Hosted capabilities - shared logic with dispatch for beta header correctness
const { disableAllHostedTools, enableCodeExecution } = hostedFeatures;
// Hosted tools
if (!skipHostedToolsDueToCustomTools) {
if (!disableAllHostedTools) {
const hostedTools: NonNullable<TRequest['tools']> = [];
// Web Search Tool - dynamic filtering (20260209) uses internal code execution for better results
@@ -249,38 +284,36 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
name: 'tool_search_tool_bm25',
});
// Code Execution tool - required for dynamic filtering, Skills, etc.
if (enableCodeExecution)
hostedTools.push({ type: 'code_execution_20260120', name: 'code_execution' });
// Merge hosted tools with custom tools
if (hostedTools.length > 0) {
payload.tools = payload.tools ? [...payload.tools, ...hostedTools] : hostedTools;
}
}
// --- Skills Container ---
// --- Container - for code execution (Skills, dynamic filtering, etc.) continuity between calls ---
// Add Skills container if enabled (non-empty string)
if (model.vndAntSkills) {
if (enableCodeExecution) {
// Parse comma-separated string and convert to Anthropic format
const skillIds = model.vndAntSkills.split(',').map((s: string) => s.trim()).filter((s: string) => s);
// Container ID from a previous turn (expiry already checked client-side)
const containerId = model.vndAntContainerId;
if (skillIds.length > 0) {
// request a container with those selected skills
const skillIds = model.vndAntSkills?.split(',').map(s => s.trim()).filter(s => s);
if (skillIds?.length) {
// Reuse or create a container for the skills
payload.container = {
skills: skillIds.map((skillId: string) => ({
type: 'anthropic' as const,
...(containerId ? { id: containerId } : {}),
skills: skillIds.map((skillId: string) => ({
type: 'anthropic',
skill_id: skillId,
version: 'latest',
})),
};
// also require the code_execution tool (required by Skills)
if (!payload.tools?.length)
payload.tools = [];
if (!payload.tools.some(t => t.type === 'code_execution_20260120' /* Beta */ || t.type === 'code_execution_20250825'))
payload.tools.push({ type: 'code_execution_20260120', name: 'code_execution' });
}
} else if (containerId && enableCodeExecution)
payload.container = containerId;
}
@@ -1,4 +1,4 @@
import { ANTHROPIC_API_PATHS, anthropicAccess, anthropicBetaFeatures, AnthropicHeaderOptions } from '~/modules/llms/server/anthropic/anthropic.access';
import { ANTHROPIC_API_PATHS, anthropicAccess, anthropicBetaFeatures } from '~/modules/llms/server/anthropic/anthropic.access';
import { OPENAI_API_PATHS, openAIAccess } from '~/modules/llms/server/openai/openai.access';
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLMantle, bedrockURLRuntime } from '~/modules/llms/server/bedrock/bedrock.access';
import { geminiAccess } from '~/modules/llms/server/gemini/gemini.access';
@@ -9,7 +9,7 @@ import type { AixDemuxers } from '../stream.demuxers';
import { GeminiWire_API_Generate_Content } from '../wiretypes/gemini.wiretypes';
import { aixToAnthropicMessageCreate } from './adapters/anthropic.messageCreate';
import { aixAnthropicHostedFeatures, aixToAnthropicMessageCreate } from './adapters/anthropic.messageCreate';
import { aixToBedrockConverse } from './adapters/bedrock.converse';
import { aixToGeminiGenerateContent } from './adapters/gemini.generateContent';
import { aixToOpenAIChatCompletions } from './adapters/openai.chatCompletions';
@@ -55,20 +55,10 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
switch (dialect) {
case 'anthropic': {
// [Anthropic, 2025-11-24] Detect if any tool uses Programmatic Tool Calling features (allowed_callers, input_examples)
const usesProgrammaticToolCalling = chatGenerate.tools?.some(tool =>
tool.type === 'function_call' && (
tool.function_call.allowed_callers?.includes('code_execution') ||
(tool.function_call.input_examples && tool.function_call.input_examples.length > 0)
),
) ?? false;
const anthropicRequest = anthropicAccess(access, ANTHROPIC_API_PATHS.messages,
_anthropicBetaOptionsFromModel(model, usesProgrammaticToolCalling),
);
const hostedFeatures = aixAnthropicHostedFeatures(model, chatGenerate);
// Build the request body from model + chat parameters
const anthropicBody = aixToAnthropicMessageCreate(model, chatGenerate, streaming);
const anthropicBody = aixToAnthropicMessageCreate(model, chatGenerate, streaming, hostedFeatures);
// [Anthropic, 2026-02-01] Service-level inference geo routing (e.g. "us")
if (access.anthropicInferenceGeo)
@@ -76,7 +66,7 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
return {
request: {
...anthropicRequest,
...anthropicAccess(access, ANTHROPIC_API_PATHS.messages, hostedFeatures),
method: 'POST',
body: anthropicBody,
},
@@ -109,15 +99,14 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
const invokeUrl = bedrockURLRuntime(bedrockResolveRegion(access), model.id, 'invoke', streaming);
// body
const bedrockAnthropicBody: Record<string, any> = aixToAnthropicMessageCreate(model, chatGenerate, streaming);
const bedrockHostedFeatures = aixAnthropicHostedFeatures(model, chatGenerate);
const bedrockAnthropicBody: Record<string, any> = aixToAnthropicMessageCreate(model, chatGenerate, streaming, bedrockHostedFeatures);
delete bedrockAnthropicBody.model; // model in path
delete bedrockAnthropicBody.stream; // streaming behavior in path
// headers['anthropic-version'] -> body
bedrockAnthropicBody.anthropic_version = 'bedrock-2023-05-31';
// headers['anthropic-beta'] -> body
bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures(
_anthropicBetaOptionsFromModel(model /* note that .id won't match, and it's okay, we don't need per model customizations */, false /* hardcoded */),
);
// headers['anthropic-beta'] -> body (note: model.id won't match PER_MODEL keys, and that's fine)
bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures(bedrockHostedFeatures);
if (!bedrockAnthropicBody.anthropic_beta?.length)
delete bedrockAnthropicBody.anthropic_beta;
@@ -257,21 +246,6 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
}
}
/**
 * Translates AIX model settings into the options that select Anthropic beta features.
 * Shared by the Anthropic direct and the Bedrock dispatch paths.
 *
 * @param model the AIX model configuration to read the vendor switches from
 * @param usesProgrammaticToolCalling whether the request carries tools using Programmatic Tool Calling
 * @returns the header options for the beta-feature builder
 */
function _anthropicBetaOptionsFromModel(model: AixAPI_Model, usesProgrammaticToolCalling: boolean): AnthropicHeaderOptions {
  // [Anthropic, 2025-11-13] strict outputs cover both JSON output and grammar-constrained tool invocation inputs
  const wantsStrictOutputs = Boolean(model.strictJsonOutput || model.strictToolInvocations);

  const options: AnthropicHeaderOptions = {
    modelIdForBetaFeatures: model.id,
    vndAntWebFetch: model.vndAntWebFetch === 'auto',
    vndAnt1MContext: model.vndAnt1MContext === true,
    enableSkills: Boolean(model.vndAntSkills),
    enableFastMode: model.vndAntInfSpeed === 'fast',
    enableStrictOutputs: wantsStrictOutputs,
    enableToolSearch: Boolean(model.vndAntToolSearch),
    enableProgrammaticToolCalling: usesProgrammaticToolCalling,
    // enableCodeExecution: ...
  };
  return options;
}
/**
* Specializes to the correct vendor a request for resuming chat generation (OpenAI Responses API only).
@@ -95,16 +95,15 @@ const PER_MODEL_BETA_FEATURES: { [modelId: string]: string[] } = {
// --- Anthropic Access ---
export type AnthropicHeaderOptions = {
modelIdForBetaFeatures?: string;
vndAnt1MContext?: boolean;
enableSkills?: boolean;
export type AnthropicHostedFeatures = {
disableAllHostedTools?: boolean;
enable1MContext?: boolean;
enableCodeExecution?: boolean;
enableFastMode?: boolean; // [Anthropic, fast-mode-2026-02-01]
enableSkills?: boolean;
enableStrictOutputs?: boolean; // [Anthropic, 2025-11-13] Structured Outputs (JSON outputs & strict tool use)
enableToolSearch?: boolean; // [Anthropic, 2025-11-24] Tool Search Tool
enableProgrammaticToolCalling?: boolean; // [Anthropic, 2025-11-24] Programmatic Tool Calling (allowed_callers, input_examples)
clientSideFetch?: boolean; // whether the request will be made from client-side (browser) - adds CORS header
enableToolAdvanced20251120?: boolean; // [Anthropic, 2025-11-24] Tool Search Tool + Programmatic Tool Calling (umbrella header)
modelIdForPerModelFeatures?: string;
};
export type AnthropicAccessSchema = z.infer<typeof anthropicAccessSchema>;
@@ -117,7 +116,7 @@ export const anthropicAccessSchema = z.object({
anthropicInferenceGeo: z.string().trim().nullable().optional(), // [Anthropic, 2026-02-01] e.g. "us" for US-only inference, optional: for server backward-comp, and can be removed
});
export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHeaderOptions): { headers: HeadersInit, url: string } {
export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHostedFeatures): { headers: HeadersInit, url: string } {
// API key
const anthropicKey = access.anthropicKey || env.ANTHROPIC_API_KEY || '';
@@ -162,22 +161,21 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string,
* Build the list of Anthropic beta feature strings from options.
* Used by both the direct Anthropic path (as header) and Bedrock path (as body field).
*/
export function anthropicBetaFeatures(options?: AnthropicHeaderOptions): string[] {
export function anthropicBetaFeatures(options?: AnthropicHostedFeatures): string[] {
const bf = new Set(DEFAULT_ANTHROPIC_BETA_FEATURES);
// Per-model beta features
if (options?.modelIdForBetaFeatures) {
if (options?.modelIdForPerModelFeatures) {
// string search (.includes) within the keys, to be more resilient to modelId changes/prefixing
for (const [key, value] of Object.entries(PER_MODEL_BETA_FEATURES))
if (key.includes(options.modelIdForBetaFeatures))
if (key.includes(options.modelIdForPerModelFeatures))
value.forEach(f => bf.add(f));
}
// Add beta feature for 1M context window if enabled
if (options?.vndAnt1MContext)
if (options?.enable1MContext)
bf.add('context-1m-2025-08-07');
// Code execution (for dynamic web tools PFC, or Skills) + files API for container downloads
// Note: SDK defines code-execution-2025-05-22; we use 2025-08-25 (newer iteration, not yet in SDK types).
// Code execution may be GA now (most SDK examples skip the beta namespace), but keeping for safety.
@@ -186,27 +184,26 @@ export function anthropicBetaFeatures(options?: AnthropicHeaderOptions): string[
bf.add('files-api-2025-04-14');
}
if (options?.enableSkills) {
bf.add('code-execution-2025-08-25');
bf.add('files-api-2025-04-14');
// [Anthropic, fast-mode-2026-02-01] Fast inference mode
if (options?.enableFastMode)
bf.add('fast-mode-2026-02-01');
// Skills also requires +enableCodeExecution
if (options?.enableSkills)
bf.add('skills-2025-10-02');
}
// [Anthropic, 2025-11-13] Structured Outputs (JSON outputs & strict tool use)
// GA on Claude 4.5+ via output_config.format (which we use). SDK auto-injects structured-outputs-2025-12-15.
// Keeping older header as safety net for pre-4.5 models; harmless on newer ones.
// Bedrock / AWS may still require: https://platform.claude.com/docs/en/build-with-claude/structured-outputs
if (options?.enableStrictOutputs)
bf.add('structured-outputs-2025-11-13');
// [Anthropic, 2025-11-24] Advanced Tool Use (Tool Search Tool, Programmatic Tool Calling)
// Same beta header covers both features: tool discovery and programmatic calling from code execution.
// Note: advanced-tool-use-2025-11-20 is NOT in the SDK AnthropicBeta type union (possibly private/undocumented).
if (options?.enableToolSearch || options?.enableProgrammaticToolCalling)
if (options?.enableToolAdvanced20251120)
bf.add('advanced-tool-use-2025-11-20');
// [Anthropic, fast-mode-2026-02-01] Fast inference mode
if (options?.enableFastMode)
bf.add('fast-mode-2026-02-01');
return [...bf];
}
@@ -6,12 +6,12 @@ import { fetchJsonOrTRPCThrow, fetchResponseOrTRPCThrow } from '~/server/trpc/tr
import { ListModelsResponse_schema } from '../llm.server.types';
import { listModelsRunDispatch } from '../listModels.dispatch';
import { anthropicAccess, anthropicAccessSchema, AnthropicAccessSchema, AnthropicHeaderOptions, ANTHROPIC_API_PATHS } from './anthropic.access';
import { anthropicAccess, anthropicAccessSchema, AnthropicAccessSchema, AnthropicHostedFeatures, ANTHROPIC_API_PATHS } from './anthropic.access';
// Mappers
async function anthropicGETOrThrow<TOut extends object>(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHeaderOptions, signal?: AbortSignal): Promise<TOut> {
async function anthropicGETOrThrow<TOut extends object>(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHostedFeatures, signal?: AbortSignal): Promise<TOut> {
const { headers, url } = anthropicAccess(access, apiPath, options);
return await fetchJsonOrTRPCThrow<TOut>({ url, headers, name: 'Anthropic', signal });
}