AIX/LLMs: Bedrock: support Mantle (OpenAI-compatible) including model enumeration. Fixes #965

This commit is contained in:
Enrico Ros
2026-02-25 21:10:54 -08:00
parent a231ccb492
commit 2de42c2010
11 changed files with 267 additions and 98 deletions
+1 -1
View File
@@ -72,7 +72,7 @@ Server-side adapters translate AIX parameters to vendor APIs. Each vendor may in
When a model is loaded:
1. **Model Creation**: `modelDescriptionToDLLM()` creates the DLLM with empty `initialParameters`
2. **Initial Value Application**: `applyModelParameterInitialValues()` populates initial values from:
2. **Initial Value Application**: `applyModelParameterSpecsInitialValues()` populates initial values from:
- Model spec `initialValue` (highest priority)
- Registry `initialValue` (fallback)
3. **Runtime Resolution**: `getAllModelParameterValues()` creates final parameter set:
+12 -1
View File
@@ -251,6 +251,17 @@ export const DModelParameterRegistry = {
// },
// Bedrock-specific
llmVndBedrockAPI: _enumDef({
label: 'Bedrock API',
type: 'enum',
description: 'Bedrock invocation API for this model',
values: ['converse', 'invoke-anthropic', 'mantle'],
// undefined is not accepted when this parameter is used
}),
// Gemini-specific
llmVndGeminiAspectRatio: _enumDef({ // implies: LLM_IF_Outputs_Image
@@ -549,7 +560,7 @@ export function applyModelParameterSpecsInitialValues(destValues: DModelParamete
if ('writeFactoryValue' in registryDef && registryDef.writeFactoryValue !== undefined)
destValues[paramId] = registryDef.writeFactoryValue as DModelParameterValue<typeof paramId>;
} else
console.warn(`applyModelParameterInitialValues: unknown parameter id '${paramId}'`);
console.warn(`applyModelParameterSpecsInitialValues: unknown parameter id '${paramId}'`);
}
}
+2 -2
View File
@@ -144,10 +144,10 @@ export type DModelInterfaceV1 =
| 'oai-chat-vision'
| 'oai-chat-reasoning'
| 'ant-prompt-caching'
| 'gem-code-execution'
| 'oai-prompt-caching'
| 'oai-realtime'
| 'oai-responses'
| 'gem-code-execution'
| 'outputs-audio' // TEMP: ui flag - supports audio output (e.g., text-to-speech)
| 'outputs-image' // TEMP: ui flag - supports image output (image generation)
| 'outputs-no-text' // disable text outputs (used in conjunction with alt-outputs) - assumed off
@@ -174,9 +174,9 @@ export const LLM_IF_Outputs_Image: DModelInterfaceV1 = 'outputs-image';
export const LLM_IF_Outputs_NoText: DModelInterfaceV1 = 'outputs-no-text';
export const LLM_IF_Tools_WebSearch: DModelInterfaceV1 = 'tools-web-search';
export const LLM_IF_ANT_PromptCaching: DModelInterfaceV1 = 'ant-prompt-caching';
export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution';
export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching';
export const LLM_IF_OAI_Responses: DModelInterfaceV1 = 'oai-responses';
export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution';
export const LLM_IF_HOTFIX_NoStream: DModelInterfaceV1 = 'hotfix-no-stream';
export const LLM_IF_HOTFIX_NoTemperature: DModelInterfaceV1 = 'hotfix-no-temperature';
export const LLM_IF_HOTFIX_NoWebP: DModelInterfaceV1 = 'hotfix-no-webp';
+4
View File
@@ -51,6 +51,7 @@ export function aixCreateModelFromLLMOptions(
llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream,
llmVndAntEffort, llmVndGemEffort, llmVndOaiEffort, llmVndMiscEffort,
llmVndAnt1MContext, llmVndAntInfSpeed, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch,
llmVndBedrockAPI,
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiThinkingBudget,
// llmVndMoonshotWebSearch,
llmVndOaiRestoreMarkdown, llmVndOaiVerbosity, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, llmVndOaiImageGeneration, llmVndOaiCodeInterpreter,
@@ -117,6 +118,9 @@ export function aixCreateModelFromLLMOptions(
...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}),
...(llmVndAntWebSearch === 'auto' ? { vndAntWebSearch: llmVndAntWebSearch } : {}),
// Bedrock
...(llmVndBedrockAPI ? { vndBedrockAPI: llmVndBedrockAPI } : {}),
// Gemini
...(llmVndGeminiAspectRatio ? { vndGeminiAspectRatio: llmVndGeminiAspectRatio } : {}),
...(llmVndGeminiCodeExecution === 'auto' ? { vndGeminiCodeExecution: llmVndGeminiCodeExecution } : {}),
+1 -1
View File
@@ -485,7 +485,7 @@ export namespace AixWire_API {
vndAntWebSearch: z.enum(['auto']).optional(),
// Bedrock
vndBedrockInvokeAPI: z.enum(['invoke-anthropic', 'converse']).optional(),
vndBedrockAPI: z.enum(['converse', 'invoke-anthropic', 'mantle']).optional(),
// Gemini
vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(),
@@ -1,6 +1,6 @@
import { ANTHROPIC_API_PATHS, anthropicAccess, anthropicBetaFeatures, AnthropicHeaderOptions } from '~/modules/llms/server/anthropic/anthropic.access';
import { OPENAI_API_PATHS, openAIAccess } from '~/modules/llms/server/openai/openai.access';
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLRuntime } from '~/modules/llms/server/bedrock/bedrock.access';
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLMantle, bedrockURLRuntime } from '~/modules/llms/server/bedrock/bedrock.access';
import { geminiAccess } from '~/modules/llms/server/gemini/gemini.access';
import { ollamaAccess } from '~/modules/llms/server/ollama/ollama.access';
@@ -84,38 +84,61 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
}
case 'bedrock': {
switch (model.vndBedrockAPI) {
// [Bedrock, 2026-02-24] we only support the Invoke API for Anthropic models
const invokeAPI = model.vndBedrockInvokeAPI ?? 'invoke-anthropic';
if (invokeAPI === 'converse')
throw new Error('[Bedrock] Converse API is not yet implemented. Use Anthropic models with the InvokeModel API (invoke-anthropic).');
case 'converse':
// No plans of implementing this yet - throwing below
break;
const region = bedrockResolveRegion(access);
const url = bedrockURLRuntime(region, model.id, streaming);
// [Bedrock Invoke] Anthropic-native InvokeModel API
case 'invoke-anthropic':
const invokeUrl = bedrockURLRuntime(bedrockResolveRegion(access), model.id, streaming);
// body
const bedrockAnthropicBody: Record<string, any> = aixToAnthropicMessageCreate(model, chatGenerate, streaming);
delete bedrockAnthropicBody.model; // model in path
delete bedrockAnthropicBody.stream; // streaming behavior in path
// headers['anthropic-version'] -> body
bedrockAnthropicBody.anthropic_version = 'bedrock-2023-05-31';
// headers['anthropic-beta'] -> body
bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures(
_anthropicBetaOptionsFromModel(model /* note that .id won't match, and it's okay, we don't need per model customizations */, false /* hardcoded */),
);
if (!bedrockAnthropicBody.anthropic_beta?.length)
delete bedrockAnthropicBody.anthropic_beta;
// body
const bedrockAnthropicBody: Record<string, any> = aixToAnthropicMessageCreate(model, chatGenerate, streaming);
delete bedrockAnthropicBody.model; // model in path
delete bedrockAnthropicBody.stream; // streaming behavior in path
// headers['anthropic-version'] -> body
bedrockAnthropicBody.anthropic_version = 'bedrock-2023-05-31';
// headers['anthropic-beta'] -> body
bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures(
_anthropicBetaOptionsFromModel(model /* note that .id won't match, and it's okay, we don't need per model customizations */, false /* hardcoded */),
);
if (!bedrockAnthropicBody.anthropic_beta?.length)
delete bedrockAnthropicBody.anthropic_beta;
return {
request: {
...await bedrockAccessAsync(access, 'POST', url, bedrockAnthropicBody),
method: 'POST',
body: bedrockAnthropicBody,
},
bodyTransform: streaming ? 'aws-eventstream-binary' : null,
demuxerFormat: streaming ? 'fast-sse' : null,
chatGenerateParse: streaming ? createAnthropicMessageParser() : createAnthropicMessageParserNS(),
};
return {
request: {
...await bedrockAccessAsync(access, 'POST', invokeUrl, bedrockAnthropicBody),
method: 'POST',
body: bedrockAnthropicBody,
},
bodyTransform: streaming ? 'aws-eventstream-binary' : null,
demuxerFormat: streaming ? 'fast-sse' : null,
chatGenerateParse: streaming ? createAnthropicMessageParser() : createAnthropicMessageParserNS(),
};
// [Bedrock Mantle] OpenAI Chat Completions-compatible API for non-Anthropic models
case 'mantle':
const mantleUrl = bedrockURLMantle(bedrockResolveRegion(access), '/v1/chat/completions');
const mantleBody = aixToOpenAIChatCompletions('openai', model, chatGenerate, streaming);
return {
request: {
...await bedrockAccessAsync(access, 'POST', mantleUrl, mantleBody),
method: 'POST',
body: mantleBody,
},
demuxerFormat: streaming ? 'fast-sse' : null,
chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(),
};
default:
const _exhaustiveCheck: never = model.vndBedrockAPI;
// fallthrough, then throw
case undefined:
break;
}
throw new Error(`Unsupported '${model.vndBedrockAPI}' API.`);
}
case 'gemini':
@@ -463,6 +463,8 @@ const _BEDROCK_ANT_IF_ALLOWLIST: ReadonlySet<string> = new Set([
// NOTE: llmVndAntInfSpeed not available on Bedrock, llmVndAntWebFetch/llmVndAntSkills not available
const _BEDROCK_ANT_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
// bedrock params to not strip
'llmVndBedrockAPI',
// supported
'llmVndAnt1MContext',
'llmVndAntEffort',
@@ -81,15 +81,19 @@ export function bedrockResolveRegion(access: BedrockAccessSchema): string {
// --- URLs ---
/** Control-plane endpoint (model/profile listing APIs) for an AWS region, with `path` appended verbatim. */
export function bedrockURLControlPlane(region: string, path: string): string {
  const host = `bedrock.${region}.amazonaws.com`;
  return 'https://' + host + path;
}
/** Bedrock Mantle (OpenAI-compatible) endpoint — note the `.api.aws` domain, unlike the `.amazonaws.com` control/runtime planes. */
export function bedrockURLMantle(region: string, path: string): string {
  return ['https://bedrock-mantle.', region, '.api.aws', path].join('');
}
/** Runtime InvokeModel endpoint for a model id; selects the streaming or non-streaming action path. */
export function bedrockURLRuntime(region: string, modelId: string, streaming: boolean): string {
  const base = `https://bedrock-runtime.${region}.amazonaws.com`;
  // model id is path-encoded — ids may contain characters (e.g. ':', '/') that would break the path otherwise
  return `${base}/model/${encodeURIComponent(modelId)}/${streaming ? 'invoke-with-response-stream' : 'invoke'}`;
}
// NOTE(review): this is a second, identical copy of bedrockURLControlPlane (also defined above) —
// presumably a leftover from reordering the URL helpers; confirm and keep only one copy
export function bedrockURLControlPlane(region: string, path: string): string {
  return `https://bedrock.${region}.amazonaws.com${path}`;
}
// --- Bedrock Access (Bearer or async SigV4) ---
+169 -53
View File
@@ -16,6 +16,29 @@ import type { ModelDescriptionSchema } from '../llm.server.types';
import { anthropicInjectVariants, llmBedrockFindAnthropicModel, llmBedrockStripAnthropicMDS } from '../anthropic/anthropic.models';
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { DModelParameterSpecAny } from '~/common/stores/llms/llms.parameters';
// --- Suppression Rules ---
// Foundation-model ids containing any of these substrings are skipped (reranking models — not text->text chat)
const SKIP_FM_ID_CONTAINS = ['rerank'];
// Inference-profile base ids starting with any of these prefixes are skipped
// NOTE(review): 'stability.' presumably covers image-generation models — confirm intent if adding prefixes
const SKIP_IP_ID_STARTSWITH = ['stability.'];
// Known Mantle-only models (no matching foundation model) — override heuristics with accurate metadata
// Fields: label = display name, ctx = context window (tokens), out = max completion tokens, vision = accepts image input
// NOTE(review): the uniform 131072/16384 figures read like conservative defaults — confirm per provider docs
const KNOWN_MANTLE_ONLY: Record<string, { label: string; ctx: number; out: number; vision?: true }> = {
  'deepseek.v3.1': { label: 'DeepSeek V3.1', ctx: 131072, out: 16384 },
  'moonshotai.kimi-k2-thinking': { label: 'Kimi K2 Thinking', ctx: 131072, out: 16384 },
  'openai.gpt-oss-20b': { label: 'GPT-OSS 20B', ctx: 131072, out: 16384 },
  'openai.gpt-oss-120b': { label: 'GPT-OSS 120B', ctx: 131072, out: 16384 },
  'qwen.qwen3-32b': { label: 'Qwen3 32B', ctx: 131072, out: 16384 },
  'qwen.qwen3-235b-a22b-2507': { label: 'Qwen3 235B A22B', ctx: 131072, out: 16384 },
  'qwen.qwen3-coder-30b-a3b-instruct': { label: 'Qwen3 Coder 30B', ctx: 131072, out: 16384 },
  'qwen.qwen3-coder-480b-a35b-instruct': { label: 'Qwen3 Coder 480B', ctx: 131072, out: 16384 },
  'qwen.qwen3-coder-next': { label: 'Qwen3 Coder Next', ctx: 131072, out: 16384 },
  'qwen.qwen3-next-80b-a3b-instruct': { label: 'Qwen3 Next 80B', ctx: 131072, out: 16384 },
  'qwen.qwen3-vl-235b-a22b-instruct': { label: 'Qwen3 VL 235B', ctx: 131072, out: 16384, vision: true },
  'zai.glm-4.6': { label: 'GLM 4.6', ctx: 131072, out: 16384 },
} as const;
// --- Bedrock API Wire Types ---
@@ -68,6 +91,17 @@ export namespace BedrockWire_API_Models_List {
nextToken: z.string().optional().nullable(),
});
// ListMantleModels response (OpenAI-compatible /v1/models from Bedrock Mantle)
export const MantleModelsResponse_schema = z.object({
data: z.array(z.object({
id: z.string(),
object: z.string().optional(),
created: z.number().optional(),
owned_by: z.string().optional(),
})),
});
}
@@ -101,14 +135,21 @@ function _seemsAnthropicBedrockModel(bedrockModelId: string): boolean {
export function bedrockModelsToDescriptions(
foundationModels: z.infer<typeof BedrockWire_API_Models_List.FoundationModelsResponse_schema>,
inferenceProfiles: z.infer<typeof BedrockWire_API_Models_List.InferenceProfilesResponse_schema>,
mantleModels: z.infer<typeof BedrockWire_API_Models_List.MantleModelsResponse_schema>,
): ModelDescriptionSchema[] {
// Collect unique model IDs from both sources
// Get the IDs for the Mantle models
const mantleModelIds = new Set(mantleModels.data.map(m => m.id));
let remainingMantleModelIds = new Set(mantleModelIds); // to track which Mantle models are not matched to foundation/inference profiles
// Collect unique model definitions from all sources
const modelMap = new Map<string, {
id: string;
label: string;
provider: string;
isInferenceProfile: boolean;
hasMantle: boolean;
isLegacy: boolean;
isProfile: boolean;
streaming: boolean;
converseMaxTokens: number | null;
converseImageTypes: string[]
@@ -116,96 +157,167 @@ export function bedrockModelsToDescriptions(
// Foundation Models
for (const fm of foundationModels.modelSummaries) {
// exclude legacy models
if (fm.modelLifecycle?.status === 'LEGACY') continue;
const baseId = fm.modelId; // e.g. 'google.gemma-3-4b-it', 'moonshotai.kimi-k2.5'
const hasMantle = mantleModelIds.has(baseId);
// excludes embedding, image gen, video gen, speech-only
// exclusion by pattern
if (SKIP_FM_ID_CONTAINS.some(s => baseId.includes(s))) continue;
// excludes non text->text, such as embedding, image gen, video gen, speech-only
if (!fm.inputModalities?.includes('TEXT') || !fm.outputModalities?.includes('TEXT')) continue;
// denylist '..match..'
if (['rerank'].some(match => fm.modelId.includes(match))) continue;
modelMap.set(fm.modelId, {
id: fm.modelId,
modelMap.set(baseId, {
id: baseId,
label: fm.modelName,
provider: fm.providerName,
isInferenceProfile: false,
hasMantle,
isLegacy: fm.modelLifecycle?.status === 'LEGACY',
isProfile: false,
streaming: fm.responseStreamingSupported ?? true,
converseMaxTokens: fm.converse?.maxTokensMaximum ?? null,
converseImageTypes: fm.converse?.userImageTypesSupported ?? [],
});
// mark as used in mantle
if (hasMantle)
remainingMantleModelIds.delete(baseId);
}
// Inference Profiles
// Inference Profiles - important to come AFTER the base models, so we can resolve some attributes, if needed
for (const ip of inferenceProfiles.inferenceProfileSummaries) {
// exclude legacy models
if (ip.status && ip.status !== 'ACTIVE') continue;
// denylist 'start..'
const baseId = _stripRegionPrefix(ip.inferenceProfileId);
if (['stability.'].some(start => baseId.startsWith(start))) continue;
if (SKIP_IP_ID_STARTSWITH.some(s => baseId.startsWith(s))) continue;
const hasMantle = mantleModelIds.has(baseId);
// check if there's a matching foundation model (not anthropic, we map them differently)
const foundationMeta = modelMap.get(baseId);
// if (!_seemsAnthropicBedrockModel(ip.inferenceProfileId) && !foundationMeta)
// console.log('[Bedrock] No matching foundation model for inference profile', ip.inferenceProfileId);
modelMap.set(ip.inferenceProfileId, {
id: ip.inferenceProfileId,
label: ip.inferenceProfileName,
provider: _extractProvider(ip.inferenceProfileId),
isInferenceProfile: true,
hasMantle,
isLegacy: ip.status === 'LEGACY',
isProfile: true,
streaming: foundationMeta?.streaming ?? true,
converseMaxTokens: foundationMeta?.converseMaxTokens ?? null,
converseImageTypes: foundationMeta?.converseImageTypes ?? [],
});
// mark as used in mantle
if (hasMantle)
remainingMantleModelIds.delete(baseId);
}
// Fuse foundationModels + inferenceProfiles into unified ModelDescriptionSchema definitions
// - Anthropic models get enriched with hardcoded metadata, plus 0-day
// - non-anthropic models get basic descriptions based on Bedrock metadata, plus mantle/converse markers
// -> ModelDescriptionSchema[], with Anthropic thinking variants injected inline
const descriptions: ModelDescriptionSchema[] = [];
for (const [modelId, meta] of modelMap) {
const symbolMantle = ''; // '🐘'; 'Ⓜ️'
const bedrockAPIAnthropic = { paramId: 'llmVndBedrockAPI', initialValue: 'invoke-anthropic' } as const satisfies DModelParameterSpecAny;
const bedrockAPIConverse = { paramId: 'llmVndBedrockAPI', initialValue: 'converse' } as const satisfies DModelParameterSpecAny;
const bedrockAPIMantle = { paramId: 'llmVndBedrockAPI', initialValue: 'mantle' } as const satisfies DModelParameterSpecAny;
for (const [modelId, modelMeta] of modelMap) {
if (_seemsAnthropicBedrockModel(modelId)) {
// Known Anthropic models: enrich with hardcoded definitions + inject thinking variants
const antModel = llmBedrockFindAnthropicModel(_stripRegionPrefix(modelId));
if (antModel) {
const isProfile = !!_extractRegionPrefix(modelId);
// Inject variants (returns [variant, base] or [base] if no variant)
const withVariants = anthropicInjectVariants([], antModel);
for (const variant of withVariants) {
const label = isProfile ? _profileLabel(variant.label, modelId) : variant.label;
descriptions.push({ ...variant, id: modelId, label });
// Anthropic models
const antModel = llmBedrockFindAnthropicModel(_stripRegionPrefix(modelId));
// Known Anthropic: enrich with hardcoded definitions + inject thinking variants
if (antModel) {
for (const variant of anthropicInjectVariants([], antModel))
descriptions.push(llmBedrockStripAnthropicMDS({ // Filter to the subset of Anthropic params supported
...variant,
id: modelId,
description: `${variant.description}${modelMeta.isProfile ? ' (Bedrock Inference Profile)' : ' (Bedrock Foundation Model)'}`,
label: `${modelMeta.isLegacy ? '🕰️ ' : '' /*🅰️*/}${!modelMeta.isProfile ? variant.label : _labelFromProfile(variant.label, modelId)}`,
parameterSpecs: [...(variant.parameterSpecs || []), bedrockAPIAnthropic], // NOTE: FILTER MUST ALLOW THIS PARAM TOO!
}));
}
// Unknown Anthropic: 0-day model, not in our hardcoded DB
else {
descriptions.push({
id: modelId,
label: `${modelMeta.isLegacy ? '🕰️ ' : ''}${!modelMeta.isProfile ? modelMeta.label : _labelFromProfile(modelMeta.label, modelId)} [?]`,
description: `${modelMeta.provider} model ${modelMeta.isProfile ? ' (Bedrock Inference Profile)' : ' (Bedrock Foundation Model)'}`,
hidden: modelMeta.isLegacy || modelId.includes('.claude-3-'),
// default assumptions
contextWindow: 200000,
maxCompletionTokens: 64000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
parameterSpecs: [bedrockAPIAnthropic],
});
}
continue;
}
// Unknown models - these will NOT be accessible, hence the '🚧'. We show them just in case, but maybe we shall not
const isAnthropic = _seemsAnthropicBedrockModel(modelId);
const hasVision = meta.converseImageTypes.length > 0;
} else {
// Non-Anthropic models - may call them via mantle (if hasMantle)
const hasVision = modelMeta.converseImageTypes.length > 0;
const isMantle = modelMeta.hasMantle;
let label = modelMeta.isProfile ? _labelFromProfile(modelMeta.label, modelId) : modelMeta.label;
descriptions.push({
id: modelId,
label: `${isMantle ? symbolMantle : '🚧 '}${label.startsWith(modelMeta.provider) ? '' : (modelMeta.provider + ' ')}${label}`,
description: `${modelMeta.provider} model via ${isMantle ? 'OpenAI-Compatible' : 'Unsupported'} API ${modelMeta.isProfile ? ' (Bedrock Inference Profile)' : ' (Bedrock Foundation Model)'}`,
contextWindow: modelMeta.converseMaxTokens ?? null,
interfaces: hasVision ? [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision] : [LLM_IF_OAI_Chat],
parameterSpecs: [isMantle ? bedrockAPIMantle : bedrockAPIConverse],
hidden: !isMantle, // only if it runs through mantle
});
}
}
// -> Add remaining Mantle-only models (not matched to any FM/IP)
for (const mantleId of remainingMantleModelIds) {
const known = KNOWN_MANTLE_ONLY[mantleId];
const provider = _extractMantleProvider(mantleId);
const interfaces = [LLM_IF_OAI_Chat];
if (known?.vision) interfaces.push(LLM_IF_OAI_Vision);
descriptions.push({
id: modelId,
label: '🚧 ' + (meta.isInferenceProfile ? _profileLabel(meta.label, modelId) : meta.label),
description: `${meta.provider} model on AWS Bedrock${isAnthropic ? '' : ' (Converse API)'}`,
contextWindow: isAnthropic ? 200000 : (meta.converseMaxTokens ? meta.converseMaxTokens * 2 : 32768),
maxCompletionTokens: isAnthropic ? 64000 : (meta.converseMaxTokens ?? 4096),
interfaces:
isAnthropic ? [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching]
: hasVision ? [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision]
: [LLM_IF_OAI_Chat],
hidden: true, // not in our known models DB — hide until verified usable
id: mantleId,
label: `${symbolMantle}${known?.label ?? labelForMantle(mantleId, provider)}${known ? '' : ' [?]'}`,
description: `${provider} model via OpenAI-Compatible API on AWS Bedrock Mantle`,
contextWindow: known?.ctx ?? 131072,
maxCompletionTokens: known?.out ?? 16384,
interfaces,
parameterSpecs: [bedrockAPIMantle],
hidden: true, // we know it can run, but we don't have models details
});
}
// Filter interfaces and params to Bedrock-supported subset, then sort
const filtered = descriptions.map(llmBedrockStripAnthropicMDS);
filtered.sort(_bedrockModelSort);
return filtered;
return descriptions.sort(_bedrockModelSort);
}
// --- Helpers ---
/** Capitalized provider segment of a Mantle model id (e.g. 'mistral.model-name' -> 'Mistral'), or 'Unknown' when absent. */
function _extractMantleProvider(modelId: string): string {
  const [vendor] = modelId.split('.');
  if (!vendor)
    return 'Unknown';
  return vendor.charAt(0).toUpperCase() + vendor.slice(1);
}
/**
 * Derive a human-readable label from a Mantle model id.
 * Drops the leading provider segment, turns dashes into spaces, title-cases
 * each word, then prefixes the provider display name.
 * E.g. ('qwen.qwen3-32b', 'Qwen') -> 'Qwen Qwen3 32b'.
 */
function labelForMantle(modelId: string, provider: string): string {
  const [, ...rest] = modelId.split('.');
  // fall back to the full id when there is no '.' separator
  const tail = rest.join('.') || modelId;
  const pretty = tail.replace(/-/g, ' ').replace(/\b\w/g, (ch) => ch.toUpperCase());
  return `${provider} ${pretty}`;
}
/** Build a profile label: strip redundant region prefix from name, append `· Region` suffix (omit for global) */
function _profileLabel(name: string, modelId: string): string {
function _labelFromProfile(name: string, modelId: string): string {
const prefix = _extractRegionPrefix(modelId) ?? 'regional';
// Strip leading "US ", "GLOBAL ", etc. from the AWS-provided name
const cleanName = name.replace(/^(US|EU|GLOBAL|JP|APAC)\s+/i, '');
@@ -223,14 +335,19 @@ function _extractProvider(profileId: string): string {
return parts[0] ? parts[0].charAt(0).toUpperCase() + parts[0].slice(1) : 'Unknown';
}
/** Sort: Anthropic first, then family > class > variant (thinking before plain) > region */
/** Sort: Anthropic first, then non-Anthropic by provider > label */
function _bedrockModelSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number {
const aIsAnt = _seemsAnthropicBedrockModel(a.id);
const bIsAnt = _seemsAnthropicBedrockModel(b.id);
if (aIsAnt && !bIsAnt) return -1;
if (!aIsAnt && bIsAnt) return 1;
if (aIsAnt !== bIsAnt) return aIsAnt ? -1 : 1;
// Within Anthropic: sort by family precedence
// --- Non-Anthropic: 🚧-prefixed labels last, then provider, then label ---
if (!aIsAnt)
return (a.label.startsWith('🚧') ? 1 : 0) - (b.label.startsWith('🚧') ? 1 : 0)
|| _extractMantleProvider(a.id).localeCompare(_extractMantleProvider(b.id))
|| a.label.localeCompare(b.label);
// --- Anthropic: family > class > variant > region ---
const familyPrecedence = ['-4-7-', '-4-6', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
@@ -245,11 +362,10 @@ function _bedrockModelSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema)
const classB = getClassIdx(b.id);
if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
// Thinking/adaptive variants before plain (idVariant present = variant)
// Thinking/adaptive variants before plain
const aIsVariant = !!a.idVariant;
const bIsVariant = !!b.idVariant;
if (aIsVariant && !bIsVariant) return -1;
if (!aIsVariant && bIsVariant) return 1;
if (aIsVariant !== bIsVariant) return aIsVariant ? -1 : 1;
// Prefer global > us > eu > regional
const prefixOrder = ['global', 'us', 'eu', 'jp', 'apac'];
+14 -7
View File
@@ -16,7 +16,7 @@ import { anthropicInjectVariants, anthropicValidateModelDefs_DEV, AnthropicWire_
import { ANTHROPIC_API_PATHS, anthropicAccess } from './anthropic/anthropic.access';
// protocol: Bedrock
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLControlPlane } from './bedrock/bedrock.access';
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLControlPlane, bedrockURLMantle } from './bedrock/bedrock.access';
import { bedrockModelsToDescriptions, BedrockWire_API_Models_List } from './bedrock/bedrock.models';
// protocol: Gemini
@@ -172,35 +172,42 @@ function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal):
const region = bedrockResolveRegion(access);
const fmUrl = bedrockURLControlPlane(region, '/foundation-models?byInferenceType=ON_DEMAND');
const ipUrl = bedrockURLControlPlane(region, '/inference-profiles?typeEquals=SYSTEM_DEFINED&maxResults=1000');
const mantleUrl = bedrockURLMantle(region, '/v1/models');
// sign and fetch both lists in parallel - degrade gracefully if one fails, throw if both fail
const [fmResult, ipResult] = await Promise.allSettled([
// sign and fetch all lists in parallel - each fails independently
const [fmResult, ipResult, mantleIdsResult] = await Promise.allSettled([
// Foundation Models
bedrockAccessAsync(access, 'GET', fmUrl, undefined)
.then(fmAccess => fetchJsonOrTRPCThrow({ ...fmAccess, signal, name: 'Bedrock/FM' })),
// Inference Profiles
bedrockAccessAsync(access, 'GET', ipUrl, undefined)
.then(ipAccess => fetchJsonOrTRPCThrow({ ...ipAccess, signal, name: 'Bedrock/IP' })),
// Mantle Models
bedrockAccessAsync(access, 'GET', mantleUrl, undefined)
.then(mantleAccess => fetchJsonOrTRPCThrow({ ...mantleAccess, signal, name: 'Bedrock/Mantle' })),
]);
// if both failed, throw the first error so the user sees it
// if both FM and IP failed, throw the first error so the user sees it
if (fmResult.status === 'rejected' && ipResult.status === 'rejected')
throw fmResult.reason;
// degrade gracefully if only one failed
// degrade gracefully if any failed
const fmResponse = fmResult.status === 'fulfilled' ? fmResult.value : { modelSummaries: [] };
const ipResponse = ipResult.status === 'fulfilled' ? ipResult.value : { inferenceProfileSummaries: [] };
const mantleResponse = mantleIdsResult.status === 'fulfilled' ? mantleIdsResult.value : { data: [] };
_wire?.logResponse(fmResponse);
_wire?.logResponse(ipResponse);
_wire?.logResponse(mantleResponse);
return {
foundationModels: BedrockWire_API_Models_List.FoundationModelsResponse_schema.parse(fmResponse),
inferenceProfiles: BedrockWire_API_Models_List.InferenceProfilesResponse_schema.parse(ipResponse),
mantleModelIds: BedrockWire_API_Models_List.MantleModelsResponse_schema.parse(mantleResponse),
};
},
convertToDescriptions: ({ foundationModels, inferenceProfiles }) =>
bedrockModelsToDescriptions(foundationModels, inferenceProfiles),
convertToDescriptions: ({ foundationModels, inferenceProfiles, mantleModelIds }) =>
bedrockModelsToDescriptions(foundationModels, inferenceProfiles, mantleModelIds),
});
}
@@ -88,6 +88,8 @@ const ModelParameterSpec_schema = z.object({
'llmVndAntThinkingBudget',
'llmVndAntWebFetch',
'llmVndAntWebSearch',
// Bedrock
'llmVndBedrockAPI',
// Gemini
'llmVndGeminiAspectRatio',
'llmVndGeminiCodeExecution',