From 2de42c2010fb9d8a97c496bbb9f2a963fa10f65c Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Wed, 25 Feb 2026 21:10:54 -0800 Subject: [PATCH] AIX/LLMs: Bedrock: support Mantle (OpenAI-compatible) including model enumeration. Fixes #965 --- kb/systems/LLM-parameters-system.md | 2 +- src/common/stores/llms/llms.parameters.ts | 13 +- src/common/stores/llms/llms.types.ts | 4 +- src/modules/aix/client/aix.client.ts | 4 + src/modules/aix/server/api/aix.wiretypes.ts | 2 +- .../chatGenerate/chatGenerate.dispatch.ts | 81 ++++--- .../llms/server/anthropic/anthropic.models.ts | 2 + .../llms/server/bedrock/bedrock.access.ts | 12 +- .../llms/server/bedrock/bedrock.models.ts | 222 +++++++++++++----- .../llms/server/listModels.dispatch.ts | 21 +- src/modules/llms/server/llm.server.types.ts | 2 + 11 files changed, 267 insertions(+), 98 deletions(-) diff --git a/kb/systems/LLM-parameters-system.md b/kb/systems/LLM-parameters-system.md index 8a43b3e87..54797e4da 100644 --- a/kb/systems/LLM-parameters-system.md +++ b/kb/systems/LLM-parameters-system.md @@ -72,7 +72,7 @@ Server-side adapters translate AIX parameters to vendor APIs. Each vendor may in When a model is loaded: 1. **Model Creation**: `modelDescriptionToDLLM()` creates the DLLM with empty `initialParameters` -2. **Initial Value Application**: `applyModelParameterInitialValues()` populates initial values from: +2. **Initial Value Application**: `applyModelParameterSpecsInitialValues()` populates initial values from: - Model spec `initialValue` (highest priority) - Registry `initialValue` (fallback) 3. **Runtime Resolution**: `getAllModelParameterValues()` creates final parameter set: diff --git a/src/common/stores/llms/llms.parameters.ts b/src/common/stores/llms/llms.parameters.ts index 1ae47efaf..fa56822f8 100644 --- a/src/common/stores/llms/llms.parameters.ts +++ b/src/common/stores/llms/llms.parameters.ts @@ -251,6 +251,17 @@ export const DModelParameterRegistry = { // }, + // Bedrock-specific + + llmVndBedrockAPI: _enumDef({ + label: 'Bedrock API', + type: 'enum', + description: 'Bedrock invocation API for this model', + values: ['converse', 'invoke-anthropic', 'mantle'], + // undefined is not accepted when this parameter is used + }), + + // Gemini-specific llmVndGeminiAspectRatio: _enumDef({ // implies: LLM_IF_Outputs_Image @@ -549,7 +560,7 @@ export function applyModelParameterSpecsInitialValues(destValues: DModelParamete if ('writeFactoryValue' in registryDef && registryDef.writeFactoryValue !== undefined) destValues[paramId] = registryDef.writeFactoryValue as DModelParameterValue; } else - console.warn(`applyModelParameterInitialValues: unknown parameter id '${paramId}'`); + console.warn(`applyModelParameterSpecsInitialValues: unknown parameter id '${paramId}'`); } } diff --git a/src/common/stores/llms/llms.types.ts b/src/common/stores/llms/llms.types.ts index 399bd1c29..e3d3780ba 100644 --- a/src/common/stores/llms/llms.types.ts +++ b/src/common/stores/llms/llms.types.ts @@ -144,10 +144,10 @@ export type DModelInterfaceV1 = | 'oai-chat-vision' | 'oai-chat-reasoning' | 'ant-prompt-caching' + | 'gem-code-execution' | 'oai-prompt-caching' | 'oai-realtime' | 'oai-responses' - | 'gem-code-execution' | 'outputs-audio' // TEMP: ui flag - supports audio output (e.g., text-to-speech) | 'outputs-image' // TEMP: ui flag - supports image output (image generation) | 'outputs-no-text' // disable text outputs (used in conjunction with alt-outputs) - assumed off @@ -174,9 +174,9 @@ export const LLM_IF_Outputs_Image: DModelInterfaceV1 = 'outputs-image'; export const LLM_IF_Outputs_NoText: DModelInterfaceV1 = 'outputs-no-text'; export const LLM_IF_Tools_WebSearch: DModelInterfaceV1 = 'tools-web-search'; export const LLM_IF_ANT_PromptCaching: DModelInterfaceV1 = 'ant-prompt-caching'; +export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution'; export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching'; export const LLM_IF_OAI_Responses: DModelInterfaceV1 = 'oai-responses'; -export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution'; export const LLM_IF_HOTFIX_NoStream: DModelInterfaceV1 = 'hotfix-no-stream'; export const LLM_IF_HOTFIX_NoTemperature: DModelInterfaceV1 = 'hotfix-no-temperature'; export const LLM_IF_HOTFIX_NoWebP: DModelInterfaceV1 = 'hotfix-no-webp'; diff --git a/src/modules/aix/client/aix.client.ts b/src/modules/aix/client/aix.client.ts index 5c3f26f81..8de2e8541 100644 --- a/src/modules/aix/client/aix.client.ts +++ b/src/modules/aix/client/aix.client.ts @@ -51,6 +51,7 @@ export function aixCreateModelFromLLMOptions( llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream, llmVndAntEffort, llmVndGemEffort, llmVndOaiEffort, llmVndMiscEffort, llmVndAnt1MContext, llmVndAntInfSpeed, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, + llmVndBedrockAPI, llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiThinkingBudget, // llmVndMoonshotWebSearch, llmVndOaiRestoreMarkdown, llmVndOaiVerbosity, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, llmVndOaiImageGeneration, llmVndOaiCodeInterpreter, @@ -117,6 +118,9 @@ export function aixCreateModelFromLLMOptions( ...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}), ...(llmVndAntWebSearch === 'auto' ? { vndAntWebSearch: llmVndAntWebSearch } : {}), + // Bedrock + ...(llmVndBedrockAPI ? { vndBedrockAPI: llmVndBedrockAPI } : {}), + // Gemini ...(llmVndGeminiAspectRatio ? { vndGeminiAspectRatio: llmVndGeminiAspectRatio } : {}), ...(llmVndGeminiCodeExecution === 'auto' ? { vndGeminiCodeExecution: llmVndGeminiCodeExecution } : {}), diff --git a/src/modules/aix/server/api/aix.wiretypes.ts b/src/modules/aix/server/api/aix.wiretypes.ts index 12efa9beb..30a6272a0 100644 --- a/src/modules/aix/server/api/aix.wiretypes.ts +++ b/src/modules/aix/server/api/aix.wiretypes.ts @@ -485,7 +485,7 @@ export namespace AixWire_API { vndAntWebSearch: z.enum(['auto']).optional(), // Bedrock - vndBedrockInvokeAPI: z.enum(['invoke-anthropic', 'converse']).optional(), + vndBedrockAPI: z.enum(['converse', 'invoke-anthropic', 'mantle']).optional(), // Gemini vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(), diff --git a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts index c2e7c84dc..5979c7009 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts @@ -1,6 +1,6 @@ import { ANTHROPIC_API_PATHS, anthropicAccess, anthropicBetaFeatures, AnthropicHeaderOptions } from '~/modules/llms/server/anthropic/anthropic.access'; import { OPENAI_API_PATHS, openAIAccess } from '~/modules/llms/server/openai/openai.access'; -import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLRuntime } from '~/modules/llms/server/bedrock/bedrock.access'; +import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLMantle, bedrockURLRuntime } from '~/modules/llms/server/bedrock/bedrock.access'; import { geminiAccess } from '~/modules/llms/server/gemini/gemini.access'; import { ollamaAccess } from '~/modules/llms/server/ollama/ollama.access'; @@ -84,38 +84,61 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A } case 'bedrock': { + switch (model.vndBedrockAPI) { - // [Bedrock, 2026-02-24] we only support the Invoke API for Anthropic models - const invokeAPI = model.vndBedrockInvokeAPI ?? 'invoke-anthropic'; - if (invokeAPI === 'converse') - throw new Error('[Bedrock] Converse API is not yet implemented. Use Anthropic models with the InvokeModel API (invoke-anthropic).'); + case 'converse': + // No plans of implementing this yet - throwing below + break; - const region = bedrockResolveRegion(access); - const url = bedrockURLRuntime(region, model.id, streaming); + // [Bedrock Invoke] Anthropic-native InvokeModel API + case 'invoke-anthropic': + const invokeUrl = bedrockURLRuntime(bedrockResolveRegion(access), model.id, streaming); - // body - const bedrockAnthropicBody: Record = aixToAnthropicMessageCreate(model, chatGenerate, streaming); - delete bedrockAnthropicBody.model; // model in path - delete bedrockAnthropicBody.stream; // streaming behavior in path - // headers['anthropic-version'] -> body - bedrockAnthropicBody.anthropic_version = 'bedrock-2023-05-31'; - // headers['anthropic-beta'] -> body - bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures( - _anthropicBetaOptionsFromModel(model /* note that .id won't match, and it's okay, we don't need per model customizations */, false /* hardcoded */), - ); - if (!bedrockAnthropicBody.anthropic_beta?.length) - delete bedrockAnthropicBody.anthropic_beta; + // body + const bedrockAnthropicBody: Record = aixToAnthropicMessageCreate(model, chatGenerate, streaming); + delete bedrockAnthropicBody.model; // model in path + delete bedrockAnthropicBody.stream; // streaming behavior in path + // headers['anthropic-version'] -> body + bedrockAnthropicBody.anthropic_version = 'bedrock-2023-05-31'; + // headers['anthropic-beta'] -> body + bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures( + _anthropicBetaOptionsFromModel(model /* note that .id won't match, and it's okay, we don't need per model customizations */, false /* hardcoded */), + ); + if (!bedrockAnthropicBody.anthropic_beta?.length) + delete bedrockAnthropicBody.anthropic_beta; - return { - request: { - ...await bedrockAccessAsync(access, 'POST', url, bedrockAnthropicBody), - method: 'POST', - body: bedrockAnthropicBody, - }, - bodyTransform: streaming ? 'aws-eventstream-binary' : null, - demuxerFormat: streaming ? 'fast-sse' : null, - chatGenerateParse: streaming ? createAnthropicMessageParser() : createAnthropicMessageParserNS(), - }; + return { + request: { + ...await bedrockAccessAsync(access, 'POST', invokeUrl, bedrockAnthropicBody), + method: 'POST', + body: bedrockAnthropicBody, + }, + bodyTransform: streaming ? 'aws-eventstream-binary' : null, + demuxerFormat: streaming ? 'fast-sse' : null, + chatGenerateParse: streaming ? createAnthropicMessageParser() : createAnthropicMessageParserNS(), + }; + + // [Bedrock Mantle] OpenAI Chat Completions-compatible API for non-Anthropic models + case 'mantle': + const mantleUrl = bedrockURLMantle(bedrockResolveRegion(access), '/v1/chat/completions'); + const mantleBody = aixToOpenAIChatCompletions('openai', model, chatGenerate, streaming); + return { + request: { + ...await bedrockAccessAsync(access, 'POST', mantleUrl, mantleBody), + method: 'POST', + body: mantleBody, + }, + demuxerFormat: streaming ? 'fast-sse' : null, + chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(), + }; + + default: + const _exhaustiveCheck: never = model.vndBedrockAPI; + // fallthrough, then throw + case undefined: + break; + } + throw new Error(`Unsupported '${model.vndBedrockAPI}' API.`); } case 'gemini': diff --git a/src/modules/llms/server/anthropic/anthropic.models.ts b/src/modules/llms/server/anthropic/anthropic.models.ts index a017ebc4a..574623b19 100644 --- a/src/modules/llms/server/anthropic/anthropic.models.ts +++ b/src/modules/llms/server/anthropic/anthropic.models.ts @@ -463,6 +463,8 @@ const _BEDROCK_ANT_IF_ALLOWLIST: ReadonlySet = new Set([ // NOTE: llmVndAntInfSpeed not available on Bedrock, llmVndAntWebFetch/llmVndAntSkills not available const _BEDROCK_ANT_PARAM_ALLOWLIST: ReadonlySet = new Set([ + // bedrock params to not strip + 'llmVndBedrockAPI', // supported 'llmVndAnt1MContext', 'llmVndAntEffort', diff --git a/src/modules/llms/server/bedrock/bedrock.access.ts b/src/modules/llms/server/bedrock/bedrock.access.ts index b794d59f4..9f0ca1a35 100644 --- a/src/modules/llms/server/bedrock/bedrock.access.ts +++ b/src/modules/llms/server/bedrock/bedrock.access.ts @@ -81,15 +81,19 @@ export function bedrockResolveRegion(access: BedrockAccessSchema): string { // --- URLs --- +export function bedrockURLControlPlane(region: string, path: string): string { + return `https://bedrock.${region}.amazonaws.com${path}`; +} + +export function bedrockURLMantle(region: string, path: string): string { + return `https://bedrock-mantle.${region}.api.aws${path}`; +} + export function bedrockURLRuntime(region: string, modelId: string, streaming: boolean): string { const action = streaming ? 'invoke-with-response-stream' : 'invoke'; return `https://bedrock-runtime.${region}.amazonaws.com/model/${encodeURIComponent(modelId)}/${action}`; } -export function bedrockURLControlPlane(region: string, path: string): string { - return `https://bedrock.${region}.amazonaws.com${path}`; -} - // --- Bedrock Access (Bearer or async SigV4) --- diff --git a/src/modules/llms/server/bedrock/bedrock.models.ts b/src/modules/llms/server/bedrock/bedrock.models.ts index 2f09c7e4d..c973a7ec4 100644 --- a/src/modules/llms/server/bedrock/bedrock.models.ts +++ b/src/modules/llms/server/bedrock/bedrock.models.ts @@ -16,6 +16,29 @@ import type { ModelDescriptionSchema } from '../llm.server.types'; import { anthropicInjectVariants, llmBedrockFindAnthropicModel, llmBedrockStripAnthropicMDS } from '../anthropic/anthropic.models'; import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; +import { DModelParameterSpecAny } from '~/common/stores/llms/llms.parameters'; + + +// --- Suppression Rules --- + +const SKIP_FM_ID_CONTAINS = ['rerank']; +const SKIP_IP_ID_STARTSWITH = ['stability.']; + +// Known Mantle-only models (no matching foundation model) โ€” override heuristics with accurate metadata +const KNOWN_MANTLE_ONLY: Record = { + 'deepseek.v3.1': { label: 'DeepSeek V3.1', ctx: 131072, out: 16384 }, + 'moonshotai.kimi-k2-thinking': { label: 'Kimi K2 Thinking', ctx: 131072, out: 16384 }, + 'openai.gpt-oss-20b': { label: 'GPT-OSS 20B', ctx: 131072, out: 16384 }, + 'openai.gpt-oss-120b': { label: 'GPT-OSS 120B', ctx: 131072, out: 16384 }, + 'qwen.qwen3-32b': { label: 'Qwen3 32B', ctx: 131072, out: 16384 }, + 'qwen.qwen3-235b-a22b-2507': { label: 'Qwen3 235B A22B', ctx: 131072, out: 16384 }, + 'qwen.qwen3-coder-30b-a3b-instruct': { label: 'Qwen3 Coder 30B', ctx: 131072, out: 16384 }, + 'qwen.qwen3-coder-480b-a35b-instruct': { label: 'Qwen3 Coder 480B', ctx: 131072, out: 16384 }, + 'qwen.qwen3-coder-next': { label: 'Qwen3 Coder Next', ctx: 131072, out: 16384 }, + 'qwen.qwen3-next-80b-a3b-instruct': { label: 'Qwen3 Next 80B', ctx: 131072, out: 16384 }, + 'qwen.qwen3-vl-235b-a22b-instruct': { label: 'Qwen3 VL 235B', ctx: 131072, out: 16384, vision: true }, + 'zai.glm-4.6': { label: 'GLM 4.6', ctx: 131072, out: 16384 }, +} as const; // --- Bedrock API Wire Types --- @@ -68,6 +91,17 @@ export namespace BedrockWire_API_Models_List { nextToken: z.string().optional().nullable(), }); + // ListMantleModels response (OpenAI-compatible /v1/models from Bedrock Mantle) + + export const MantleModelsResponse_schema = z.object({ + data: z.array(z.object({ + id: z.string(), + object: z.string().optional(), + created: z.number().optional(), + owned_by: z.string().optional(), + })), + }); + } @@ -101,14 +135,21 @@ function _seemsAnthropicBedrockModel(bedrockModelId: string): boolean { export function bedrockModelsToDescriptions( foundationModels: z.infer, inferenceProfiles: z.infer, + mantleModels: z.infer, ): ModelDescriptionSchema[] { - // Collect unique model IDs from both sources + // Get the IDs for the Mantle models + const mantleModelIds = new Set(mantleModels.data.map(m => m.id)); + let remainingMantleModelIds = new Set(mantleModelIds); // to track which Mantle models are not matched to foundation/inference profiles + + // Collect unique model definitions from all sources const modelMap = new Map baseId.includes(s))) continue; + + // excludes non text->text, such as embedding, image gen, video gen, speech-only if (!fm.inputModalities?.includes('TEXT') || !fm.outputModalities?.includes('TEXT')) continue; - // denylist '..match..' - if (['rerank'].some(match => fm.modelId.includes(match))) continue; - - modelMap.set(fm.modelId, { - id: fm.modelId, + modelMap.set(baseId, { + id: baseId, label: fm.modelName, provider: fm.providerName, - isInferenceProfile: false, + hasMantle, + isLegacy: fm.modelLifecycle?.status === 'LEGACY', + isProfile: false, streaming: fm.responseStreamingSupported ?? true, converseMaxTokens: fm.converse?.maxTokensMaximum ?? null, converseImageTypes: fm.converse?.userImageTypesSupported ?? [], }); + + // mark as used in mantle + if (hasMantle) + remainingMantleModelIds.delete(baseId); } - // Inference Profiles + // Inference Profiles - important to come AFTER the base models, so we can resolve some attributes, if needed for (const ip of inferenceProfiles.inferenceProfileSummaries) { // exclude legacy models if (ip.status && ip.status !== 'ACTIVE') continue; // denylist 'start..' const baseId = _stripRegionPrefix(ip.inferenceProfileId); - if (['stability.'].some(start => baseId.startsWith(start))) continue; + if (SKIP_IP_ID_STARTSWITH.some(s => baseId.startsWith(s))) continue; + const hasMantle = mantleModelIds.has(baseId); // check if there's a matching foundation model (not anthropic, we map them differently) const foundationMeta = modelMap.get(baseId); - // if (!_seemsAnthropicBedrockModel(ip.inferenceProfileId) && !foundationMeta) - // console.log('[Bedrock] No matching foundation model for inference profile', ip.inferenceProfileId); modelMap.set(ip.inferenceProfileId, { id: ip.inferenceProfileId, label: ip.inferenceProfileName, provider: _extractProvider(ip.inferenceProfileId), - isInferenceProfile: true, + hasMantle, + isLegacy: ip.status === 'LEGACY', + isProfile: true, streaming: foundationMeta?.streaming ?? true, converseMaxTokens: foundationMeta?.converseMaxTokens ?? null, converseImageTypes: foundationMeta?.converseImageTypes ?? [], }); + + // mark as used in mantle + if (hasMantle) + remainingMantleModelIds.delete(baseId); } + + // Fuse foundationModels + inferenceProfiles into unified ModelDescriptionSchema definitions + // - Anthropic models get enriched with hardcoded metadata, plus 0-day + // - non-anthropic models get basic descriptions based on Bedrock metadata, plus mantle/converse markers + // -> ModelDescriptionSchema[], with Anthropic thinking variants injected inline const descriptions: ModelDescriptionSchema[] = []; - for (const [modelId, meta] of modelMap) { + const symbolMantle = ''; // '๐Ÿ˜'; 'โ“‚๏ธ' + const bedrockAPIAnthropic = { paramId: 'llmVndBedrockAPI', initialValue: 'invoke-anthropic' } as const satisfies DModelParameterSpecAny; + const bedrockAPIConverse = { paramId: 'llmVndBedrockAPI', initialValue: 'converse' } as const satisfies DModelParameterSpecAny; + const bedrockAPIMantle = { paramId: 'llmVndBedrockAPI', initialValue: 'mantle' } as const satisfies DModelParameterSpecAny; + for (const [modelId, modelMeta] of modelMap) { + if (_seemsAnthropicBedrockModel(modelId)) { - // Known Anthropic models: enrich with hardcoded definitions + inject thinking variants - const antModel = llmBedrockFindAnthropicModel(_stripRegionPrefix(modelId)); - if (antModel) { - const isProfile = !!_extractRegionPrefix(modelId); - // Inject variants (returns [variant, base] or [base] if no variant) - const withVariants = anthropicInjectVariants([], antModel); - for (const variant of withVariants) { - const label = isProfile ? _profileLabel(variant.label, modelId) : variant.label; - descriptions.push({ ...variant, id: modelId, label }); + // Anthropic models + const antModel = llmBedrockFindAnthropicModel(_stripRegionPrefix(modelId)); + + // Known Anthropic: enrich with hardcoded definitions + inject thinking variants + if (antModel) { + for (const variant of anthropicInjectVariants([], antModel)) + descriptions.push(llmBedrockStripAnthropicMDS({ // Filter to the subset of Anthropic params supported + ...variant, + id: modelId, + description: `${variant.description}${modelMeta.isProfile ? ' (Bedrock Inference Profile)' : ' (Bedrock Foundation Model)'}`, + label: `${modelMeta.isLegacy ? '๐Ÿ•ฐ๏ธ ' : '' /*๐Ÿ…ฐ๏ธ*/}${!modelMeta.isProfile ? variant.label : _labelFromProfile(variant.label, modelId)}`, + parameterSpecs: [...(variant.parameterSpecs || []), bedrockAPIAnthropic], // NOTE: FILTER MUST ALLOW THIS PARAM TOO! + })); + } + // Unknown Anthropic: 0-day model, not in our hardcoded DB + else { + descriptions.push({ + id: modelId, + label: `${modelMeta.isLegacy ? '๐Ÿ•ฐ๏ธ ' : ''}${!modelMeta.isProfile ? modelMeta.label : _labelFromProfile(modelMeta.label, modelId)} [?]`, + description: `${modelMeta.provider} model ${modelMeta.isProfile ? ' (Bedrock Inference Profile)' : ' (Bedrock Foundation Model)'}`, + hidden: modelMeta.isLegacy || modelId.includes('.claude-3-'), + // default assumptions + contextWindow: 200000, + maxCompletionTokens: 64000, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching], + parameterSpecs: [bedrockAPIAnthropic], + }); } - continue; - } - // Unknown models - these will NOT be accessible, hence the '๐Ÿšง'. We show them just in case, but maybe we shall not - const isAnthropic = _seemsAnthropicBedrockModel(modelId); - const hasVision = meta.converseImageTypes.length > 0; + } else { + + // Non-Anthropic models - may call them via mantle (if hasMantle) + const hasVision = modelMeta.converseImageTypes.length > 0; + const isMantle = modelMeta.hasMantle; + let label = modelMeta.isProfile ? _labelFromProfile(modelMeta.label, modelId) : modelMeta.label; + descriptions.push({ + id: modelId, + label: `${isMantle ? symbolMantle : '๐Ÿšง '}${label.startsWith(modelMeta.provider) ? '' : (modelMeta.provider + ' ')}${label}`, + description: `${modelMeta.provider} model via ${isMantle ? 'OpenAI-Compatible' : 'Unsupported'} API ${modelMeta.isProfile ? ' (Bedrock Inference Profile)' : ' (Bedrock Foundation Model)'}`, + contextWindow: modelMeta.converseMaxTokens ?? null, + interfaces: hasVision ? [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision] : [LLM_IF_OAI_Chat], + parameterSpecs: [isMantle ? bedrockAPIMantle : bedrockAPIConverse], + hidden: !isMantle, // only if it runs through mantle + }); + + } + } + + // -> Add remaining Mantle-only models (not matched to any FM/IP) + for (const mantleId of remainingMantleModelIds) { + const known = KNOWN_MANTLE_ONLY[mantleId]; + const provider = _extractMantleProvider(mantleId); + const interfaces = [LLM_IF_OAI_Chat]; + if (known?.vision) interfaces.push(LLM_IF_OAI_Vision); descriptions.push({ - id: modelId, - label: '๐Ÿšง ' + (meta.isInferenceProfile ? _profileLabel(meta.label, modelId) : meta.label), - description: `${meta.provider} model on AWS Bedrock${isAnthropic ? '' : ' (Converse API)'}`, - contextWindow: isAnthropic ? 200000 : (meta.converseMaxTokens ? meta.converseMaxTokens * 2 : 32768), - maxCompletionTokens: isAnthropic ? 64000 : (meta.converseMaxTokens ?? 4096), - interfaces: - isAnthropic ? [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching] - : hasVision ? [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision] - : [LLM_IF_OAI_Chat], - hidden: true, // not in our known models DB โ€” hide until verified usable + id: mantleId, + label: `${symbolMantle}${known?.label ?? labelForMantle(mantleId, provider)}${known ? '' : ' [?]'}`, + description: `${provider} model via OpenAI-Compatible API on AWS Bedrock Mantle`, + contextWindow: known?.ctx ?? 131072, + maxCompletionTokens: known?.out ?? 16384, + interfaces, + parameterSpecs: [bedrockAPIMantle], + hidden: true, // we know it can run, but we don't have models details }); } - // Filter interfaces and params to Bedrock-supported subset, then sort - const filtered = descriptions.map(llmBedrockStripAnthropicMDS); - filtered.sort(_bedrockModelSort); - return filtered; + return descriptions.sort(_bedrockModelSort); } // --- Helpers --- +// Extract provider name from Mantle model ID (e.g., 'mistral.model-name' -> 'Mistral') +function _extractMantleProvider(modelId: string): string { + const parts = modelId.split('.'); + return !parts[0] ? 'Unknown' : parts[0].charAt(0).toUpperCase() + parts[0].slice(1); +} + +// Build a display label from a Mantle model ID +function labelForMantle(modelId: string, provider: string): string { + const parts = modelId.split('.'); + const modelPart = parts.slice(1).join('.') || modelId; + // clean up: remove common suffixes, improve readability + const cleanLabel = modelPart + .replace(/-/g, ' ') + .replace(/\b\w/g, c => c.toUpperCase()); + return `${provider} ${cleanLabel}`; +} + /** Build a profile label: strip redundant region prefix from name, append `ยท Region` suffix (omit for global) */ -function _profileLabel(name: string, modelId: string): string { +function _labelFromProfile(name: string, modelId: string): string { const prefix = _extractRegionPrefix(modelId) ?? 'regional'; // Strip leading "US ", "GLOBAL ", etc. from the AWS-provided name const cleanName = name.replace(/^(US|EU|GLOBAL|JP|APAC)\s+/i, ''); @@ -223,14 +335,19 @@ function _extractProvider(profileId: string): string { return parts[0] ? parts[0].charAt(0).toUpperCase() + parts[0].slice(1) : 'Unknown'; } -/** Sort: Anthropic first, then family > class > variant (thinking before plain) > region */ +/** Sort: Anthropic first, then non-Anthropic by provider > label */ function _bedrockModelSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number { const aIsAnt = _seemsAnthropicBedrockModel(a.id); const bIsAnt = _seemsAnthropicBedrockModel(b.id); - if (aIsAnt && !bIsAnt) return -1; - if (!aIsAnt && bIsAnt) return 1; + if (aIsAnt !== bIsAnt) return aIsAnt ? -1 : 1; - // Within Anthropic: sort by family precedence + // --- Non-Anthropic: ๐Ÿšง-prefixed labels last, then provider, then label --- + if (!aIsAnt) + return (a.label.startsWith('๐Ÿšง') ? 1 : 0) - (b.label.startsWith('๐Ÿšง') ? 1 : 0) + || _extractMantleProvider(a.id).localeCompare(_extractMantleProvider(b.id)) + || a.label.localeCompare(b.label); + + // --- Anthropic: family > class > variant > region --- const familyPrecedence = ['-4-7-', '-4-6', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-']; const classPrecedence = ['-opus-', '-sonnet-', '-haiku-']; @@ -245,11 +362,10 @@ function _bedrockModelSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema) const classB = getClassIdx(b.id); if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB); - // Thinking/adaptive variants before plain (idVariant present = variant) + // Thinking/adaptive variants before plain const aIsVariant = !!a.idVariant; const bIsVariant = !!b.idVariant; - if (aIsVariant && !bIsVariant) return -1; - if (!aIsVariant && bIsVariant) return 1; + if (aIsVariant !== bIsVariant) return aIsVariant ? -1 : 1; // Prefer global > us > eu > regional const prefixOrder = ['global', 'us', 'eu', 'jp', 'apac']; diff --git a/src/modules/llms/server/listModels.dispatch.ts b/src/modules/llms/server/listModels.dispatch.ts index 3e3cccdf6..72b57dff0 100644 --- a/src/modules/llms/server/listModels.dispatch.ts +++ b/src/modules/llms/server/listModels.dispatch.ts @@ -16,7 +16,7 @@ import { anthropicInjectVariants, anthropicValidateModelDefs_DEV, AnthropicWire_ import { ANTHROPIC_API_PATHS, anthropicAccess } from './anthropic/anthropic.access'; // protocol: Bedrock -import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLControlPlane } from './bedrock/bedrock.access'; +import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLControlPlane, bedrockURLMantle } from './bedrock/bedrock.access'; import { bedrockModelsToDescriptions, BedrockWire_API_Models_List } from './bedrock/bedrock.models'; // protocol: Gemini @@ -172,35 +172,42 @@ function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal): const region = bedrockResolveRegion(access); const fmUrl = bedrockURLControlPlane(region, '/foundation-models?byInferenceType=ON_DEMAND'); const ipUrl = bedrockURLControlPlane(region, '/inference-profiles?typeEquals=SYSTEM_DEFINED&maxResults=1000'); + const mantleUrl = bedrockURLMantle(region, '/v1/models'); - // sign and fetch both lists in parallel - degrade gracefully if one fails, throw if both fail - const [fmResult, ipResult] = await Promise.allSettled([ + // sign and fetch all lists in parallel - each fails independently + const [fmResult, ipResult, mantleIdsResult] = await Promise.allSettled([ // Foundation Models bedrockAccessAsync(access, 'GET', fmUrl, undefined) .then(fmAccess => fetchJsonOrTRPCThrow({ ...fmAccess, signal, name: 'Bedrock/FM' })), // Inference Profiles bedrockAccessAsync(access, 'GET', ipUrl, undefined) .then(ipAccess => fetchJsonOrTRPCThrow({ ...ipAccess, signal, name: 'Bedrock/IP' })), + // Mantle Models + bedrockAccessAsync(access, 'GET', mantleUrl, undefined) + .then(mantleAccess => fetchJsonOrTRPCThrow({ ...mantleAccess, signal, name: 'Bedrock/Mantle' })), ]); - // if both failed, throw the first error so the user sees it + // if both FM and IP failed, throw the first error so the user sees it if (fmResult.status === 'rejected' && ipResult.status === 'rejected') throw fmResult.reason; - // degrade gracefully if only one failed + // degrade gracefully if any failed const fmResponse = fmResult.status === 'fulfilled' ? fmResult.value : { modelSummaries: [] }; const ipResponse = ipResult.status === 'fulfilled' ? ipResult.value : { inferenceProfileSummaries: [] }; + const mantleResponse = mantleIdsResult.status === 'fulfilled' ? mantleIdsResult.value : { data: [] }; _wire?.logResponse(fmResponse); _wire?.logResponse(ipResponse); + _wire?.logResponse(mantleResponse); return { foundationModels: BedrockWire_API_Models_List.FoundationModelsResponse_schema.parse(fmResponse), inferenceProfiles: BedrockWire_API_Models_List.InferenceProfilesResponse_schema.parse(ipResponse), + mantleModelIds: BedrockWire_API_Models_List.MantleModelsResponse_schema.parse(mantleResponse), }; }, - convertToDescriptions: ({ foundationModels, inferenceProfiles }) => - bedrockModelsToDescriptions(foundationModels, inferenceProfiles), + convertToDescriptions: ({ foundationModels, inferenceProfiles, mantleModelIds }) => + bedrockModelsToDescriptions(foundationModels, inferenceProfiles, mantleModelIds), }); } diff --git a/src/modules/llms/server/llm.server.types.ts b/src/modules/llms/server/llm.server.types.ts index 16db477bf..e98b992cc 100644 --- a/src/modules/llms/server/llm.server.types.ts +++ b/src/modules/llms/server/llm.server.types.ts @@ -88,6 +88,8 @@ const ModelParameterSpec_schema = z.object({ 'llmVndAntThinkingBudget', 'llmVndAntWebFetch', 'llmVndAntWebSearch', + // Bedrock + 'llmVndBedrockAPI', // Gemini 'llmVndGeminiAspectRatio', 'llmVndGeminiCodeExecution',