mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-11 14:10:15 -07:00
AIX/LLMs: Bedrock: support Mantle (OpenAI-compatible) including model enumeration. Fixes #965
This commit is contained in:
@@ -72,7 +72,7 @@ Server-side adapters translate AIX parameters to vendor APIs. Each vendor may in
|
||||
When a model is loaded:
|
||||
|
||||
1. **Model Creation**: `modelDescriptionToDLLM()` creates the DLLM with empty `initialParameters`
|
||||
2. **Initial Value Application**: `applyModelParameterInitialValues()` populates initial values from:
|
||||
2. **Initial Value Application**: `applyModelParameterSpecsInitialValues()` populates initial values from:
|
||||
- Model spec `initialValue` (highest priority)
|
||||
- Registry `initialValue` (fallback)
|
||||
3. **Runtime Resolution**: `getAllModelParameterValues()` creates final parameter set:
|
||||
|
||||
@@ -251,6 +251,17 @@ export const DModelParameterRegistry = {
|
||||
// },
|
||||
|
||||
|
||||
// Bedrock-specific
|
||||
|
||||
llmVndBedrockAPI: _enumDef({
|
||||
label: 'Bedrock API',
|
||||
type: 'enum',
|
||||
description: 'Bedrock invocation API for this model',
|
||||
values: ['converse', 'invoke-anthropic', 'mantle'],
|
||||
// undefined is not accepted when this parameter is used
|
||||
}),
|
||||
|
||||
|
||||
// Gemini-specific
|
||||
|
||||
llmVndGeminiAspectRatio: _enumDef({ // implies: LLM_IF_Outputs_Image
|
||||
@@ -549,7 +560,7 @@ export function applyModelParameterSpecsInitialValues(destValues: DModelParamete
|
||||
if ('writeFactoryValue' in registryDef && registryDef.writeFactoryValue !== undefined)
|
||||
destValues[paramId] = registryDef.writeFactoryValue as DModelParameterValue<typeof paramId>;
|
||||
} else
|
||||
console.warn(`applyModelParameterInitialValues: unknown parameter id '${paramId}'`);
|
||||
console.warn(`applyModelParameterSpecsInitialValues: unknown parameter id '${paramId}'`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -144,10 +144,10 @@ export type DModelInterfaceV1 =
|
||||
| 'oai-chat-vision'
|
||||
| 'oai-chat-reasoning'
|
||||
| 'ant-prompt-caching'
|
||||
| 'gem-code-execution'
|
||||
| 'oai-prompt-caching'
|
||||
| 'oai-realtime'
|
||||
| 'oai-responses'
|
||||
| 'gem-code-execution'
|
||||
| 'outputs-audio' // TEMP: ui flag - supports audio output (e.g., text-to-speech)
|
||||
| 'outputs-image' // TEMP: ui flag - supports image output (image generation)
|
||||
| 'outputs-no-text' // disable text outputs (used in conjunction with alt-outputs) - assumed off
|
||||
@@ -174,9 +174,9 @@ export const LLM_IF_Outputs_Image: DModelInterfaceV1 = 'outputs-image';
|
||||
export const LLM_IF_Outputs_NoText: DModelInterfaceV1 = 'outputs-no-text';
|
||||
export const LLM_IF_Tools_WebSearch: DModelInterfaceV1 = 'tools-web-search';
|
||||
export const LLM_IF_ANT_PromptCaching: DModelInterfaceV1 = 'ant-prompt-caching';
|
||||
export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution';
|
||||
export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching';
|
||||
export const LLM_IF_OAI_Responses: DModelInterfaceV1 = 'oai-responses';
|
||||
export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution';
|
||||
export const LLM_IF_HOTFIX_NoStream: DModelInterfaceV1 = 'hotfix-no-stream';
|
||||
export const LLM_IF_HOTFIX_NoTemperature: DModelInterfaceV1 = 'hotfix-no-temperature';
|
||||
export const LLM_IF_HOTFIX_NoWebP: DModelInterfaceV1 = 'hotfix-no-webp';
|
||||
|
||||
@@ -51,6 +51,7 @@ export function aixCreateModelFromLLMOptions(
|
||||
llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream,
|
||||
llmVndAntEffort, llmVndGemEffort, llmVndOaiEffort, llmVndMiscEffort,
|
||||
llmVndAnt1MContext, llmVndAntInfSpeed, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch,
|
||||
llmVndBedrockAPI,
|
||||
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiThinkingBudget,
|
||||
// llmVndMoonshotWebSearch,
|
||||
llmVndOaiRestoreMarkdown, llmVndOaiVerbosity, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, llmVndOaiImageGeneration, llmVndOaiCodeInterpreter,
|
||||
@@ -117,6 +118,9 @@ export function aixCreateModelFromLLMOptions(
|
||||
...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}),
|
||||
...(llmVndAntWebSearch === 'auto' ? { vndAntWebSearch: llmVndAntWebSearch } : {}),
|
||||
|
||||
// Bedrock
|
||||
...(llmVndBedrockAPI ? { vndBedrockAPI: llmVndBedrockAPI } : {}),
|
||||
|
||||
// Gemini
|
||||
...(llmVndGeminiAspectRatio ? { vndGeminiAspectRatio: llmVndGeminiAspectRatio } : {}),
|
||||
...(llmVndGeminiCodeExecution === 'auto' ? { vndGeminiCodeExecution: llmVndGeminiCodeExecution } : {}),
|
||||
|
||||
@@ -485,7 +485,7 @@ export namespace AixWire_API {
|
||||
vndAntWebSearch: z.enum(['auto']).optional(),
|
||||
|
||||
// Bedrock
|
||||
vndBedrockInvokeAPI: z.enum(['invoke-anthropic', 'converse']).optional(),
|
||||
vndBedrockAPI: z.enum(['converse', 'invoke-anthropic', 'mantle']).optional(),
|
||||
|
||||
// Gemini
|
||||
vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { ANTHROPIC_API_PATHS, anthropicAccess, anthropicBetaFeatures, AnthropicHeaderOptions } from '~/modules/llms/server/anthropic/anthropic.access';
|
||||
import { OPENAI_API_PATHS, openAIAccess } from '~/modules/llms/server/openai/openai.access';
|
||||
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLRuntime } from '~/modules/llms/server/bedrock/bedrock.access';
|
||||
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLMantle, bedrockURLRuntime } from '~/modules/llms/server/bedrock/bedrock.access';
|
||||
import { geminiAccess } from '~/modules/llms/server/gemini/gemini.access';
|
||||
import { ollamaAccess } from '~/modules/llms/server/ollama/ollama.access';
|
||||
|
||||
@@ -84,38 +84,61 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
|
||||
}
|
||||
|
||||
case 'bedrock': {
|
||||
switch (model.vndBedrockAPI) {
|
||||
|
||||
// [Bedrock, 2026-02-24] we only support the Invoke API for Anthropic models
|
||||
const invokeAPI = model.vndBedrockInvokeAPI ?? 'invoke-anthropic';
|
||||
if (invokeAPI === 'converse')
|
||||
throw new Error('[Bedrock] Converse API is not yet implemented. Use Anthropic models with the InvokeModel API (invoke-anthropic).');
|
||||
case 'converse':
|
||||
// No plans of implementing this yet - throwing below
|
||||
break;
|
||||
|
||||
const region = bedrockResolveRegion(access);
|
||||
const url = bedrockURLRuntime(region, model.id, streaming);
|
||||
// [Bedrock Invoke] Anthropic-native InvokeModel API
|
||||
case 'invoke-anthropic':
|
||||
const invokeUrl = bedrockURLRuntime(bedrockResolveRegion(access), model.id, streaming);
|
||||
|
||||
// body
|
||||
const bedrockAnthropicBody: Record<string, any> = aixToAnthropicMessageCreate(model, chatGenerate, streaming);
|
||||
delete bedrockAnthropicBody.model; // model in path
|
||||
delete bedrockAnthropicBody.stream; // streaming behavior in path
|
||||
// headers['anthropic-version'] -> body
|
||||
bedrockAnthropicBody.anthropic_version = 'bedrock-2023-05-31';
|
||||
// headers['anthropic-beta'] -> body
|
||||
bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures(
|
||||
_anthropicBetaOptionsFromModel(model /* note that .id won't match, and it's okay, we don't need per model customizations */, false /* hardcoded */),
|
||||
);
|
||||
if (!bedrockAnthropicBody.anthropic_beta?.length)
|
||||
delete bedrockAnthropicBody.anthropic_beta;
|
||||
// body
|
||||
const bedrockAnthropicBody: Record<string, any> = aixToAnthropicMessageCreate(model, chatGenerate, streaming);
|
||||
delete bedrockAnthropicBody.model; // model in path
|
||||
delete bedrockAnthropicBody.stream; // streaming behavior in path
|
||||
// headers['anthropic-version'] -> body
|
||||
bedrockAnthropicBody.anthropic_version = 'bedrock-2023-05-31';
|
||||
// headers['anthropic-beta'] -> body
|
||||
bedrockAnthropicBody.anthropic_beta = anthropicBetaFeatures(
|
||||
_anthropicBetaOptionsFromModel(model /* note that .id won't match, and it's okay, we don't need per model customizations */, false /* hardcoded */),
|
||||
);
|
||||
if (!bedrockAnthropicBody.anthropic_beta?.length)
|
||||
delete bedrockAnthropicBody.anthropic_beta;
|
||||
|
||||
return {
|
||||
request: {
|
||||
...await bedrockAccessAsync(access, 'POST', url, bedrockAnthropicBody),
|
||||
method: 'POST',
|
||||
body: bedrockAnthropicBody,
|
||||
},
|
||||
bodyTransform: streaming ? 'aws-eventstream-binary' : null,
|
||||
demuxerFormat: streaming ? 'fast-sse' : null,
|
||||
chatGenerateParse: streaming ? createAnthropicMessageParser() : createAnthropicMessageParserNS(),
|
||||
};
|
||||
return {
|
||||
request: {
|
||||
...await bedrockAccessAsync(access, 'POST', invokeUrl, bedrockAnthropicBody),
|
||||
method: 'POST',
|
||||
body: bedrockAnthropicBody,
|
||||
},
|
||||
bodyTransform: streaming ? 'aws-eventstream-binary' : null,
|
||||
demuxerFormat: streaming ? 'fast-sse' : null,
|
||||
chatGenerateParse: streaming ? createAnthropicMessageParser() : createAnthropicMessageParserNS(),
|
||||
};
|
||||
|
||||
// [Bedrock Mantle] OpenAI Chat Completions-compatible API for non-Anthropic models
|
||||
case 'mantle':
|
||||
const mantleUrl = bedrockURLMantle(bedrockResolveRegion(access), '/v1/chat/completions');
|
||||
const mantleBody = aixToOpenAIChatCompletions('openai', model, chatGenerate, streaming);
|
||||
return {
|
||||
request: {
|
||||
...await bedrockAccessAsync(access, 'POST', mantleUrl, mantleBody),
|
||||
method: 'POST',
|
||||
body: mantleBody,
|
||||
},
|
||||
demuxerFormat: streaming ? 'fast-sse' : null,
|
||||
chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(),
|
||||
};
|
||||
|
||||
default:
|
||||
const _exhaustiveCheck: never = model.vndBedrockAPI;
|
||||
// fallthrough, then throw
|
||||
case undefined:
|
||||
break;
|
||||
}
|
||||
throw new Error(`Unsupported '${model.vndBedrockAPI}' API.`);
|
||||
}
|
||||
|
||||
case 'gemini':
|
||||
|
||||
@@ -463,6 +463,8 @@ const _BEDROCK_ANT_IF_ALLOWLIST: ReadonlySet<string> = new Set([
|
||||
|
||||
// NOTE: llmVndAntInfSpeed not available on Bedrock, llmVndAntWebFetch/llmVndAntSkills not available
|
||||
const _BEDROCK_ANT_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
|
||||
// bedrock params to not strip
|
||||
'llmVndBedrockAPI',
|
||||
// supported
|
||||
'llmVndAnt1MContext',
|
||||
'llmVndAntEffort',
|
||||
|
||||
@@ -81,15 +81,19 @@ export function bedrockResolveRegion(access: BedrockAccessSchema): string {
|
||||
|
||||
// --- URLs ---
|
||||
|
||||
export function bedrockURLControlPlane(region: string, path: string): string {
|
||||
return `https://bedrock.${region}.amazonaws.com${path}`;
|
||||
}
|
||||
|
||||
export function bedrockURLMantle(region: string, path: string): string {
|
||||
return `https://bedrock-mantle.${region}.api.aws${path}`;
|
||||
}
|
||||
|
||||
export function bedrockURLRuntime(region: string, modelId: string, streaming: boolean): string {
|
||||
const action = streaming ? 'invoke-with-response-stream' : 'invoke';
|
||||
return `https://bedrock-runtime.${region}.amazonaws.com/model/${encodeURIComponent(modelId)}/${action}`;
|
||||
}
|
||||
|
||||
export function bedrockURLControlPlane(region: string, path: string): string {
|
||||
return `https://bedrock.${region}.amazonaws.com${path}`;
|
||||
}
|
||||
|
||||
|
||||
// --- Bedrock Access (Bearer or async SigV4) ---
|
||||
|
||||
|
||||
@@ -16,6 +16,29 @@ import type { ModelDescriptionSchema } from '../llm.server.types';
|
||||
|
||||
import { anthropicInjectVariants, llmBedrockFindAnthropicModel, llmBedrockStripAnthropicMDS } from '../anthropic/anthropic.models';
|
||||
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
import { DModelParameterSpecAny } from '~/common/stores/llms/llms.parameters';
|
||||
|
||||
|
||||
// --- Suppression Rules ---
|
||||
|
||||
const SKIP_FM_ID_CONTAINS = ['rerank'];
|
||||
const SKIP_IP_ID_STARTSWITH = ['stability.'];
|
||||
|
||||
// Known Mantle-only models (no matching foundation model) — override heuristics with accurate metadata
|
||||
const KNOWN_MANTLE_ONLY: Record<string, { label: string; ctx: number; out: number; vision?: true }> = {
|
||||
'deepseek.v3.1': { label: 'DeepSeek V3.1', ctx: 131072, out: 16384 },
|
||||
'moonshotai.kimi-k2-thinking': { label: 'Kimi K2 Thinking', ctx: 131072, out: 16384 },
|
||||
'openai.gpt-oss-20b': { label: 'GPT-OSS 20B', ctx: 131072, out: 16384 },
|
||||
'openai.gpt-oss-120b': { label: 'GPT-OSS 120B', ctx: 131072, out: 16384 },
|
||||
'qwen.qwen3-32b': { label: 'Qwen3 32B', ctx: 131072, out: 16384 },
|
||||
'qwen.qwen3-235b-a22b-2507': { label: 'Qwen3 235B A22B', ctx: 131072, out: 16384 },
|
||||
'qwen.qwen3-coder-30b-a3b-instruct': { label: 'Qwen3 Coder 30B', ctx: 131072, out: 16384 },
|
||||
'qwen.qwen3-coder-480b-a35b-instruct': { label: 'Qwen3 Coder 480B', ctx: 131072, out: 16384 },
|
||||
'qwen.qwen3-coder-next': { label: 'Qwen3 Coder Next', ctx: 131072, out: 16384 },
|
||||
'qwen.qwen3-next-80b-a3b-instruct': { label: 'Qwen3 Next 80B', ctx: 131072, out: 16384 },
|
||||
'qwen.qwen3-vl-235b-a22b-instruct': { label: 'Qwen3 VL 235B', ctx: 131072, out: 16384, vision: true },
|
||||
'zai.glm-4.6': { label: 'GLM 4.6', ctx: 131072, out: 16384 },
|
||||
} as const;
|
||||
|
||||
|
||||
// --- Bedrock API Wire Types ---
|
||||
@@ -68,6 +91,17 @@ export namespace BedrockWire_API_Models_List {
|
||||
nextToken: z.string().optional().nullable(),
|
||||
});
|
||||
|
||||
// ListMantleModels response (OpenAI-compatible /v1/models from Bedrock Mantle)
|
||||
|
||||
export const MantleModelsResponse_schema = z.object({
|
||||
data: z.array(z.object({
|
||||
id: z.string(),
|
||||
object: z.string().optional(),
|
||||
created: z.number().optional(),
|
||||
owned_by: z.string().optional(),
|
||||
})),
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -101,14 +135,21 @@ function _seemsAnthropicBedrockModel(bedrockModelId: string): boolean {
|
||||
export function bedrockModelsToDescriptions(
|
||||
foundationModels: z.infer<typeof BedrockWire_API_Models_List.FoundationModelsResponse_schema>,
|
||||
inferenceProfiles: z.infer<typeof BedrockWire_API_Models_List.InferenceProfilesResponse_schema>,
|
||||
mantleModels: z.infer<typeof BedrockWire_API_Models_List.MantleModelsResponse_schema>,
|
||||
): ModelDescriptionSchema[] {
|
||||
|
||||
// Collect unique model IDs from both sources
|
||||
// Get the IDs for the Mantle models
|
||||
const mantleModelIds = new Set(mantleModels.data.map(m => m.id));
|
||||
let remainingMantleModelIds = new Set(mantleModelIds); // to track which Mantle models are not matched to foundation/inference profiles
|
||||
|
||||
// Collect unique model definitions from all sources
|
||||
const modelMap = new Map<string, {
|
||||
id: string;
|
||||
label: string;
|
||||
provider: string;
|
||||
isInferenceProfile: boolean;
|
||||
hasMantle: boolean;
|
||||
isLegacy: boolean;
|
||||
isProfile: boolean;
|
||||
streaming: boolean;
|
||||
converseMaxTokens: number | null;
|
||||
converseImageTypes: string[]
|
||||
@@ -116,96 +157,167 @@ export function bedrockModelsToDescriptions(
|
||||
|
||||
// Foundation Models
|
||||
for (const fm of foundationModels.modelSummaries) {
|
||||
// exclude legacy models
|
||||
if (fm.modelLifecycle?.status === 'LEGACY') continue;
|
||||
const baseId = fm.modelId; // e.g. 'google.gemma-3-4b-it', 'moonshotai.kimi-k2.5'
|
||||
const hasMantle = mantleModelIds.has(baseId);
|
||||
|
||||
// excludes embedding, image gen, video gen, speech-only
|
||||
// exclusion by pattern
|
||||
if (SKIP_FM_ID_CONTAINS.some(s => baseId.includes(s))) continue;
|
||||
|
||||
// excludes non text->text, such as embedding, image gen, video gen, speech-only
|
||||
if (!fm.inputModalities?.includes('TEXT') || !fm.outputModalities?.includes('TEXT')) continue;
|
||||
|
||||
// denylist '..match..'
|
||||
if (['rerank'].some(match => fm.modelId.includes(match))) continue;
|
||||
|
||||
modelMap.set(fm.modelId, {
|
||||
id: fm.modelId,
|
||||
modelMap.set(baseId, {
|
||||
id: baseId,
|
||||
label: fm.modelName,
|
||||
provider: fm.providerName,
|
||||
isInferenceProfile: false,
|
||||
hasMantle,
|
||||
isLegacy: fm.modelLifecycle?.status === 'LEGACY',
|
||||
isProfile: false,
|
||||
streaming: fm.responseStreamingSupported ?? true,
|
||||
converseMaxTokens: fm.converse?.maxTokensMaximum ?? null,
|
||||
converseImageTypes: fm.converse?.userImageTypesSupported ?? [],
|
||||
});
|
||||
|
||||
// mark as used in mantle
|
||||
if (hasMantle)
|
||||
remainingMantleModelIds.delete(baseId);
|
||||
}
|
||||
|
||||
// Inference Profiles
|
||||
// Inference Profiles - important to come AFTER the base models, so we can resolve some attributes, if needed
|
||||
for (const ip of inferenceProfiles.inferenceProfileSummaries) {
|
||||
// exclude legacy models
|
||||
if (ip.status && ip.status !== 'ACTIVE') continue;
|
||||
|
||||
// denylist 'start..'
|
||||
const baseId = _stripRegionPrefix(ip.inferenceProfileId);
|
||||
if (['stability.'].some(start => baseId.startsWith(start))) continue;
|
||||
if (SKIP_IP_ID_STARTSWITH.some(s => baseId.startsWith(s))) continue;
|
||||
const hasMantle = mantleModelIds.has(baseId);
|
||||
|
||||
// check if there's a matching foundation model (not anthropic, we map them differently)
|
||||
const foundationMeta = modelMap.get(baseId);
|
||||
// if (!_seemsAnthropicBedrockModel(ip.inferenceProfileId) && !foundationMeta)
|
||||
// console.log('[Bedrock] No matching foundation model for inference profile', ip.inferenceProfileId);
|
||||
|
||||
modelMap.set(ip.inferenceProfileId, {
|
||||
id: ip.inferenceProfileId,
|
||||
label: ip.inferenceProfileName,
|
||||
provider: _extractProvider(ip.inferenceProfileId),
|
||||
isInferenceProfile: true,
|
||||
hasMantle,
|
||||
isLegacy: ip.status === 'LEGACY',
|
||||
isProfile: true,
|
||||
streaming: foundationMeta?.streaming ?? true,
|
||||
converseMaxTokens: foundationMeta?.converseMaxTokens ?? null,
|
||||
converseImageTypes: foundationMeta?.converseImageTypes ?? [],
|
||||
});
|
||||
|
||||
// mark as used in mantle
|
||||
if (hasMantle)
|
||||
remainingMantleModelIds.delete(baseId);
|
||||
}
|
||||
|
||||
|
||||
// Fuse foundationModels + inferenceProfiles into unified ModelDescriptionSchema definitions
|
||||
// - Anthropic models get enriched with hardcoded metadata, plus 0-day
|
||||
// - non-anthropic models get basic descriptions based on Bedrock metadata, plus mantle/converse markers
|
||||
|
||||
// -> ModelDescriptionSchema[], with Anthropic thinking variants injected inline
|
||||
const descriptions: ModelDescriptionSchema[] = [];
|
||||
for (const [modelId, meta] of modelMap) {
|
||||
const symbolMantle = ''; // '🐘'; 'Ⓜ️'
|
||||
const bedrockAPIAnthropic = { paramId: 'llmVndBedrockAPI', initialValue: 'invoke-anthropic' } as const satisfies DModelParameterSpecAny;
|
||||
const bedrockAPIConverse = { paramId: 'llmVndBedrockAPI', initialValue: 'converse' } as const satisfies DModelParameterSpecAny;
|
||||
const bedrockAPIMantle = { paramId: 'llmVndBedrockAPI', initialValue: 'mantle' } as const satisfies DModelParameterSpecAny;
|
||||
for (const [modelId, modelMeta] of modelMap) {
|
||||
if (_seemsAnthropicBedrockModel(modelId)) {
|
||||
|
||||
// Known Anthropic models: enrich with hardcoded definitions + inject thinking variants
|
||||
const antModel = llmBedrockFindAnthropicModel(_stripRegionPrefix(modelId));
|
||||
if (antModel) {
|
||||
const isProfile = !!_extractRegionPrefix(modelId);
|
||||
// Inject variants (returns [variant, base] or [base] if no variant)
|
||||
const withVariants = anthropicInjectVariants([], antModel);
|
||||
for (const variant of withVariants) {
|
||||
const label = isProfile ? _profileLabel(variant.label, modelId) : variant.label;
|
||||
descriptions.push({ ...variant, id: modelId, label });
|
||||
// Anthropic models
|
||||
const antModel = llmBedrockFindAnthropicModel(_stripRegionPrefix(modelId));
|
||||
|
||||
// Known Anthropic: enrich with hardcoded definitions + inject thinking variants
|
||||
if (antModel) {
|
||||
for (const variant of anthropicInjectVariants([], antModel))
|
||||
descriptions.push(llmBedrockStripAnthropicMDS({ // Filter to the subset of Anthropic params supported
|
||||
...variant,
|
||||
id: modelId,
|
||||
description: `${variant.description}${modelMeta.isProfile ? ' (Bedrock Inference Profile)' : ' (Bedrock Foundation Model)'}`,
|
||||
label: `${modelMeta.isLegacy ? '🕰️ ' : '' /*🅰️*/}${!modelMeta.isProfile ? variant.label : _labelFromProfile(variant.label, modelId)}`,
|
||||
parameterSpecs: [...(variant.parameterSpecs || []), bedrockAPIAnthropic], // NOTE: FILTER MUST ALLOW THIS PARAM TOO!
|
||||
}));
|
||||
}
|
||||
// Unknown Anthropic: 0-day model, not in our hardcoded DB
|
||||
else {
|
||||
descriptions.push({
|
||||
id: modelId,
|
||||
label: `${modelMeta.isLegacy ? '🕰️ ' : ''}${!modelMeta.isProfile ? modelMeta.label : _labelFromProfile(modelMeta.label, modelId)} [?]`,
|
||||
description: `${modelMeta.provider} model ${modelMeta.isProfile ? ' (Bedrock Inference Profile)' : ' (Bedrock Foundation Model)'}`,
|
||||
hidden: modelMeta.isLegacy || modelId.includes('.claude-3-'),
|
||||
// default assumptions
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 64000,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
|
||||
parameterSpecs: [bedrockAPIAnthropic],
|
||||
});
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Unknown models - these will NOT be accessible, hence the '🚧'. We show them just in case, but maybe we shall not
|
||||
const isAnthropic = _seemsAnthropicBedrockModel(modelId);
|
||||
const hasVision = meta.converseImageTypes.length > 0;
|
||||
} else {
|
||||
|
||||
// Non-Anthropic models - may call them via mantle (if hasMantle)
|
||||
const hasVision = modelMeta.converseImageTypes.length > 0;
|
||||
const isMantle = modelMeta.hasMantle;
|
||||
let label = modelMeta.isProfile ? _labelFromProfile(modelMeta.label, modelId) : modelMeta.label;
|
||||
descriptions.push({
|
||||
id: modelId,
|
||||
label: `${isMantle ? symbolMantle : '🚧 '}${label.startsWith(modelMeta.provider) ? '' : (modelMeta.provider + ' ')}${label}`,
|
||||
description: `${modelMeta.provider} model via ${isMantle ? 'OpenAI-Compatible' : 'Unsupported'} API ${modelMeta.isProfile ? ' (Bedrock Inference Profile)' : ' (Bedrock Foundation Model)'}`,
|
||||
contextWindow: modelMeta.converseMaxTokens ?? null,
|
||||
interfaces: hasVision ? [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision] : [LLM_IF_OAI_Chat],
|
||||
parameterSpecs: [isMantle ? bedrockAPIMantle : bedrockAPIConverse],
|
||||
hidden: !isMantle, // only if it runs through mantle
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// -> Add remaining Mantle-only models (not matched to any FM/IP)
|
||||
for (const mantleId of remainingMantleModelIds) {
|
||||
const known = KNOWN_MANTLE_ONLY[mantleId];
|
||||
const provider = _extractMantleProvider(mantleId);
|
||||
const interfaces = [LLM_IF_OAI_Chat];
|
||||
if (known?.vision) interfaces.push(LLM_IF_OAI_Vision);
|
||||
descriptions.push({
|
||||
id: modelId,
|
||||
label: '🚧 ' + (meta.isInferenceProfile ? _profileLabel(meta.label, modelId) : meta.label),
|
||||
description: `${meta.provider} model on AWS Bedrock${isAnthropic ? '' : ' (Converse API)'}`,
|
||||
contextWindow: isAnthropic ? 200000 : (meta.converseMaxTokens ? meta.converseMaxTokens * 2 : 32768),
|
||||
maxCompletionTokens: isAnthropic ? 64000 : (meta.converseMaxTokens ?? 4096),
|
||||
interfaces:
|
||||
isAnthropic ? [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching]
|
||||
: hasVision ? [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision]
|
||||
: [LLM_IF_OAI_Chat],
|
||||
hidden: true, // not in our known models DB — hide until verified usable
|
||||
id: mantleId,
|
||||
label: `${symbolMantle}${known?.label ?? labelForMantle(mantleId, provider)}${known ? '' : ' [?]'}`,
|
||||
description: `${provider} model via OpenAI-Compatible API on AWS Bedrock Mantle`,
|
||||
contextWindow: known?.ctx ?? 131072,
|
||||
maxCompletionTokens: known?.out ?? 16384,
|
||||
interfaces,
|
||||
parameterSpecs: [bedrockAPIMantle],
|
||||
hidden: true, // we know it can run, but we don't have models details
|
||||
});
|
||||
}
|
||||
|
||||
// Filter interfaces and params to Bedrock-supported subset, then sort
|
||||
const filtered = descriptions.map(llmBedrockStripAnthropicMDS);
|
||||
filtered.sort(_bedrockModelSort);
|
||||
return filtered;
|
||||
return descriptions.sort(_bedrockModelSort);
|
||||
}
|
||||
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
/**
 * Extract a display provider name from a Bedrock model ID (e.g., 'mistral.model-name' -> 'Mistral').
 *
 * The ID is split on '.', and the provider segment is returned with its first letter upper-cased.
 * A leading region segment is skipped (e.g., 'us.meta.llama-x' -> 'Meta'): this helper is also used
 * as the provider sort key for non-Anthropic models, whose IDs may be region-prefixed inference
 * profile IDs; without the skip, those would be grouped by region instead of by provider.
 *
 * @param modelId Mantle model ID, or a (possibly region-prefixed) Bedrock model/profile ID
 * @returns the capitalized provider segment, or 'Unknown' when that segment is empty
 */
function _extractMantleProvider(modelId: string): string {
  const parts = modelId.split('.');
  // same region prefixes that the profile label/sort helpers of this file recognize
  const hasRegionPrefix = parts.length > 1 && ['global', 'us', 'eu', 'jp', 'apac'].includes(parts[0]);
  const provider = hasRegionPrefix ? parts[1] : parts[0];
  return !provider ? 'Unknown' : provider.charAt(0).toUpperCase() + provider.slice(1);
}
|
||||
|
||||
/**
 * Build a display label from a Mantle model ID, for models without curated metadata.
 *
 * The part after the first '.' (or the whole ID, when there is no such part) is made readable by
 * turning dashes into spaces and upper-casing the first character of each word. The provider is then
 * prepended, unless the label already starts with it (e.g., 'qwen.qwen3-32b' -> 'Qwen3 32b', not
 * 'Qwen Qwen3 32b'), matching how labels of the other non-Anthropic models avoid the repetition.
 *
 * @param modelId Mantle model ID, e.g. 'zai.glm-4.6'
 * @param provider display name of the provider, e.g. 'Zai'
 * @returns a human-readable label, e.g. 'Zai Glm 4.6'
 */
function labelForMantle(modelId: string, provider: string): string {
  const modelPart = modelId.split('.').slice(1).join('.') || modelId;
  // improve readability: dashes -> spaces, capitalize the start of each word
  const cleanLabel = modelPart
    .replace(/-/g, ' ')
    .replace(/\b\w/g, c => c.toUpperCase());
  // do not repeat the provider when the model name already leads with it
  if (cleanLabel.toLowerCase().startsWith(provider.toLowerCase()))
    return cleanLabel;
  return `${provider} ${cleanLabel}`;
}
|
||||
|
||||
/** Build a profile label: strip redundant region prefix from name, append `· Region` suffix (omit for global) */
|
||||
function _profileLabel(name: string, modelId: string): string {
|
||||
function _labelFromProfile(name: string, modelId: string): string {
|
||||
const prefix = _extractRegionPrefix(modelId) ?? 'regional';
|
||||
// Strip leading "US ", "GLOBAL ", etc. from the AWS-provided name
|
||||
const cleanName = name.replace(/^(US|EU|GLOBAL|JP|APAC)\s+/i, '');
|
||||
@@ -223,14 +335,19 @@ function _extractProvider(profileId: string): string {
|
||||
return parts[0] ? parts[0].charAt(0).toUpperCase() + parts[0].slice(1) : 'Unknown';
|
||||
}
|
||||
|
||||
/** Sort: Anthropic first, then family > class > variant (thinking before plain) > region */
|
||||
/** Sort: Anthropic first, then non-Anthropic by provider > label */
|
||||
function _bedrockModelSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number {
|
||||
const aIsAnt = _seemsAnthropicBedrockModel(a.id);
|
||||
const bIsAnt = _seemsAnthropicBedrockModel(b.id);
|
||||
if (aIsAnt && !bIsAnt) return -1;
|
||||
if (!aIsAnt && bIsAnt) return 1;
|
||||
if (aIsAnt !== bIsAnt) return aIsAnt ? -1 : 1;
|
||||
|
||||
// Within Anthropic: sort by family precedence
|
||||
// --- Non-Anthropic: 🚧-prefixed labels last, then provider, then label ---
|
||||
if (!aIsAnt)
|
||||
return (a.label.startsWith('🚧') ? 1 : 0) - (b.label.startsWith('🚧') ? 1 : 0)
|
||||
|| _extractMantleProvider(a.id).localeCompare(_extractMantleProvider(b.id))
|
||||
|| a.label.localeCompare(b.label);
|
||||
|
||||
// --- Anthropic: family > class > variant > region ---
|
||||
const familyPrecedence = ['-4-7-', '-4-6', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
|
||||
const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
|
||||
|
||||
@@ -245,11 +362,10 @@ function _bedrockModelSort(a: ModelDescriptionSchema, b: ModelDescriptionSchema)
|
||||
const classB = getClassIdx(b.id);
|
||||
if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
|
||||
|
||||
// Thinking/adaptive variants before plain (idVariant present = variant)
|
||||
// Thinking/adaptive variants before plain
|
||||
const aIsVariant = !!a.idVariant;
|
||||
const bIsVariant = !!b.idVariant;
|
||||
if (aIsVariant && !bIsVariant) return -1;
|
||||
if (!aIsVariant && bIsVariant) return 1;
|
||||
if (aIsVariant !== bIsVariant) return aIsVariant ? -1 : 1;
|
||||
|
||||
// Prefer global > us > eu > regional
|
||||
const prefixOrder = ['global', 'us', 'eu', 'jp', 'apac'];
|
||||
|
||||
@@ -16,7 +16,7 @@ import { anthropicInjectVariants, anthropicValidateModelDefs_DEV, AnthropicWire_
|
||||
import { ANTHROPIC_API_PATHS, anthropicAccess } from './anthropic/anthropic.access';
|
||||
|
||||
// protocol: Bedrock
|
||||
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLControlPlane } from './bedrock/bedrock.access';
|
||||
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLControlPlane, bedrockURLMantle } from './bedrock/bedrock.access';
|
||||
import { bedrockModelsToDescriptions, BedrockWire_API_Models_List } from './bedrock/bedrock.models';
|
||||
|
||||
// protocol: Gemini
|
||||
@@ -172,35 +172,42 @@ function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal):
|
||||
const region = bedrockResolveRegion(access);
|
||||
const fmUrl = bedrockURLControlPlane(region, '/foundation-models?byInferenceType=ON_DEMAND');
|
||||
const ipUrl = bedrockURLControlPlane(region, '/inference-profiles?typeEquals=SYSTEM_DEFINED&maxResults=1000');
|
||||
const mantleUrl = bedrockURLMantle(region, '/v1/models');
|
||||
|
||||
// sign and fetch both lists in parallel - degrade gracefully if one fails, throw if both fail
|
||||
const [fmResult, ipResult] = await Promise.allSettled([
|
||||
// sign and fetch all lists in parallel - each fails independently
|
||||
const [fmResult, ipResult, mantleIdsResult] = await Promise.allSettled([
|
||||
// Foundation Models
|
||||
bedrockAccessAsync(access, 'GET', fmUrl, undefined)
|
||||
.then(fmAccess => fetchJsonOrTRPCThrow({ ...fmAccess, signal, name: 'Bedrock/FM' })),
|
||||
// Inference Profiles
|
||||
bedrockAccessAsync(access, 'GET', ipUrl, undefined)
|
||||
.then(ipAccess => fetchJsonOrTRPCThrow({ ...ipAccess, signal, name: 'Bedrock/IP' })),
|
||||
// Mantle Models
|
||||
bedrockAccessAsync(access, 'GET', mantleUrl, undefined)
|
||||
.then(mantleAccess => fetchJsonOrTRPCThrow({ ...mantleAccess, signal, name: 'Bedrock/Mantle' })),
|
||||
]);
|
||||
|
||||
// if both failed, throw the first error so the user sees it
|
||||
// if both FM and IP failed, throw the first error so the user sees it
|
||||
if (fmResult.status === 'rejected' && ipResult.status === 'rejected')
|
||||
throw fmResult.reason;
|
||||
|
||||
// degrade gracefully if only one failed
|
||||
// degrade gracefully if any failed
|
||||
const fmResponse = fmResult.status === 'fulfilled' ? fmResult.value : { modelSummaries: [] };
|
||||
const ipResponse = ipResult.status === 'fulfilled' ? ipResult.value : { inferenceProfileSummaries: [] };
|
||||
const mantleResponse = mantleIdsResult.status === 'fulfilled' ? mantleIdsResult.value : { data: [] };
|
||||
|
||||
_wire?.logResponse(fmResponse);
|
||||
_wire?.logResponse(ipResponse);
|
||||
_wire?.logResponse(mantleResponse);
|
||||
|
||||
return {
|
||||
foundationModels: BedrockWire_API_Models_List.FoundationModelsResponse_schema.parse(fmResponse),
|
||||
inferenceProfiles: BedrockWire_API_Models_List.InferenceProfilesResponse_schema.parse(ipResponse),
|
||||
mantleModelIds: BedrockWire_API_Models_List.MantleModelsResponse_schema.parse(mantleResponse),
|
||||
};
|
||||
},
|
||||
convertToDescriptions: ({ foundationModels, inferenceProfiles }) =>
|
||||
bedrockModelsToDescriptions(foundationModels, inferenceProfiles),
|
||||
convertToDescriptions: ({ foundationModels, inferenceProfiles, mantleModelIds }) =>
|
||||
bedrockModelsToDescriptions(foundationModels, inferenceProfiles, mantleModelIds),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -88,6 +88,8 @@ const ModelParameterSpec_schema = z.object({
|
||||
'llmVndAntThinkingBudget',
|
||||
'llmVndAntWebFetch',
|
||||
'llmVndAntWebSearch',
|
||||
// Bedrock
|
||||
'llmVndBedrockAPI',
|
||||
// Gemini
|
||||
'llmVndGeminiAspectRatio',
|
||||
'llmVndGeminiCodeExecution',
|
||||
|
||||
Reference in New Issue
Block a user