// File: big-agi/src/modules/llms/server/listModels.dispatch.ts (TypeScript, 536 lines, 26 KiB)
import { TRPCError } from '@trpc/server';
import type { AixAPI_Access } from '~/modules/aix/server/api/aix.wiretypes';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { createDebugWireLogger } from '~/server/wire';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import type { ModelDescriptionSchema } from './llm.server.types';
import { llmDevValidateParameterSpecs_DEV, llmsAutoImplyInterfaces } from './models.mappings';
// protocol: Anthropic
import { anthropicInjectVariants, anthropicValidateModelDefs_DEV, AnthropicWire_API_Models_List, hardcodedAnthropicModels, llmsAntCreatePlaceholderModel } from './anthropic/anthropic.models';
import { ANTHROPIC_API_PATHS, anthropicAccess } from './anthropic/anthropic.access';
// protocol: Bedrock
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLControlPlane, bedrockURLMantle } from './bedrock/bedrock.access';
import { bedrockModelsToDescriptions, BedrockWire_API_Models_List } from './bedrock/bedrock.models';
// protocol: Gemini
import { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import { geminiAccess } from './gemini/gemini.access';
import { geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels, geminiValidateModelDefs_DEV, geminiValidateParserOutput_DEV } from './gemini/gemini.models';
// protocol: Ollama
import { OLLAMA_BASE_MODELS } from './ollama/ollama.models';
import { ollamaAccess } from './ollama/ollama.access';
import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama/ollama.wiretypes';
// protocol: OpenAI-compatible
import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
import { llmsHostnameMatches, OPENAI_API_PATHS, openAIAccess } from './openai/openai.access';
import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './openai/models/alibaba.models';
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './openai/models/azure.models';
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './openai/models/chutesai.models';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './openai/models/deepseek.models';
import { fastAPIHeuristic, fastAPIModels } from './openai/models/fastapi.models';
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './openai/models/fireworksai.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription, groqValidateModelDefs_DEV } from './openai/models/groq.models';
import { llmapiHeuristic, llmapiModelsToModelDescriptions } from './openai/models/llmapi.models';
import { novitaHeuristic, novitaModelsToModelDescriptions } from './openai/models/novita.models';
import { lmStudioFetchModels, lmStudioModelsToModelDescriptions } from './openai/models/lmstudio.models';
import { localAIModelSortFn, localAIModelToModelDescription } from './openai/models/localai.models';
import { mistralModels } from './openai/models/mistral.models';
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './openai/models/moonshot.models';
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './openai/models/openpipe.models';
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './openai/models/openrouter.models';
import { openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels, openaiValidateModelDefs_DEV } from './openai/models/openai.models';
import { perplexityHardcodedModelDescriptions, perplexityInjectVariants } from './openai/models/perplexity.models';
import { tlusApiHeuristic, tlusApiTryParse } from './openai/models/tlusapi.models';
import { togetherAIModelsToModelDescriptions } from './openai/models/together.models';
import { xaiFetchModelDescriptions, xaiModelSort } from './openai/models/xai.models';
import { zaiCuratedModelDescriptions, zaiDiscoverModels, zaiModelSort } from './openai/models/zai.models';
// -- Dispatch types --

/**
 * A vendor-specific pair of operations for listing models:
 * - fetchModels: performs the network call(s) and returns the vendor's wire-level payload
 * - convertToDescriptions: maps that payload to the common ModelDescriptionSchema[]
 *
 * TWireModels ties the two together, so the converter receives exactly what the fetcher produced.
 */
export type ListModelsDispatch<TWireModels = any> = {
  fetchModels: () => Promise<TWireModels>;
  convertToDescriptions: (wireModels: TWireModels) => ModelDescriptionSchema[];
};
/**
 * Identity helper that pins the dispatch's wire-model type parameter.
 * TypeScript infers TWireModels from the `fetchModels` return type and then
 * enforces the same type on the `convertToDescriptions` input.
 */
const createListModelsDispatch = <T>(dispatch: ListModelsDispatch<T>): ListModelsDispatch<T> => dispatch;
// -- Specialized Implementations -- Core of Server-side LLM Model Listing abstraction --

/**
 * Lists the models for a given service access: selects the vendor-specific dispatch,
 * fetches the wire-level model list, and converts it to the common description schema.
 */
export async function listModelsRunDispatch(access: AixAPI_Access, signal?: AbortSignal): Promise<ModelDescriptionSchema[]> {
  const vendorDispatch = _listModelsCreateDispatch(access, signal);
  const rawWireModels = await vendorDispatch.fetchModels();

  // convert to the common schema, auto-injecting implied IFs from parameterSpecs
  const descriptions = vendorDispatch
    .convertToDescriptions(rawWireModels)
    .map(llmsAutoImplyInterfaces);

  // DEV-only: validate parameterSpecs (enumValues ⊆ registry values, paramId existence)
  if (process.env.NODE_ENV === 'development')
    descriptions.forEach(llmDevValidateParameterSpecs_DEV);

  return descriptions;
}
// stub to reduce dependencies - either server/client or both
/** Uppercases the first character of a string; empty (or nullish) input is returned unchanged. */
function _capitalize(s: string): string {
  if (!s?.length)
    return s;
  return s[0].toUpperCase() + s.substring(1);
}
/**
 * Specializes to the correct vendor a request for listing models.
 * This follows the same pattern as AIX's chatGenerate dispatcher for consistency.
 *
 * @param access vendor access configuration (dialect, keys, hosts)
 * @param signal optional AbortSignal to cancel in-flight fetches
 * @returns a dispatch pairing a vendor-specific fetcher with a wire-to-description converter
 * @throws TRPCError (INTERNAL_SERVER_ERROR) on an unhandled dialect (unreachable by typing)
 */
function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal): ListModelsDispatch {

  // create the debug logger (if enabled)
  const _wire = createDebugWireLogger('LLMs');

  // dialect is the only common property
  const { dialect } = access;

  switch (dialect) {

    case 'anthropic': {
      return createListModelsDispatch({
        fetchModels: async () => {
          const { headers, url } = anthropicAccess(access, `${ANTHROPIC_API_PATHS.models}?limit=1000`, {/* ... no options for list ... */ });
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Anthropic', signal });
          _wire?.logResponse(wireModels);
          return AnthropicWire_API_Models_List.Response_schema.parse(wireModels);
        },
        convertToDescriptions: (wireModelsResponse) => {
          const { data: availableModels } = wireModelsResponse;

          // [DEV] check for stale/unknown model definitions
          anthropicValidateModelDefs_DEV(availableModels);

          // sort by: family (desc) > class (desc) > date (desc) -- Future NOTE: -5- will match -4-5- and -3-5-.. figure something else out
          const familyPrecedence = ['-4-7-', '-4-6', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
          const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
          const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f));
          const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c));

          // cast the models to the common schema
          return availableModels
            .sort((a, b) => {
              const familyA = getFamilyIdx(a.id);
              const familyB = getFamilyIdx(b.id);
              const classA = getClassIdx(a.id);
              const classB = getClassIdx(b.id);
              // family desc (lower index = better, -1 = unknown goes last)
              if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB);
              // class desc
              if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
              // date desc (newer first) - string comparison works since format is YYYYMMDD
              return b.id.localeCompare(a.id);
            })
            .map((model): ModelDescriptionSchema => {
              // match model definition
              const knownModel = hardcodedAnthropicModels.find(m => m.id === model.id);
              if (knownModel) {
                // update model creation time, if provided
                // NOTE(review): this writes into the shared hardcodedAnthropicModels entry - acts as a
                // lazy one-time fill across requests; confirm the module-level mutation is intentional
                if (!knownModel.created && model.created_at)
                  knownModel.created = Math.round(new Date(model.created_at).getTime() / 1000);
                return knownModel;
              }
              // 0-day, new model: create an approximate model definition (placeholder) with sensible defaults
              return llmsAntCreatePlaceholderModel(model);
            })
            // inject thinking variants using the centralized variant system
            .reduce(anthropicInjectVariants, []);
        },
      });
    }

    case 'bedrock': {
      return createListModelsDispatch({
        fetchModels: async () => {
          // construct URLs by region
          const region = bedrockResolveRegion(access);
          const fmUrl = bedrockURLControlPlane(region, '/foundation-models?byInferenceType=ON_DEMAND');
          const ipUrl = bedrockURLControlPlane(region, '/inference-profiles?typeEquals=SYSTEM_DEFINED&maxResults=1000');
          const mantleUrl = bedrockURLMantle(region, '/v1/models');

          // sign and fetch all lists in parallel - each fails independently
          const [fmResult, ipResult, mantleIdsResult] = await Promise.allSettled([
            // Foundation Models
            bedrockAccessAsync(access, 'GET', fmUrl, undefined)
              .then(fmAccess => fetchJsonOrTRPCThrow({ ...fmAccess, signal, name: 'Bedrock/FM' })),
            // Inference Profiles
            bedrockAccessAsync(access, 'GET', ipUrl, undefined)
              .then(ipAccess => fetchJsonOrTRPCThrow({ ...ipAccess, signal, name: 'Bedrock/IP' })),
            // Mantle Models
            bedrockAccessAsync(access, 'GET', mantleUrl, undefined)
              .then(mantleAccess => fetchJsonOrTRPCThrow({ ...mantleAccess, signal, name: 'Bedrock/Mantle' })),
          ]);

          // if both FM and IP failed, throw the first error so the user sees it
          if (fmResult.status === 'rejected' && ipResult.status === 'rejected')
            throw fmResult.reason;

          // degrade gracefully if any failed
          const fmResponse = fmResult.status === 'fulfilled' ? fmResult.value : { modelSummaries: [] };
          const ipResponse = ipResult.status === 'fulfilled' ? ipResult.value : { inferenceProfileSummaries: [] };
          const mantleResponse = mantleIdsResult.status === 'fulfilled' ? mantleIdsResult.value : { data: [] };

          _wire?.logResponse(fmResponse);
          _wire?.logResponse(ipResponse);
          _wire?.logResponse(mantleResponse);

          return {
            foundationModels: BedrockWire_API_Models_List.FoundationModelsResponse_schema.parse(fmResponse),
            inferenceProfiles: BedrockWire_API_Models_List.InferenceProfilesResponse_schema.parse(ipResponse),
            mantleModelIds: BedrockWire_API_Models_List.MantleModelsResponse_schema.parse(mantleResponse),
          };
        },
        convertToDescriptions: ({ foundationModels, inferenceProfiles, mantleModelIds }) =>
          bedrockModelsToDescriptions(foundationModels, inferenceProfiles, mantleModelIds),
      });
    }

    case 'gemini': {
      return createListModelsDispatch({
        fetchModels: async () => {
          const { headers, url } = geminiAccess(access, null, GeminiWire_API_Models_List.getPath, false);
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Gemini', signal });
          _wire?.logResponse(wireModels);
          const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models;

          // [DEV] check for stale/unknown model definitions
          geminiValidateParserOutput_DEV(wireModels, detailedModels);
          geminiValidateModelDefs_DEV(detailedModels);

          return detailedModels;
        },
        convertToDescriptions: (detailedModels) => {
          // NOTE: no need to retrieve info for each of the models (e.g. /v1beta/model/gemini-pro),
          // as the List API already has all the info on all the models

          // first filter from the original list
          const filteredModels = detailedModels.filter(geminiFilterModels);

          // map to our output schema
          const models = filteredModels
            .map(geminiModelToModelDescription)
            .filter(model => !!model)
            .sort(geminiSortModels);

          return geminiModelsAddVariants(models);
        },
      });
    }

    case 'ollama': {
      return createListModelsDispatch({
        fetchModels: async () => {
          const { headers, url } = ollamaAccess(access, '/api/tags');
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama', signal });
          _wire?.logResponse(wireModels);
          const models = wireOllamaListModelsSchema.parse(wireModels).models;

          // retrieve info for each of the models
          return await Promise.all(models.map(async (model) => {
            // perform /api/show on each model to get detailed info
            const { headers, url } = ollamaAccess(access, '/api/show');
            const wireModelInfo = await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body: { 'name': model.name }, name: 'Ollama', signal });
            const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo);
            return { ...model, ...modelInfo };
          }));
        },
        convertToDescriptions: (detailedModels) => {
          return detailedModels.map((model) => {

            // the model name is in the format "name:tag" (default tag = 'latest')
            const [modelName, modelTag] = model.name.split(':');

            // pretty label and description
            const label = _capitalize(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : '');
            const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {};
            let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and unused

            // prepend the parameters count and quantization level
            if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) {
              let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : '';
              if (model.details.quantization_level)
                firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')');
              if (model.size)
                firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`;
              if (baseModel.hasTools)
                firstLine += ' [tools]';
              if (baseModel.hasVision)
                firstLine += ' [vision]';
              description = firstLine + '\n\n' + description;
            }

            /* Find the context window from the 'num_ctx' line in the parameters string, if present
             * - https://github.com/enricoros/big-AGI/issues/309
             * - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096
             * - We are tracking the Upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future
             */
            let contextWindow = baseModel.contextWindow || 8192;
            if (model.parameters) {
              // split the parameters into lines, and find one called "num_ctx ...spaces... number"
              const paramsNumCtx = model.parameters.split('\n').find((line) => line.startsWith('num_ctx '));
              if (paramsNumCtx) {
                const numCtxValue: string = paramsNumCtx.split(/\s+/)[1];
                if (numCtxValue) {
                  const numCtxNumber: number = parseInt(numCtxValue);
                  if (!isNaN(numCtxNumber))
                    contextWindow = numCtxNumber;
                }
              }
            }

            // auto-detect interfaces from the hardcoded description (in turn parsed from the html page)
            const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : [];
            if (baseModel.hasTools)
              interfaces.push(LLM_IF_OAI_Fn);
            if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic
              interfaces.push(LLM_IF_OAI_Vision);

            // BUGFIX: Date.parse returns NaN (never null/undefined) on unparsable input, so the former
            // `Date.parse(...) ?? undefined` could never fall back and leaked NaN into created/updated
            // NOTE(review): this is milliseconds, while e.g. the Anthropic path stores unix seconds - confirm expected unit
            const modifiedTs = Date.parse(model.modified_at);
            const modifiedTime = Number.isNaN(modifiedTs) ? undefined : modifiedTs;

            // console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n');
            return {
              id: model.name,
              label,
              created: modifiedTime,
              updated: modifiedTime,
              description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown',
              contextWindow,
              ...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}),
              interfaces,
            };
          });
        },
      });
    }

    case 'perplexity':
      // [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/getting-started/pricing#sonar-models-chat-completions)
      return createListModelsDispatch({
        fetchModels: async () => null,
        convertToDescriptions: () => perplexityHardcodedModelDescriptions().reduce(perplexityInjectVariants, []),
      });

    case 'xai':
      // [xAI]: custom models listing
      return createListModelsDispatch({
        fetchModels: async () => xaiFetchModelDescriptions(access),
        convertToDescriptions: models => models.sort(xaiModelSort),
      });

    case 'lmstudio':
      // [LM Studio]: custom models listing with native API
      return createListModelsDispatch({
        fetchModels: async () => lmStudioFetchModels(access),
        convertToDescriptions: (response) => lmStudioModelsToModelDescriptions(response.models),
      });

    case 'zai':
      // [Z.ai]: curated models as primary source; list API is unreliable/abandoned.
      // Optimistically try the API for 0-day model discovery, but never fail on it.
      return createListModelsDispatch({
        fetchModels: async (): Promise<string[]> => {
          try {
            const { headers, url } = openAIAccess(access, null, OPENAI_API_PATHS.models);
            _wire?.logRequest('GET', url, headers);
            const wireModels = await fetchJsonOrTRPCThrow<OpenAIWire_API_Models_List.Response>({ url, headers, name: 'OpenAI/Zai', signal });
            _wire?.logResponse(wireModels);
            return (wireModels?.data || []).map((m: { id: string }) => m.id);
          } catch (error) {
            // API is unreliable - log and continue with curated list only
            console.warn('[Z.ai] Models list API failed, using curated models only:', (error as Error)?.message || error);
            return [];
          }
        },
        convertToDescriptions: (apiModelIds) => {
          const curated = zaiCuratedModelDescriptions();
          const discovered = zaiDiscoverModels(apiModelIds);
          return [...curated, ...discovered].sort(zaiModelSort);
        },
      });

    case 'alibaba':
    case 'azure':
    case 'deepseek':
    case 'groq':
    case 'localai':
    case 'mistral':
    case 'moonshot':
    case 'openai':
    case 'openpipe':
    case 'openrouter':
    case 'togetherai':
      return createListModelsDispatch({

        // [OpenAI-compatible dialects]: openAI-style fetch models list
        fetchModels: async () => {
          const { headers, url } = openAIAccess(access, null, OPENAI_API_PATHS.models);
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow<OpenAIWire_API_Models_List.Response>({ url, headers, name: `OpenAI/${_capitalize(dialect)}`, signal });
          _wire?.logResponse(wireModels);
          return wireModels;
        },

        // OpenAI models conversions: dependent on the dialect
        convertToDescriptions: (openAIWireModelsResponse) => {

          // [Together] missing the .data property - so we have to do this early
          if (dialect === 'togetherai')
            return togetherAIModelsToModelDescriptions(openAIWireModelsResponse);

          // [TLUS-style API] detect by structure: { data: [{ id, tier, capabilities, ... }] }
          if (tlusApiHeuristic(openAIWireModelsResponse)) {
            const tlusModels = tlusApiTryParse(openAIWireModelsResponse);
            if (tlusModels) return tlusModels;
            // fall through if failed
          }

          // NOTE: we don't zod here as it would strip unknown properties needed for some dialects - so we proceed optimistically
          // let maybeModels = OpenAIWire_API_Models_List.Response_schema.parse(openAIWireModelsResponse).data || [];
          let maybeModels = openAIWireModelsResponse?.data || [];

          // de-duplicate by ids (can happen for local servers.. upstream bugs)
          const preCount = maybeModels.length;
          maybeModels = maybeModels.filter((model, index) => maybeModels.findIndex(m => m.id === model.id) === index);
          if (preCount !== maybeModels.length && dialect !== 'mistral' /* [Mistral, 2025-11-17] Mistral has 2 duplicate models */)
            console.warn(`openai.router.listModels: removed ${preCount - maybeModels.length} duplicate models for dialect ${dialect}`);

          // sort by id
          maybeModels.sort((a, b) => a.id.localeCompare(b.id));

          // every dialect has a different way to enumerate models - we execute the mapping on the server side
          switch (dialect) {

            case 'alibaba':
              return maybeModels
                .filter(({ id }) => alibabaModelFilter(id))
                .map(({ id, created }) => alibabaModelToModelDescription(id, created))
                .sort(alibabaModelSort);

            case 'azure':
              const azureOpenAIDeployments = azureParseFromDeploymentsAPI(maybeModels);
              return azureOpenAIDeployments
                .filter(azureDeploymentFilter)
                .map(azureDeploymentToModelDescription)
                .sort(openAISortModels);

            case 'deepseek':
              return maybeModels
                .filter(({ id }) => deepseekModelFilter(id))
                .map(({ id }) => deepseekModelToModelDescription(id))
                // .reduce(deepseekInjectVariants, [] as ModelDescriptionSchema[]) // was used to inject V3.2-Speciale
                .sort(deepseekModelSort);

            case 'groq':
              // [DEV] check for stale/unknown model definitions
              groqValidateModelDefs_DEV(maybeModels.map(m => m.id));
              return maybeModels
                .filter(groqModelFilter)
                .map(groqModelToModelDescription)
                .sort(groqModelSortFn);

            case 'localai':
              return maybeModels
                .map(({ id }) => localAIModelToModelDescription(id))
                .sort(localAIModelSortFn);

            case 'mistral':
              return mistralModels(maybeModels);

            case 'moonshot':
              return maybeModels
                .filter(moonshotModelFilter)
                .map(moonshotModelToModelDescription)
                .sort(moonshotModelSortFn);

            case 'openai':
              // [ChutesAI] special case for model enumeration
              const oaiHost = access.oaiHost;
              if (chutesAIHeuristic(oaiHost))
                return chutesAIModelsToModelDescriptions(maybeModels);

              // [FireworksAI] special case for model enumeration
              if (fireworksAIHeuristic(oaiHost))
                return fireworksAIModelsToModelDescriptions(maybeModels);

              // [Novita] special case for model enumeration
              if (novitaHeuristic(oaiHost))
                return novitaModelsToModelDescriptions(openAIWireModelsResponse);

              // [LLM API] OpenAI-compatible gateway with rich model metadata
              if (llmapiHeuristic(oaiHost))
                return llmapiModelsToModelDescriptions(openAIWireModelsResponse);

              // [FastChat] make the best of the little info
              if (fastAPIHeuristic(maybeModels))
                return fastAPIModels(maybeModels);

              // [OpenAI or OpenAI-compatible]: chat-only models, custom sort, manual mapping
              const isNotOpenai = !!(oaiHost && !llmsHostnameMatches(oaiHost, 'api.openai.com')); // empty host (uses default) or explicitly api.openai.com
              const models = maybeModels
                // limit to only 'gpt' and 'non instruct' models
                .filter(openAIModelFilter)
                // to model description
                .map((model: any): ModelDescriptionSchema => openAIModelToModelDescription(model.id, { isNotOpenai, modelCreated: model.created }))
                // inject variants
                .reduce(openAIInjectVariants, [])
                // custom OpenAI sort
                .sort(openAISortModels);

              // [DEV] check for stale/unknown model definitions
              openaiValidateModelDefs_DEV(maybeModels, models);

              return models;

            case 'openpipe':
              return [
                ...maybeModels.map(openPipeModelToModelDescriptions),
                ...openPipeModelDescriptions().sort(openPipeModelSort),
              ];

            case 'openrouter':
              // openRouterStatTokenizers(maybeModels);
              return maybeModels
                .sort(openRouterModelFamilySortFn)
                .map(openRouterModelToModelDescription)
                .filter(desc => !!desc)
                .reduce(openRouterInjectVariants, []);

            default:
              const _exhaustiveCheck: never = dialect;
              throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unhandled dialect: ${dialect}` });
          }
        },
      });

    default:
      const _exhaustiveCheck: never = dialect;
      throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unsupported dialect: ${dialect}` });
  }
}