import { TRPCError } from '@trpc/server';

import type { AixAPI_Access } from '~/modules/aix/server/api/aix.wiretypes';

import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';

import { createDebugWireLogger } from '~/server/wire';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';

import type { ModelDescriptionSchema } from './llm.server.types';
import { llmDevValidateParameterSpecs_DEV, llmsAutoImplyInterfaces } from './models.mappings';

// protocol: Anthropic
import { anthropicInjectVariants, anthropicValidateModelDefs_DEV, AnthropicWire_API_Models_List, hardcodedAnthropicModels, llmsAntCreatePlaceholderModel } from './anthropic/anthropic.models';
import { ANTHROPIC_API_PATHS, anthropicAccess } from './anthropic/anthropic.access';

// protocol: Bedrock
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLControlPlane, bedrockURLMantle } from './bedrock/bedrock.access';
import { bedrockModelsToDescriptions, BedrockWire_API_Models_List } from './bedrock/bedrock.models';

// protocol: Gemini
import { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import { geminiAccess } from './gemini/gemini.access';
import { geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels, geminiValidateModelDefs_DEV, geminiValidateParserOutput_DEV } from './gemini/gemini.models';

// protocol: Ollama
import { OLLAMA_BASE_MODELS } from './ollama/ollama.models';
import { ollamaAccess } from './ollama/ollama.access';
import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama/ollama.wiretypes';

// protocol: OpenAI-compatible
import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
import { llmsHostnameMatches, OPENAI_API_PATHS, openAIAccess } from './openai/openai.access';
import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './openai/models/alibaba.models';
import { arceeAIHeuristic, arceeAIModelsToModelDescriptions } from './openai/models/arceeai.models';
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './openai/models/azure.models';
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './openai/models/chutesai.models';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './openai/models/deepseek.models';
import { fastAPIHeuristic, fastAPIModels } from './openai/models/fastapi.models';
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './openai/models/fireworksai.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription, groqValidateModelDefs_DEV } from './openai/models/groq.models';
import { minimaxHardcodedModelDescriptions, minimaxHeuristic } from './openai/models/minimax.models';
import { llmapiHeuristic, llmapiModelsToModelDescriptions } from './openai/models/llmapi.models';
import { novitaHeuristic, novitaModelsToModelDescriptions } from './openai/models/novita.models';
import { lmStudioFetchModels, lmStudioModelsToModelDescriptions } from './openai/models/lmstudio.models';
import { localAIModelSortFn, localAIModelToModelDescription } from './openai/models/localai.models';
import { mistralModels } from './openai/models/mistral.models';
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './openai/models/moonshot.models';
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './openai/models/openpipe.models';
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './openai/models/openrouter.models';
import { openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels, openaiValidateModelDefs_DEV } from './openai/models/openai.models';
import { perplexityHardcodedModelDescriptions, perplexityInjectVariants } from './openai/models/perplexity.models';
import { tlusApiHeuristic, tlusApiTryParse } from './openai/models/tlusapi.models';
import { togetherAIModelsToModelDescriptions } from './openai/models/together.models';
import { xaiFetchModelDescriptions, xaiModelSort } from './openai/models/xai.models';
import { zaiCuratedModelDescriptions, zaiDiscoverModels, zaiModelSort } from './openai/models/zai.models';
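
/**
 * Server-side model listing, one dispatch per provider dialect: each dispatch pairs a
 * raw `fetchModels` (provider wire format) with a pure `convertToDescriptions` mapping
 * into our common `ModelDescriptionSchema[]`, mirroring AIX's chatGenerate dispatcher.
 */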

// -- Dispatch types --

export type ListModelsDispatch<TWireModels = any> = {
  fetchModels: () => Promise<TWireModels>;
  convertToDescriptions: (wireModels: TWireModels) => ModelDescriptionSchema[];
};

/**
 * Helper to create a dispatch with proper type inference.
 * TypeScript will infer TWireModels from the fetchModels return type and enforce it in convertToDescriptions.
 */
function createListModelsDispatch<TWireModels>(dispatch: ListModelsDispatch<TWireModels>): ListModelsDispatch<TWireModels> {
  return dispatch;
}
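
// For instance (illustrative only): a dispatch whose fetchModels resolves to
// `{ data: { id: string }[] }` gets convertToDescriptions statically checked against that shape:
//   createListModelsDispatch({
//     fetchModels: async () => ({ data: [{ id: 'some-model' }] }),
//     convertToDescriptions: ({ data }) => data.map(modelToDescription), // `data` is typed from fetchModels
//   });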

// -- Specialized Implementations -- Core of the server-side LLM model listing abstraction --

export async function listModelsRunDispatch(access: AixAPI_Access, signal?: AbortSignal): Promise<ModelDescriptionSchema[]> {
  const dispatch = _listModelsCreateDispatch(access, signal);
  const wireModels = await dispatch.fetchModels();
  const models = dispatch.convertToDescriptions(wireModels)
    .map(llmsAutoImplyInterfaces); // auto-inject implied IFs from parameterSpecs

  // DEV: validate parameterSpecs (enumValues ⊆ registry values, paramId existence)
  if (process.env.NODE_ENV === 'development')
    models.forEach(llmDevValidateParameterSpecs_DEV);

  return models;
}

// local stub to reduce dependencies - usable on either the server or the client (or both)
function _capitalize(s: string): string {
  return s?.length ? (s.charAt(0).toUpperCase() + s.slice(1)) : s;
}

/**
 * Specializes a model-listing request to the correct vendor.
 * This follows the same pattern as AIX's chatGenerate dispatcher, for consistency.
 */
function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal): ListModelsDispatch {

  // create the debug logger (if enabled)
  const _wire = createDebugWireLogger('LLMs');

  // dialect is the only common property
  const { dialect } = access;
  switch (dialect) {

    case 'anthropic': {
      return createListModelsDispatch({
        fetchModels: async () => {
          const { headers, url } = anthropicAccess(access, `${ANTHROPIC_API_PATHS.models}?limit=1000`, {/* ... no options for list ... */});
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Anthropic', signal });
          _wire?.logResponse(wireModels);
          return AnthropicWire_API_Models_List.Response_schema.parse(wireModels);
        },
        convertToDescriptions: (wireModelsResponse) => {
          const { data: availableModels } = wireModelsResponse;

          // [DEV] check for stale/unknown model definitions
          anthropicValidateModelDefs_DEV(availableModels);

          // sort by: family (desc) > class (desc) > date (desc)
          // Future NOTE: a bare '-5-' would match both '-4-5-' and '-3-5-' - figure something else out
          const familyPrecedence = ['-4-7-', '-4-6', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
          const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
          const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f));
          const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c));

          // cast the models to the common schema
          return availableModels
            .sort((a, b) => {
              const familyA = getFamilyIdx(a.id);
              const familyB = getFamilyIdx(b.id);
              const classA = getClassIdx(a.id);
              const classB = getClassIdx(b.id);
              // family desc (lower index = better, -1 = unknown goes last)
              if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB);
              // class desc
              if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
              // date desc (newer first) - string comparison works since the id suffix format is YYYYMMDD
              return b.id.localeCompare(a.id);
            })
            .map((model): ModelDescriptionSchema => {
              // match model definition
              const knownModel = hardcodedAnthropicModels.find(m => m.id === model.id);
              if (knownModel) {
                // update model creation time, if provided
                if (!knownModel.created && model.created_at)
                  knownModel.created = Math.round(new Date(model.created_at).getTime() / 1000);
                return knownModel;
              }
              // 0-day, new model: create an approximate model definition (placeholder) with sensible defaults
              return llmsAntCreatePlaceholderModel(model);
            })
            // inject thinking variants using the centralized variant system
            .reduce(anthropicInjectVariants, []);
        },
      });
    }
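
    // [Bedrock] merges three catalogs fetched in parallel: foundation models and inference
    // profiles from the control plane, plus model ids from the Mantle endpoint; FM and IP
    // failing together is fatal, while any single failure degrades to an empty list.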
    case 'bedrock': {
      return createListModelsDispatch({
        fetchModels: async () => {
          // construct URLs by region
          const region = bedrockResolveRegion(access);
          const fmUrl = bedrockURLControlPlane(region, '/foundation-models?byInferenceType=ON_DEMAND');
          const ipUrl = bedrockURLControlPlane(region, '/inference-profiles?typeEquals=SYSTEM_DEFINED&maxResults=1000');
          const mantleUrl = bedrockURLMantle(region, '/v1/models');

          // sign and fetch all lists in parallel - each fails independently
          const [fmResult, ipResult, mantleIdsResult] = await Promise.allSettled([
            // Foundation Models
            bedrockAccessAsync(access, 'GET', fmUrl, undefined)
              .then(fmAccess => fetchJsonOrTRPCThrow({ ...fmAccess, signal, name: 'Bedrock/FM' })),
            // Inference Profiles
            bedrockAccessAsync(access, 'GET', ipUrl, undefined)
              .then(ipAccess => fetchJsonOrTRPCThrow({ ...ipAccess, signal, name: 'Bedrock/IP' })),
            // Mantle Models
            bedrockAccessAsync(access, 'GET', mantleUrl, undefined)
              .then(mantleAccess => fetchJsonOrTRPCThrow({ ...mantleAccess, signal, name: 'Bedrock/Mantle' })),
          ]);

          // if both FM and IP failed, throw the first error so the user sees it
          if (fmResult.status === 'rejected' && ipResult.status === 'rejected')
            throw fmResult.reason;

          // degrade gracefully if any failed
          const fmResponse = fmResult.status === 'fulfilled' ? fmResult.value : { modelSummaries: [] };
          const ipResponse = ipResult.status === 'fulfilled' ? ipResult.value : { inferenceProfileSummaries: [] };
          const mantleResponse = mantleIdsResult.status === 'fulfilled' ? mantleIdsResult.value : { data: [] };

          _wire?.logResponse(fmResponse);
          _wire?.logResponse(ipResponse);
          _wire?.logResponse(mantleResponse);

          return {
            foundationModels: BedrockWire_API_Models_List.FoundationModelsResponse_schema.parse(fmResponse),
            inferenceProfiles: BedrockWire_API_Models_List.InferenceProfilesResponse_schema.parse(ipResponse),
            mantleModelIds: BedrockWire_API_Models_List.MantleModelsResponse_schema.parse(mantleResponse),
          };
        },
        convertToDescriptions: ({ foundationModels, inferenceProfiles, mantleModelIds }) =>
          bedrockModelsToDescriptions(foundationModels, inferenceProfiles, mantleModelIds),
      });
    }

    case 'gemini': {
      return createListModelsDispatch({
        fetchModels: async () => {
          const { headers, url } = geminiAccess(access, null, GeminiWire_API_Models_List.getPath, false);
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Gemini', signal });
          _wire?.logResponse(wireModels);
          const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models;

          // [DEV] check for stale/unknown model definitions
          geminiValidateParserOutput_DEV(wireModels, detailedModels);
          geminiValidateModelDefs_DEV(detailedModels);

          return detailedModels;
        },
        convertToDescriptions: (detailedModels) => {
          // NOTE: no need to retrieve info for each of the models (e.g. /v1beta/model/gemini-pro),
          // as the List API already has all the info on all the models

          // first filter from the original list
          const filteredModels = detailedModels.filter(geminiFilterModels);

          // map to our output schema
          const models = filteredModels
            .map(geminiModelToModelDescription)
            .filter(model => !!model)
            .sort(geminiSortModels);

          return geminiModelsAddVariants(models);
        },
      });
    }
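
    // [Ollama] two-phase listing: GET /api/tags enumerates the installed models, then one
    // POST /api/show per model enriches it with details; a failed /api/show only downgrades
    // that single model (details: null) instead of failing the whole list.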
    case 'ollama': {
      return createListModelsDispatch({
        fetchModels: async () => {
          const { headers, url } = ollamaAccess(access, '/api/tags');
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama', signal });
          _wire?.logResponse(wireModels);
          const models = wireOllamaListModelsSchema.parse(wireModels).models;

          // retrieve info for each of the models (don't fail all if a single /api/show fails)
          const results = await Promise.allSettled(models.map(async (model) => {
            const { headers, url } = ollamaAccess(access, '/api/show');
            const wireModelInfo = await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body: { 'name': model.name }, name: 'Ollama', signal });
            const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo);
            return { ...model, ...modelInfo };
          }));

          return results.map((result, i) =>
            result.status === 'fulfilled' ? result.value : { ...models[i], details: null },
          );
        },
        convertToDescriptions: (detailedModels) => {
          return detailedModels.map((model) => {

            // the model name is in the format "name:tag" (default tag = 'latest')
            const [modelName, modelTag] = model.name.split(':');

            // pretty label and description
            const label = _capitalize(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : '');
            const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {};
            let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and not used by anybody

            // prepend the parameters count and quantization level
            if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) {
              let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : '';
              if (model.details.quantization_level)
                firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')');
              if (model.size)
                firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`;
              if (baseModel.hasTools) firstLine += ' [tools]';
              if (baseModel.hasVision) firstLine += ' [vision]';
              description = firstLine + '\n\n' + description;
            }

            /* Find the context window from the 'num_ctx' line in the parameters string, if present
             * - https://github.com/enricoros/big-AGI/issues/309
             * - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096
             * - We are tracking the upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future
             */
            let contextWindow = baseModel.contextWindow || 8192;
            if (model.parameters) {
              // split the parameters into lines, and find one shaped like "num_ctx ...spaces... number"
              const paramsNumCtx = model.parameters.split('\n').find((line) => line.startsWith('num_ctx '));
              if (paramsNumCtx) {
                const numCtxValue: string = paramsNumCtx.split(/\s+/)[1];
                if (numCtxValue) {
                  const numCtxNumber: number = parseInt(numCtxValue);
                  if (!isNaN(numCtxNumber))
                    contextWindow = numCtxNumber;
                }
              }
            }

            // auto-detect interfaces from the hardcoded description (in turn parsed from the html page)
            const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : [];
            if (baseModel.hasTools)
              interfaces.push(LLM_IF_OAI_Fn);
            if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic
              interfaces.push(LLM_IF_OAI_Vision);

            // console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n');

            return {
              id: model.name,
              label,
              created: Date.parse(model.modified_at) || undefined, // '||' (not '??') so a NaN from an unparsable date becomes undefined
              updated: Date.parse(model.modified_at) || undefined,
              description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown',
              contextWindow,
              ...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}),
              interfaces,
            };
          });
        },
      });
    }
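
    // The next dialects don't go through an OpenAI-compatible /v1/models listing:
    // Perplexity has no list API at all, while xAI, LM Studio and Z.ai use custom listings.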
    case 'perplexity':
      // [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/getting-started/pricing#sonar-models-chat-completions)
      return createListModelsDispatch({
        fetchModels: async () => null,
        convertToDescriptions: () => perplexityHardcodedModelDescriptions().reduce(perplexityInjectVariants, []),
      });

    case 'xai':
      // [xAI]: custom models listing
      return createListModelsDispatch({
        fetchModels: async () => xaiFetchModelDescriptions(access),
        convertToDescriptions: models => models.sort(xaiModelSort),
      });

    case 'lmstudio':
      // [LM Studio]: custom models listing with native API
      return createListModelsDispatch({
        fetchModels: async () => lmStudioFetchModels(access),
        convertToDescriptions: (response) => lmStudioModelsToModelDescriptions(response.models),
      });

    case 'zai':
      // [Z.ai]: curated models as the primary source; the list API is unreliable/abandoned.
      // Optimistically try the API for 0-day model discovery, but never fail on it.
      return createListModelsDispatch({
        fetchModels: async (): Promise<string[]> => {
          try {
            const { headers, url } = openAIAccess(access, null, OPENAI_API_PATHS.models);
            _wire?.logRequest('GET', url, headers);
            const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'OpenAI/Zai', signal });
            _wire?.logResponse(wireModels);
            return (wireModels?.data || []).map((m: { id: string }) => m.id);
          } catch (error) {
            // API is unreliable - log and continue with the curated list only
            console.warn('[Z.ai] Models list API failed, using curated models only:', (error as Error)?.message || error);
            return [];
          }
        },
        convertToDescriptions: (apiModelIds) => {
          const curated = zaiCuratedModelDescriptions();
          const discovered = zaiDiscoverModels(apiModelIds);
          return [...curated, ...discovered].sort(zaiModelSort);
        },
      });
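
    // [OpenAI-compatible] the remaining dialects share a single GET /v1/models fetch and
    // diverge in convertToDescriptions, where each dialect (or, for 'openai', each host
    // heuristic) applies its own filter/map/sort pipeline.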
    case 'alibaba':
    case 'azure':
    case 'deepseek':
    case 'groq':
    case 'localai':
    case 'mistral':
    case 'moonshot':
    case 'openai':
    case 'openpipe':
    case 'openrouter':
    case 'togetherai':

      // Effective URL and headers - respects the OPENAI_API_HOST server env and default hosts
      const { headers: oaiHeaders, url: oaiUrl } = openAIAccess(access, null, OPENAI_API_PATHS.models);

      return createListModelsDispatch({

        // [OpenAI-compatible dialects]: openAI-style fetch models list
        fetchModels: async () => {

          // Bypass fetch for providers that do NOT have the /v1/models API yet - works in conjunction with the hardcoded models below
          const bypassFetch = (dialect === 'openai' && minimaxHeuristic(oaiUrl)); // [MiniMax]
          if (bypassFetch)
            return { data: [] }; // dummy response

          _wire?.logRequest('GET', oaiUrl, oaiHeaders);
          const wireModels = await fetchJsonOrTRPCThrow({
            url: oaiUrl,
            headers: oaiHeaders,
            name: `OpenAI/${_capitalize(dialect)}`,
            signal,
          });
          _wire?.logResponse(wireModels);
          return wireModels;
        },

        // OpenAI models conversions: dependent on the dialect
        convertToDescriptions: (openAIWireModelsResponse) => {

          // [Together] missing the .data property - so we have to do this early
          if (dialect === 'togetherai')
            return togetherAIModelsToModelDescriptions(openAIWireModelsResponse);

          // [TLUS-style API] detect by structure: { data: [{ id, tier, capabilities, ... }] }
          if (tlusApiHeuristic(openAIWireModelsResponse)) {
            const tlusModels = tlusApiTryParse(openAIWireModelsResponse);
            if (tlusModels) return tlusModels; // fall through if failed
          }

          // NOTE: we don't zod here as it would strip unknown properties needed for some dialects - so we proceed optimistically
          // let maybeModels = OpenAIWire_API_Models_List.Response_schema.parse(openAIWireModelsResponse).data || [];
          let maybeModels = openAIWireModelsResponse?.data || [];

          // de-duplicate by ids (can happen for local servers.. upstream bugs)
          const preCount = maybeModels.length;
          maybeModels = maybeModels.filter((model, index) => maybeModels.findIndex(m => m.id === model.id) === index);
          if (preCount !== maybeModels.length && dialect !== 'mistral' /* [Mistral, 2025-11-17] Mistral has 2 duplicate models */)
            console.warn(`openai.router.listModels: removed ${preCount - maybeModels.length} duplicate models for dialect ${dialect}`);

          // sort by id
          maybeModels.sort((a, b) => a.id.localeCompare(b.id));

          // every dialect has a different way to enumerate models - we execute the mapping on the server side
          switch (dialect) {

            case 'alibaba':
              return maybeModels
                .filter(({ id }) => alibabaModelFilter(id))
                .map(({ id, created }) => alibabaModelToModelDescription(id, created))
                .sort(alibabaModelSort);

            case 'azure':
              const azureOpenAIDeployments = azureParseFromDeploymentsAPI(maybeModels);
              return azureOpenAIDeployments
                .filter(azureDeploymentFilter)
                .map(azureDeploymentToModelDescription)
                .sort(openAISortModels);

            case 'deepseek':
              return maybeModels
                .filter(({ id }) => deepseekModelFilter(id))
                .map(({ id }) => deepseekModelToModelDescription(id))
                // .reduce(deepseekInjectVariants, [] as ModelDescriptionSchema[]) // was used to inject V3.2-Speciale
                .sort(deepseekModelSort);

            case 'groq':
              // [DEV] check for stale/unknown model definitions
              groqValidateModelDefs_DEV(maybeModels.map(m => m.id));
              return maybeModels
                .filter(groqModelFilter)
                .map(groqModelToModelDescription)
                .sort(groqModelSortFn);

            case 'localai':
              return maybeModels
                .map(({ id }) => localAIModelToModelDescription(id))
                .sort(localAIModelSortFn);

            case 'mistral':
              return mistralModels(maybeModels);

            case 'moonshot':
              return maybeModels
                .filter(moonshotModelFilter)
                .map(moonshotModelToModelDescription)
                .sort(moonshotModelSortFn);

            case 'openai':
              // [Arcee AI] special case for model enumeration
              if (arceeAIHeuristic(oaiUrl))
                return arceeAIModelsToModelDescriptions(openAIWireModelsResponse);

              // [ChutesAI] special case for model enumeration
              if (chutesAIHeuristic(oaiUrl))
                return chutesAIModelsToModelDescriptions(maybeModels);

              // [FireworksAI] special case for model enumeration
              if (fireworksAIHeuristic(oaiUrl))
                return fireworksAIModelsToModelDescriptions(maybeModels);

              // [MiniMax] hardcoded models (no /v1/models API yet)
              if (minimaxHeuristic(oaiUrl))
                return minimaxHardcodedModelDescriptions();

              // [Novita] special case for model enumeration
              if (novitaHeuristic(oaiUrl))
                return novitaModelsToModelDescriptions(openAIWireModelsResponse);

              // [LLM API] OpenAI-compatible gateway with rich model metadata
              if (llmapiHeuristic(oaiUrl))
                return llmapiModelsToModelDescriptions(openAIWireModelsResponse);

              // [FastChat] make the best of the little info
              if (fastAPIHeuristic(maybeModels))
                return fastAPIModels(maybeModels);

              // [OpenAI or OpenAI-compatible]: chat-only models, custom sort, manual mapping
              const oaiClientHost = access.oaiHost;
              const isNotOpenai = !!(oaiClientHost && !llmsHostnameMatches(oaiClientHost, 'api.openai.com')); // false for an empty host (uses the default) or explicitly api.openai.com

              const models = maybeModels
                // limit to only 'gpt' and 'non instruct' models
                .filter(openAIModelFilter)
                // to model description
                .map((model: any): ModelDescriptionSchema => openAIModelToModelDescription(model.id, { isNotOpenai, modelCreated: model.created }))
                // inject variants
                .reduce(openAIInjectVariants, [])
                // custom OpenAI sort
                .sort(openAISortModels);

              // [DEV] check for stale/unknown model definitions
              openaiValidateModelDefs_DEV(maybeModels, models);

              return models;

            case 'openpipe':
              return [
                ...maybeModels.map(openPipeModelToModelDescriptions),
                ...openPipeModelDescriptions().sort(openPipeModelSort),
              ];

            case 'openrouter':
              // openRouterStatTokenizers(maybeModels);
              return maybeModels
                .sort(openRouterModelFamilySortFn)
                .map(openRouterModelToModelDescription)
                .filter(desc => !!desc)
                .reduce(openRouterInjectVariants, []);
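
            // compile-time exhaustiveness: if a new dialect is added to the union without a
            // case above, the `never` assignment below fails to typecheck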
            default:
              const _exhaustiveCheck: never = dialect;
              throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unhandled dialect: ${dialect}` });
          }
        },
      });

    default:
      const _exhaustiveCheck: never = dialect;
      throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unsupported dialect: ${dialect}` });
  }
}