import { TRPCError } from '@trpc/server';

import type { AixAPI_Access } from '~/modules/aix/server/api/aix.wiretypes';

import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';

import { createDebugWireLogger } from '~/server/wire';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';

import type { ModelDescriptionSchema } from './llm.server.types';
import { llmDevValidateParameterSpecs_DEV, llmsAutoImplyInterfaces } from './models.mappings';

// protocol: Anthropic
import { anthropicInjectVariants, anthropicValidateModelDefs_DEV, AnthropicWire_API_Models_List, hardcodedAnthropicModels, llmsAntCreatePlaceholderModel } from './anthropic/anthropic.models';
import { ANTHROPIC_API_PATHS, anthropicAccess } from './anthropic/anthropic.access';

// protocol: Bedrock
import { bedrockAccessAsync, bedrockResolveRegion, bedrockURLControlPlane, bedrockURLMantle } from './bedrock/bedrock.access';
import { bedrockModelsToDescriptions, BedrockWire_API_Models_List } from './bedrock/bedrock.models';

// protocol: Gemini
import { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import { geminiAccess } from './gemini/gemini.access';
import { geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels, geminiValidateModelDefs_DEV, geminiValidateParserOutput_DEV } from './gemini/gemini.models';

// protocol: Ollama
import { OLLAMA_BASE_MODELS } from './ollama/ollama.models';
import { ollamaAccess } from './ollama/ollama.access';
import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama/ollama.wiretypes';

// protocol: OpenAI-compatible
import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
import { llmsHostnameMatches, OPENAI_API_PATHS, openAIAccess } from './openai/openai.access';
import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './openai/models/alibaba.models';
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './openai/models/azure.models';
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './openai/models/chutesai.models';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './openai/models/deepseek.models';
import { fastAPIHeuristic, fastAPIModels } from './openai/models/fastapi.models';
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './openai/models/fireworksai.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription, groqValidateModelDefs_DEV } from './openai/models/groq.models';
import { llmapiHeuristic, llmapiModelsToModelDescriptions } from './openai/models/llmapi.models';
import { novitaHeuristic, novitaModelsToModelDescriptions } from './openai/models/novita.models';
import { lmStudioFetchModels, lmStudioModelsToModelDescriptions } from './openai/models/lmstudio.models';
import { localAIModelSortFn, localAIModelToModelDescription } from './openai/models/localai.models';
import { mistralModels } from './openai/models/mistral.models';
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './openai/models/moonshot.models';
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './openai/models/openpipe.models';
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './openai/models/openrouter.models';
import { openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels, openaiValidateModelDefs_DEV } from './openai/models/openai.models';
import { perplexityHardcodedModelDescriptions, perplexityInjectVariants } from './openai/models/perplexity.models';
import { tlusApiHeuristic, tlusApiTryParse } from './openai/models/tlusapi.models';
import { togetherAIModelsToModelDescriptions } from './openai/models/together.models';
import { xaiFetchModelDescriptions, xaiModelSort } from './openai/models/xai.models';
import { zaiCuratedModelDescriptions, zaiDiscoverModels, zaiModelSort } from './openai/models/zai.models';


// -- Dispatch types --

export type ListModelsDispatch<TWireModels = any> = {
  fetchModels: () => Promise<TWireModels>;
  convertToDescriptions: (wireModels: TWireModels) => ModelDescriptionSchema[];
};

/**
 * Helper to create a dispatch with proper type inference.
 * TypeScript will infer TWireModels from the fetchModels return type and enforce it in convertToDescriptions.
 */
function createListModelsDispatch<T>(dispatch: ListModelsDispatch<T>): ListModelsDispatch<T> {
  return dispatch;
}
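
// Illustrative sketch (not part of the runtime path): with the helper above, the wire type
// flows from fetchModels into convertToDescriptions without an explicit annotation:
//
//   const _demo = createListModelsDispatch({
//     fetchModels: async () => ({ data: [{ id: 'some-model' }] }),
//     convertToDescriptions: (wire) => wire.data.map(({ id }) => ({ id } as ModelDescriptionSchema)), // `wire` is inferred
//   });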


// -- Specialized Implementations -- Core of Server-side LLM Model Listing abstraction --

export async function listModelsRunDispatch(access: AixAPI_Access, signal?: AbortSignal): Promise<ModelDescriptionSchema[]> {
  const dispatch = _listModelsCreateDispatch(access, signal);
  const wireModels = await dispatch.fetchModels();
  const models = dispatch.convertToDescriptions(wireModels)
    .map(llmsAutoImplyInterfaces); // auto-inject implied IFs from parameterSpecs

  // DEV: validate parameterSpecs (enumValues ⊆ registry values, paramId existence)
  if (process.env.NODE_ENV === 'development')
    models.forEach(llmDevValidateParameterSpecs_DEV);

  return models;
}


// stub to reduce dependencies - usable from either server or client code, or both
function _capitalize(s: string): string {
  return s?.length ? (s.charAt(0).toUpperCase() + s.slice(1)) : s;
}


/**
 * Specializes a models-listing request to the correct vendor.
 * This follows the same pattern as AIX's chatGenerate dispatcher, for consistency.
 */
function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal): ListModelsDispatch {

  // create the debug logger (if enabled)
  const _wire = createDebugWireLogger('LLMs');

  // dialect is the only common property
  const { dialect } = access;

  switch (dialect) {

    case 'anthropic': {
      return createListModelsDispatch({
        fetchModels: async () => {
          const { headers, url } = anthropicAccess(access, `${ANTHROPIC_API_PATHS.models}?limit=1000`, {/* ... no options for list ... */ });
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Anthropic', signal });
          _wire?.logResponse(wireModels);
          return AnthropicWire_API_Models_List.Response_schema.parse(wireModels);
        },
        convertToDescriptions: (wireModelsResponse) => {
          const { data: availableModels } = wireModelsResponse;

          // [DEV] check for stale/unknown model definitions
          anthropicValidateModelDefs_DEV(availableModels);

          // sort by: family (desc) > class (desc) > date (desc) -- Future NOTE: a bare '-5-' would match both '-4-5-' and '-3-5-'; revisit this scheme then
          const familyPrecedence = ['-4-7-', '-4-6', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
          const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];

          const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f));
          const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c));
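
          // Illustrative example (not from the source): for an id like 'claude-3-5-sonnet-20241022',
          // getFamilyIdx finds '-3-5-' at index 6 and getClassIdx finds '-sonnet-' at index 1; lower
          // indices sort first, and the trailing YYYYMMDD date breaks ties below (newer first).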

          // cast the models to the common schema
          return availableModels
            .sort((a, b) => {
              const familyA = getFamilyIdx(a.id);
              const familyB = getFamilyIdx(b.id);
              const classA = getClassIdx(a.id);
              const classB = getClassIdx(b.id);

              // family desc (lower index = better, -1 = unknown goes last)
              if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB);
              // class desc
              if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
              // date desc (newer first) - string comparison works since the ids end in YYYYMMDD
              return b.id.localeCompare(a.id);
            })
            .map((model): ModelDescriptionSchema => {
              // match model definition
              const knownModel = hardcodedAnthropicModels.find(m => m.id === model.id);
              if (knownModel) {

                // update model creation time, if provided
                if (!knownModel.created && model.created_at)
                  knownModel.created = Math.round(new Date(model.created_at).getTime() / 1000);

                return knownModel;
              }

              // 0-day, new model: create an approximate model definition (placeholder) with sensible defaults
              return llmsAntCreatePlaceholderModel(model);
            })
            // inject thinking variants using the centralized variant system
            .reduce(anthropicInjectVariants, []);
        },
      });
    }

    case 'bedrock': {
      return createListModelsDispatch({
        fetchModels: async () => {

          // construct URLs by region
          const region = bedrockResolveRegion(access);
          const fmUrl = bedrockURLControlPlane(region, '/foundation-models?byInferenceType=ON_DEMAND');
          const ipUrl = bedrockURLControlPlane(region, '/inference-profiles?typeEquals=SYSTEM_DEFINED&maxResults=1000');
          const mantleUrl = bedrockURLMantle(region, '/v1/models');

          // sign and fetch all lists in parallel - each fails independently
          const [fmResult, ipResult, mantleIdsResult] = await Promise.allSettled([
            // Foundation Models
            bedrockAccessAsync(access, 'GET', fmUrl, undefined)
              .then(fmAccess => fetchJsonOrTRPCThrow({ ...fmAccess, signal, name: 'Bedrock/FM' })),
            // Inference Profiles
            bedrockAccessAsync(access, 'GET', ipUrl, undefined)
              .then(ipAccess => fetchJsonOrTRPCThrow({ ...ipAccess, signal, name: 'Bedrock/IP' })),
            // Mantle Models
            bedrockAccessAsync(access, 'GET', mantleUrl, undefined)
              .then(mantleAccess => fetchJsonOrTRPCThrow({ ...mantleAccess, signal, name: 'Bedrock/Mantle' })),
          ]);

          // if both FM and IP failed, throw the first error so the user sees it
          if (fmResult.status === 'rejected' && ipResult.status === 'rejected')
            throw fmResult.reason;

          // degrade gracefully if any failed
          const fmResponse = fmResult.status === 'fulfilled' ? fmResult.value : { modelSummaries: [] };
          const ipResponse = ipResult.status === 'fulfilled' ? ipResult.value : { inferenceProfileSummaries: [] };
          const mantleResponse = mantleIdsResult.status === 'fulfilled' ? mantleIdsResult.value : { data: [] };

          _wire?.logResponse(fmResponse);
          _wire?.logResponse(ipResponse);
          _wire?.logResponse(mantleResponse);

          return {
            foundationModels: BedrockWire_API_Models_List.FoundationModelsResponse_schema.parse(fmResponse),
            inferenceProfiles: BedrockWire_API_Models_List.InferenceProfilesResponse_schema.parse(ipResponse),
            mantleModelIds: BedrockWire_API_Models_List.MantleModelsResponse_schema.parse(mantleResponse),
          };
        },
        convertToDescriptions: ({ foundationModels, inferenceProfiles, mantleModelIds }) =>
          bedrockModelsToDescriptions(foundationModels, inferenceProfiles, mantleModelIds),
      });
    }

    case 'gemini': {
      return createListModelsDispatch({
        fetchModels: async () => {
          const { headers, url } = geminiAccess(access, null, GeminiWire_API_Models_List.getPath, false);
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Gemini', signal });
          _wire?.logResponse(wireModels);
          const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models;

          // [DEV] check for stale/unknown model definitions
          geminiValidateParserOutput_DEV(wireModels, detailedModels);
          geminiValidateModelDefs_DEV(detailedModels);

          return detailedModels;
        },
        convertToDescriptions: (detailedModels) => {
          // NOTE: no need to retrieve info for each of the models (e.g. /v1beta/model/gemini-pro),
          // as the List API already has all the info on all the models

          // first filter from the original list
          const filteredModels = detailedModels.filter(geminiFilterModels);

          // map to our output schema
          const models = filteredModels
            .map(geminiModelToModelDescription)
            .filter(model => !!model)
            .sort(geminiSortModels);
          return geminiModelsAddVariants(models);
        },
      });
    }

    case 'ollama': {
      return createListModelsDispatch({
        fetchModels: async () => {
          const { headers, url } = ollamaAccess(access, '/api/tags');
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama', signal });
          _wire?.logResponse(wireModels);
          const models = wireOllamaListModelsSchema.parse(wireModels).models;

          // retrieve info for each of the models
          return await Promise.all(models.map(async (model) => {

            // perform /api/show on each model to get detailed info
            const { headers, url } = ollamaAccess(access, '/api/show');
            const wireModelInfo = await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body: { 'name': model.name }, name: 'Ollama', signal });

            const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo);
            return { ...model, ...modelInfo };
          }));
        },
        convertToDescriptions: (detailedModels) => {
          return detailedModels.map((model) => {
            // the model name is in the format "name:tag" (default tag = 'latest')
            const [modelName, modelTag] = model.name.split(':');
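            // Illustrative example (not from the source): 'mistral:7b-instruct' → modelName 'mistral', modelTag '7b-instruct'; a bare 'llama3' leaves modelTag undefined, i.e. the 'latest' default.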

            // pretty label and description
            const label = _capitalize(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : '');
            const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {};
            let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and unused

            // prepend the parameters count and quantization level
            if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) {
              let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : '';
              if (model.details.quantization_level)
                firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')');
              if (model.size)
                firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`;
              if (baseModel.hasTools)
                firstLine += ' [tools]';
              if (baseModel.hasVision)
                firstLine += ' [vision]';
              description = firstLine + '\n\n' + description;
            }

            /* Find the context window from the 'num_ctx' line in the parameters string, if present
             * - https://github.com/enricoros/big-AGI/issues/309
             * - Note: as of 2024-01-26 the num_ctx line is present in about 50% of models, and in most cases set to 4096
             * - We are tracking the upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future
             */
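            // Illustrative example (not from the source): a parameters string such as
            //   "num_ctx                        8192\nstop                           \"<|user|>\""
            // yields contextWindow = 8192 via the line scan below.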
            let contextWindow = baseModel.contextWindow || 8192;
            if (model.parameters) {
              // split the parameters into lines, and find one called "num_ctx ...spaces... number"
              const paramsNumCtx = model.parameters.split('\n').find((line) => line.startsWith('num_ctx '));
              if (paramsNumCtx) {
                const numCtxValue: string = paramsNumCtx.split(/\s+/)[1];
                if (numCtxValue) {
                  const numCtxNumber: number = parseInt(numCtxValue);
                  if (!isNaN(numCtxNumber))
                    contextWindow = numCtxNumber;
                }
              }
            }

            // auto-detect interfaces from the hardcoded description (in turn parsed from the html page)
            const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : [];
            if (baseModel.hasTools)
              interfaces.push(LLM_IF_OAI_Fn);
            if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic
              interfaces.push(LLM_IF_OAI_Vision);

            // console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n');

            return {
              id: model.name,
              label,
              created: Date.parse(model.modified_at) || undefined, // NOTE: '||' (not '??'), as Date.parse returns NaN on failure
              updated: Date.parse(model.modified_at) || undefined,
              description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown',
              contextWindow,
              ...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}),
              interfaces,
            };
          });
        },
      });
    }

    case 'perplexity':
      // [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/getting-started/pricing#sonar-models-chat-completions)
      return createListModelsDispatch({
        fetchModels: async () => null,
        convertToDescriptions: () => perplexityHardcodedModelDescriptions().reduce(perplexityInjectVariants, []),
      });

    case 'xai':
      // [xAI]: custom models listing
      return createListModelsDispatch({
        fetchModels: async () => xaiFetchModelDescriptions(access),
        convertToDescriptions: models => models.sort(xaiModelSort),
      });

    case 'lmstudio':
      // [LM Studio]: custom models listing with native API
      return createListModelsDispatch({
        fetchModels: async () => lmStudioFetchModels(access),
        convertToDescriptions: (response) => lmStudioModelsToModelDescriptions(response.models),
      });

    case 'zai':
      // [Z.ai]: curated models as primary source; list API is unreliable/abandoned.
      // Optimistically try the API for 0-day model discovery, but never fail on it.
      return createListModelsDispatch({
        fetchModels: async (): Promise<string[]> => {
          try {
            const { headers, url } = openAIAccess(access, null, OPENAI_API_PATHS.models);
            _wire?.logRequest('GET', url, headers);
            const wireModels = await fetchJsonOrTRPCThrow<OpenAIWire_API_Models_List.Response>({ url, headers, name: 'OpenAI/Zai', signal });
            _wire?.logResponse(wireModels);
            return (wireModels?.data || []).map((m: { id: string }) => m.id);
          } catch (error) {
            // API is unreliable - log and continue with curated list only
            console.warn('[Z.ai] Models list API failed, using curated models only:', (error as Error)?.message || error);
            return [];
          }
        },
        convertToDescriptions: (apiModelIds) => {
          const curated = zaiCuratedModelDescriptions();
          const discovered = zaiDiscoverModels(apiModelIds);
          return [...curated, ...discovered].sort(zaiModelSort);
        },
      });

    case 'alibaba':
    case 'azure':
    case 'deepseek':
    case 'groq':
    case 'localai':
    case 'mistral':
    case 'moonshot':
    case 'openai':
    case 'openpipe':
    case 'openrouter':
    case 'togetherai':
      return createListModelsDispatch({

        // [OpenAI-compatible dialects]: openAI-style fetch models list
        fetchModels: async () => {
          const { headers, url } = openAIAccess(access, null, OPENAI_API_PATHS.models);
          _wire?.logRequest('GET', url, headers);
          const wireModels = await fetchJsonOrTRPCThrow<OpenAIWire_API_Models_List.Response>({ url, headers, name: `OpenAI/${_capitalize(dialect)}`, signal });
          _wire?.logResponse(wireModels);
          return wireModels;
        },
// OpenAI models conversions: dependent on the dialect
|
|
convertToDescriptions: (openAIWireModelsResponse) => {
|
|
|
|
// [Together] missing the .data property - so we have to do this early
|
|
if (dialect === 'togetherai')
|
|
return togetherAIModelsToModelDescriptions(openAIWireModelsResponse);
|
|
|
|
// [TLUS-style API] detect by structure: { data: [{ id, tier, capabilities, ... }] }
|
|
if (tlusApiHeuristic(openAIWireModelsResponse)) {
|
|
const tlusModels = tlusApiTryParse(openAIWireModelsResponse);
|
|
if (tlusModels) return tlusModels;
|
|
// fall through if failed
|
|
}
|
|
|
|
// NOTE: we don't zod here as it would strip unknown properties needed for some dialects - so we proceed optimistically
|
|
// let maybeModels = OpenAIWire_API_Models_List.Response_schema.parse(openAIWireModelsResponse).data || [];
|
|
let maybeModels = openAIWireModelsResponse?.data || [];
|
|
|
|
// de-duplicate by ids (can happen for local servers.. upstream bugs)
|
|
const preCount = maybeModels.length;
|
|
maybeModels = maybeModels.filter((model, index) => maybeModels.findIndex(m => m.id === model.id) === index);
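          // (illustrative, not from the source: ids ['a', 'b', 'a'] reduce to ['a', 'b'] - the filter keeps only the first occurrence of each id)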
          if (preCount !== maybeModels.length && dialect !== 'mistral' /* [Mistral, 2025-11-17] Mistral has 2 duplicate models */)
            console.warn(`openai.router.listModels: removed ${preCount - maybeModels.length} duplicate models for dialect ${dialect}`);

          // sort by id
          maybeModels.sort((a, b) => a.id.localeCompare(b.id));

          // every dialect has a different way to enumerate models - we execute the mapping on the server side
          switch (dialect) {
            case 'alibaba':
              return maybeModels
                .filter(({ id }) => alibabaModelFilter(id))
                .map(({ id, created }) => alibabaModelToModelDescription(id, created))
                .sort(alibabaModelSort);

            case 'azure':
              const azureOpenAIDeployments = azureParseFromDeploymentsAPI(maybeModels);
              return azureOpenAIDeployments
                .filter(azureDeploymentFilter)
                .map(azureDeploymentToModelDescription)
                .sort(openAISortModels);

            case 'deepseek':
              return maybeModels
                .filter(({ id }) => deepseekModelFilter(id))
                .map(({ id }) => deepseekModelToModelDescription(id))
                // .reduce(deepseekInjectVariants, [] as ModelDescriptionSchema[]) // was used to inject V3.2-Speciale
                .sort(deepseekModelSort);

            case 'groq':
              // [DEV] check for stale/unknown model definitions
              groqValidateModelDefs_DEV(maybeModels.map(m => m.id));
              return maybeModels
                .filter(groqModelFilter)
                .map(groqModelToModelDescription)
                .sort(groqModelSortFn);

            case 'localai':
              return maybeModels
                .map(({ id }) => localAIModelToModelDescription(id))
                .sort(localAIModelSortFn);

            case 'mistral':
              return mistralModels(maybeModels);

            case 'moonshot':
              return maybeModels
                .filter(moonshotModelFilter)
                .map(moonshotModelToModelDescription)
                .sort(moonshotModelSortFn);

            case 'openai':
              // [ChutesAI] special case for model enumeration
              const oaiHost = access.oaiHost;
              if (chutesAIHeuristic(oaiHost))
                return chutesAIModelsToModelDescriptions(maybeModels);

              // [FireworksAI] special case for model enumeration
              if (fireworksAIHeuristic(oaiHost))
                return fireworksAIModelsToModelDescriptions(maybeModels);

              // [Novita] special case for model enumeration
              if (novitaHeuristic(oaiHost))
                return novitaModelsToModelDescriptions(openAIWireModelsResponse);

              // [LLM API] OpenAI-compatible gateway with rich model metadata
              if (llmapiHeuristic(oaiHost))
                return llmapiModelsToModelDescriptions(openAIWireModelsResponse);

              // [FastChat] make the best of the little info available
              if (fastAPIHeuristic(maybeModels))
                return fastAPIModels(maybeModels);

              // [OpenAI or OpenAI-compatible]: chat-only models, custom sort, manual mapping
              const isNotOpenai = !!(oaiHost && !llmsHostnameMatches(oaiHost, 'api.openai.com')); // false when the host is empty (uses the default) or is explicitly api.openai.com
              const models = maybeModels
                // limit to only 'gpt' and non-'instruct' models
                .filter(openAIModelFilter)
                // to model description
                .map((model: any): ModelDescriptionSchema => openAIModelToModelDescription(model.id, { isNotOpenai, modelCreated: model.created }))
                // inject variants
                .reduce(openAIInjectVariants, [])
                // custom OpenAI sort
                .sort(openAISortModels);

              // [DEV] check for stale/unknown model definitions
              openaiValidateModelDefs_DEV(maybeModels, models);
              return models;

            case 'openpipe':
              return [
                ...maybeModels.map(openPipeModelToModelDescriptions),
                ...openPipeModelDescriptions().sort(openPipeModelSort),
              ];

            case 'openrouter':
              // openRouterStatTokenizers(maybeModels);
              return maybeModels
                .sort(openRouterModelFamilySortFn)
                .map(openRouterModelToModelDescription)
                .filter(desc => !!desc)
                .reduce(openRouterInjectVariants, []);

            default:
              const _exhaustiveCheck: never = dialect;
              throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unhandled dialect: ${dialect}` });
          }
        },
      });

    default:
      const _exhaustiveCheck: never = dialect;
      throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unsupported dialect: ${dialect}` });
  }
}