LLMs: Central Dispatch

This commit is contained in:
Enrico Ros
2025-11-17 03:29:40 -08:00
parent ef0ff55f1f
commit 3fa3bb5d03
43 changed files with 580 additions and 461 deletions
@@ -4,7 +4,7 @@ description: Update Alibaba model definitions with latest pricing and capabiliti
Update `src/modules/llms/server/openai/models/alibaba.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models & Pricing: https://www.alibabacloud.com/help/en/model-studio/models
@@ -4,7 +4,7 @@ description: Update Anthropic model definitions with latest pricing and capabili
Update `src/modules/llms/server/anthropic/anthropic.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://docs.claude.com/en/docs/about-claude/models/overview
@@ -4,7 +4,7 @@ description: Update DeepSeek model definitions with latest pricing and capabilit
Update `src/modules/llms/server/openai/models/deepseek.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Pricing: https://api-docs.deepseek.com/quick_start/pricing
@@ -4,7 +4,7 @@ description: Update Gemini model definitions with latest pricing and capabilitie
Update `src/modules/llms/server/gemini/gemini.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.types.ts`, `src/modules/llms/server/llm.server.types.ts`, and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.types.ts`, `src/modules/llms/server/llm.server.types.ts`, and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://ai.google.dev/gemini-api/docs/models
+1 -1
View File
@@ -4,7 +4,7 @@ description: Update Groq model definitions with latest pricing and capabilities
Update `src/modules/llms/server/openai/models/groq.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://console.groq.com/docs/models
+1 -1
View File
@@ -4,7 +4,7 @@ description: Update Kimi model definitions with latest pricing and capabilities
Update `src/modules/llms/server/openai/models/moonshot.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Pricing: https://platform.moonshot.ai/docs/pricing/chat
@@ -4,7 +4,7 @@ description: Update Mistral model definitions with latest pricing and capabiliti
Update `src/modules/llms/server/openai/models/mistral.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://docs.mistral.ai/getting-started/models/models_overview/
@@ -4,7 +4,7 @@ description: Update Ollama model definitions with latest featured models
Update `src/modules/llms/server/ollama/ollama.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Automated Workflow:**
```bash
@@ -4,7 +4,7 @@ description: Update OpenAI model definitions with latest pricing and capabilitie
Update `src/modules/llms/server/openai/models/openai.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Manual hint:** For pricing page, expand all tables before copying content.
@@ -4,7 +4,7 @@ description: Update OpenPipe model definitions with latest pricing and capabilit
Update `src/modules/llms/server/openai/models/openpipe.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Base Models: https://docs.openpipe.ai/base-models
@@ -4,7 +4,7 @@ description: Update Perplexity model definitions with latest pricing and capabil
Update `src/modules/llms/server/openai/models/perplexity.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://docs.perplexity.ai/getting-started/models
+1 -1
View File
@@ -4,7 +4,7 @@ description: Update xAI model definitions with latest pricing and capabilities
Update `src/modules/llms/server/openai/models/xai.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models & Pricing: https://docs.x.ai/docs/models?cluster=us-east-1#detailed-pricing-for-all-grok-models
+1 -1
View File
@@ -54,7 +54,7 @@ If the running LocalAI instance is configured with a [Model Gallery](https://loc
At the time of writing, LocalAI does not publish the model `context window size`.
Every model is assumed to be capable of chatting, and with a context window of 4096 tokens.
Please update the [src/modules/llms/transports/server/openai/models/models.data.ts](../src/modules/llms/server/openai/models/models.data.ts)
Please update the [src/modules/llms/server/models.mappings.ts](../src/modules/llms/server/models.mappings.ts)
file with the mapping information between LocalAI model IDs and names/descriptions/tokens, etc.
# 🤝 Support
@@ -1,8 +1,15 @@
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import * as z from 'zod/v4';
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
import { Release } from '~/common/app.release';
import type { ModelDescriptionSchema } from '../llm.server.types';
// configuration
export const DEV_DEBUG_ANTHROPIC_MODELS = Release.IsNodeDevBuild;
const ANT_PAR_WEB: ModelDescriptionSchema['parameterSpecs'] = [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
@@ -239,3 +246,78 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
// retired: 'claude-2.1'
// retired: 'claude-2.0'
];
// -- Wire Types --

/**
 * Namespace for the Anthropic API Models List response schema.
 * NOTE: not merged into AIX because of possible circular dependency issues - future work.
 */
export namespace AnthropicWire_API_Models_List {

  export type ModelObject = z.infer<typeof ModelObject_schema>;
  const ModelObject_schema = z.object({
    type: z.literal('model'),
    id: z.string(),
    display_name: z.string(),
    created_at: z.string(),
  });

  // exported so callers can type the whole paginated response, not just the items
  // (this alias existed in the pre-refactor namespace and was dropped in the move)
  export type Response = z.infer<typeof Response_schema>;
  export const Response_schema = z.object({
    data: z.array(ModelObject_schema),
    has_more: z.boolean(),
    first_id: z.string().nullable(),
    last_id: z.string().nullable(),
  });

}
// -- Helper Functions --

/**
 * DEV: warns about models we still hardcode but that the Anthropic API no longer lists.
 * Mirrors Gemini's geminiDevCheckForSuperfluousModels_DEV. No-op outside dev builds.
 */
export function llmsAntDevCheckForObsoletedModels_DEV(availableModels: AnthropicWire_API_Models_List.ModelObject[]): void {
  if (!DEV_DEBUG_ANTHROPIC_MODELS) return;
  const liveModelIds = new Set(availableModels.map(({ id }) => id));
  const staleDefinitions = hardcodedAnthropicModels.filter(({ id }) => !liveModelIds.has(id));
  if (staleDefinitions.length > 0)
    console.log(`[DEV] Anthropic: obsoleted model definitions: [ ${staleDefinitions.map(m => m.id).join(', ')} ]`);
}
/**
 * Builds a default ModelDescriptionSchema for an Anthropic model we do not have hardcoded,
 * so brand-new models are usable on day 0 with sensible capability defaults.
 */
export function llmsAntCreatePlaceholderModel(model: AnthropicWire_API_Models_List.ModelObject): ModelDescriptionSchema {
  // unix seconds, as used by the rest of the model descriptions
  const createdUnixSeconds = Math.round(new Date(model.created_at).getTime() / 1000);
  // assume the newest models support the full current interface set
  const placeholderInterfaces = [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching];
  return {
    id: model.id,
    label: model.display_name,
    created: createdUnixSeconds,
    description: 'Newest model, description not available yet.',
    contextWindow: 200000,
    maxCompletionTokens: 8192,
    trainingDataCutoff: 'Latest',
    interfaces: placeholderInterfaces,
    // chatPrice: ...
    // benchmark: ...
  };
}
/**
 * Injects the LLM_IF_Tools_WebSearch interface for models that have web search/fetch parameters.
 * This allows the UI to show the web search indicator automatically based on model capabilities.
 */
export function llmsAntInjectWebSearchInterface(model: ModelDescriptionSchema): ModelDescriptionSchema {
  // a model advertises web capability via its Anthropic-specific parameter specs
  const hasWebParams = model.parameterSpecs?.some(spec =>
    spec.paramId === 'llmVndAntWebSearch' || spec.paramId === 'llmVndAntWebFetch',
  );
  if (!hasWebParams || model.interfaces?.includes(LLM_IF_Tools_WebSearch))
    return model;
  // guard the spread: interfaces is treated as optional above (`model.interfaces?.includes`),
  // so don't assume it is set here — the original spread would throw on undefined
  return {
    ...model,
    interfaces: [...(model.interfaces ?? []), LLM_IF_Tools_WebSearch],
  };
}
@@ -5,21 +5,15 @@ import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
import { env } from '~/server/env';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
import { Release } from '~/common/app.release';
import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types';
import { hardcodedAnthropicModels, hardcodedAnthropicVariants } from './anthropic.models';
import { fixupHost } from '~/modules/llms/server/openai/openai.router';
import { ListModelsResponse_schema } from '../llm.server.types';
import { fixupHost } from '../openai/openai.router';
import { listModelsRunDispatch } from '../listModels.dispatch';
// configuration and defaults
const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com';
const DEFAULT_HELICONE_ANTHROPIC_HOST = 'anthropic.hconeai.com';
const DEV_DEBUG_ANTHROPIC_MODELS = Release.IsNodeDevBuild;
const DEFAULT_ANTHROPIC_HEADERS = {
// Latest version hasn't changed (as of Feb 2025)
'anthropic-version': '2023-06-01',
@@ -165,10 +159,6 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string,
};
}
/** Converts an ISO-8601 date string to a unix timestamp, rounded to whole seconds. */
function roundTime(date: string) {
  const epochMillis = new Date(date).getTime();
  return Math.round(epochMillis / 1000);
}
// Input Schemas
@@ -185,23 +175,6 @@ const listModelsInputSchema = z.object({
});
// Helpers
/**
 * Injects the LLM_IF_Tools_WebSearch interface for models that have web search/fetch parameters.
 * This allows the UI to show the web search indicator automatically based on model capabilities.
 */
function _injectWebSearchInterface(model: ModelDescriptionSchema): ModelDescriptionSchema {
  const webParamIds = ['llmVndAntWebSearch', 'llmVndAntWebFetch'];
  const advertisesWeb = model.parameterSpecs?.some(({ paramId }) => webParamIds.includes(paramId));
  // leave untouched when there's nothing to inject, or it's already present
  if (!advertisesWeb || model.interfaces?.includes(LLM_IF_Tools_WebSearch))
    return model;
  return { ...model, interfaces: [...model.interfaces, LLM_IF_Tools_WebSearch] };
}
// Router
export const llmAnthropicRouter = createTRPCRouter({
@@ -210,81 +183,9 @@ export const llmAnthropicRouter = createTRPCRouter({
listModels: publicProcedure
.input(listModelsInputSchema)
.output(ListModelsResponse_schema)
.query(async ({ input: { access } }) => {
.query(async ({ input: { access }, signal }) => {
// get the models
const wireModels = await anthropicGETOrThrow(access, '/v1/models?limit=1000');
const { data: availableModels } = AnthropicWire_API_Models_List.Response_schema.parse(wireModels);
// sort by: family (desc) > class (desc) > date (desc) -- Future NOTE: -5- will match -4-5- and -3-5-.. figure something else out
const familyPrecedence = ['-4-7-', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f));
const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c));
// cast the models to the common schema
const models = availableModels
.sort((a, b) => {
const familyA = getFamilyIdx(a.id);
const familyB = getFamilyIdx(b.id);
const classA = getClassIdx(a.id);
const classB = getClassIdx(b.id);
// family desc (lower index = better, -1 = unknown goes last)
if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB);
// class desc
if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
// date desc (newer first) - string comparison works since format is YYYYMMDD
return b.id.localeCompare(a.id);
})
.reduce((acc, model) => {
// find the model description
const hardcodedModel = hardcodedAnthropicModels.find(m => m.id === model.id);
if (hardcodedModel) {
// update creation date
if (!hardcodedModel.created && model.created_at)
hardcodedModel.created = roundTime(model.created_at);
// add FIRST a thinking variant, if defined
if (hardcodedAnthropicVariants[model.id])
acc.push({
...hardcodedModel,
...hardcodedAnthropicVariants[model.id],
});
// add the base model
acc.push(hardcodedModel);
} else {
// for day-0 support of new models, create a placeholder model using sensible defaults
const novelModel = _createPlaceholderModel(model);
// if (DEV_DEBUG_ANTHROPIC_MODELS) // kind of important...
console.log('[DEV] anthropic.router: new model found, please configure it:', novelModel.id);
acc.push(novelModel);
}
return acc;
}, [] as ModelDescriptionSchema[])
.map(_injectWebSearchInterface);
// developers warning for obsoleted models (we have them, but they are not in the API response anymore)
if (DEV_DEBUG_ANTHROPIC_MODELS) {
const apiModelIds = new Set(availableModels.map(m => m.id));
const additionalModels = hardcodedAnthropicModels.filter(m => !apiModelIds.has(m.id));
if (additionalModels.length > 0)
console.log('[DEV] anthropic.router: obsoleted models:', additionalModels.map(m => m.id).join(', '));
}
// additionalModels.forEach(m => {
// m.label += ' (Removed)';
// m.isLegacy = true;
// });
// models.push(...additionalModels);
const models = await listModelsRunDispatch(access, signal);
return { models };
}),
@@ -328,47 +229,3 @@ export const llmAnthropicRouter = createTRPCRouter({
}),
});
/**
 * Create a placeholder ModelDescriptionSchema for models not in the hardcoded list,
 * using sensible defaults with the newest available interfaces.
 */
function _createPlaceholderModel(model: AnthropicWire_API_Models_List.ModelObject): ModelDescriptionSchema {
  // creation time in unix seconds, matching the other descriptions
  const created = Math.round(new Date(model.created_at).getTime() / 1000);
  return {
    id: model.id,
    label: model.display_name,
    created,
    description: 'Newest model, description not available yet.',
    contextWindow: 200000,
    maxCompletionTokens: 8192,
    trainingDataCutoff: 'Latest',
    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
    // chatPrice: ...
    // benchmark: ...
  };
}
/**
 * Namespace for the Anthropic API Models List response schema.
 * NOTE: not merged into AIX because of possible circular dependency issues - future work.
 */
namespace AnthropicWire_API_Models_List {

  // one entry of the paginated /v1/models response
  export type ModelObject = z.infer<typeof ModelObject_schema>;
  const ModelObject_schema = z.object({
    type: z.literal('model'),
    id: z.string(),
    display_name: z.string(),
    // ISO date string — converted to unix seconds by callers (roundTime)
    created_at: z.string(),
  });

  // the full paginated response: items plus cursor fields
  export type Response = z.infer<typeof Response_schema>;
  export const Response_schema = z.object({
    data: z.array(ModelObject_schema),
    has_more: z.boolean(),
    first_id: z.string().nullable(),
    last_id: z.string().nullable(),
  });

}
@@ -7,11 +7,11 @@ import packageJson from '../../../../../package.json';
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { GeminiWire_API_Models_List, GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import { GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import { ListModelsResponse_schema } from '../llm.server.types';
import { geminiDevCheckForParserMisses_DEV, geminiDevCheckForSuperfluousModels_DEV, geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels } from './gemini.models';
import { fixupHost } from '~/modules/llms/server/openai/openai.router';
import { fixupHost } from '../openai/openai.router';
import { listModelsRunDispatch } from '../listModels.dispatch';
// Default hosts
@@ -93,29 +93,11 @@ export const llmGeminiRouter = createTRPCRouter({
listModels: publicProcedure
.input(accessOnlySchema)
.output(ListModelsResponse_schema)
.query(async ({ input }) => {
.query(async ({ input, signal }) => {
// get the models
const wireModels = await geminiGET(input.access, null, GeminiWire_API_Models_List.getPath, false);
const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models;
geminiDevCheckForParserMisses_DEV(wireModels, detailedModels);
geminiDevCheckForSuperfluousModels_DEV(detailedModels.map(model => model.name));
const models = await listModelsRunDispatch(input.access, signal);
// NOTE: no need to retrieve info for each of the models (e.g. /v1beta/model/gemini-pro).,
// as the List API already all the info on all the models
// first filter from the original list
const filteredModels = detailedModels.filter(geminiFilterModels);
// map to our output schema
const models = filteredModels
.map(geminiModelToModelDescription)
.filter(model => !!model)
.sort(geminiSortModels);
return {
models: geminiModelsAddVariants(models),
};
return { models };
}),
});
@@ -0,0 +1,425 @@
import { TRPCError } from '@trpc/server';
import type { AixAPI_Access } from '~/modules/aix/server/api/aix.wiretypes';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { ModelDescriptionSchema } from './llm.server.types';
// protocol: Anthropic
import { AnthropicWire_API_Models_List, hardcodedAnthropicModels, hardcodedAnthropicVariants, llmsAntCreatePlaceholderModel, llmsAntDevCheckForObsoletedModels_DEV, llmsAntInjectWebSearchInterface } from './anthropic/anthropic.models';
import { anthropicAccess } from './anthropic/anthropic.router';
// protocol: Gemini
import { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import { geminiAccess } from './gemini/gemini.router';
import { geminiDevCheckForParserMisses_DEV, geminiDevCheckForSuperfluousModels_DEV, geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels } from './gemini/gemini.models';
// protocol: Ollama
import { OLLAMA_BASE_MODELS } from './ollama/ollama.models';
import { ollamaAccess } from './ollama/ollama.router';
import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama/ollama.wiretypes';
// protocol: OpenAI-compatible
import { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './openai/models/alibaba.models';
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './openai/models/azure.models';
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './openai/models/chutesai.models';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './openai/models/deepseek.models';
import { fastAPIHeuristic, fastAPIModels } from './openai/models/fastapi.models';
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './openai/models/fireworksai.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './openai/models/groq.models';
import { lmStudioModelToModelDescription } from './openai/models/lmstudio.models';
import { localAIModelSortFn, localAIModelToModelDescription } from './openai/models/localai.models';
import { mistralModels } from './openai/models/mistral.models';
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './openai/models/moonshot.models';
import { openAIAccess } from './openai/openai.router';
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './openai/models/openpipe.models';
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './openai/models/openrouter.models';
import { openaiDevCheckForModelsOverlap_DEV, openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './openai/models/openai.models';
import { perplexityHardcodedModelDescriptions, perplexityInjectVariants } from './openai/models/perplexity.models';
import { togetherAIModelsToModelDescriptions } from './openai/models/together.models';
import { xaiFetchModelDescriptions, xaiModelSort } from './openai/models/xai.models';
// -- Dispatch types --

/**
 * Per-vendor pair of operations for listing models: fetch the raw wire payload,
 * then map it to our common ModelDescriptionSchema list.
 * TWireModels is the vendor-specific wire shape returned by fetchModels and
 * consumed by convertToDescriptions (defaults to `any` for untyped dispatches).
 */
export type ListModelsDispatch<TWireModels = any> = {
  // performs the network call(s) and returns the vendor's raw models payload
  fetchModels: () => Promise<TWireModels>;
  // maps the raw payload to the common model descriptions
  convertToDescriptions: (wireModels: TWireModels) => ModelDescriptionSchema[];
};
/**
 * Identity helper that pins type inference: TypeScript infers T from the
 * fetchModels return type and then enforces it on convertToDescriptions.
 */
function createDispatch<T>(typedDispatch: ListModelsDispatch<T>): ListModelsDispatch<T> {
  return typedDispatch;
}
// -- Specialized Implementations -- Core of Server-side LLM Model Listing abstraction --

/**
 * Lists the models for the given access by routing to the vendor-specific dispatch:
 * fetches the raw wire payload, then converts it to ModelDescriptionSchema[].
 */
export async function listModelsRunDispatch(access: AixAPI_Access, signal?: AbortSignal) {
  const { fetchModels, convertToDescriptions } = _listModelsCreateDispatch(access, signal);
  const wirePayload = await fetchModels();
  return convertToDescriptions(wirePayload);
}
/**
* Specializes to the correct vendor a request for listing models.
* This follows the same pattern as AIX's chatGenerate dispatcher for consistency.
*/
function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal): ListModelsDispatch {
// dialect is the only common property
const { dialect } = access;
switch (dialect) {
case 'anthropic': {
return createDispatch({
fetchModels: async () => {
const { headers, url } = anthropicAccess(access, '/v1/models?limit=1000', {/* ... no options for list ... */ });
const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Anthropic', signal });
return AnthropicWire_API_Models_List.Response_schema.parse(wireModels);
},
convertToDescriptions: (wireModelsResponse) => {
const { data: availableModels } = wireModelsResponse;
// sort by: family (desc) > class (desc) > date (desc) -- Future NOTE: -5- will match -4-5- and -3-5-.. figure something else out
const familyPrecedence = ['-4-7-', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f));
const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c));
// cast the models to the common schema
const models = availableModels
.sort((a, b) => {
const familyA = getFamilyIdx(a.id);
const familyB = getFamilyIdx(b.id);
const classA = getClassIdx(a.id);
const classB = getClassIdx(b.id);
// family desc (lower index = better, -1 = unknown goes last)
if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB);
// class desc
if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
// date desc (newer first) - string comparison works since format is YYYYMMDD
return b.id.localeCompare(a.id);
})
.reduce((acc: ModelDescriptionSchema[], model) => {
// find the model description
const hardcodedModel = hardcodedAnthropicModels.find(m => m.id === model.id);
if (hardcodedModel) {
// update creation date
function roundTime(date: string) {
return Math.round(new Date(date).getTime() / 1000);
}
if (!hardcodedModel.created && model.created_at)
hardcodedModel.created = roundTime(model.created_at);
// add FIRST a thinking variant, if defined
if (hardcodedAnthropicVariants[model.id])
acc.push({
...hardcodedModel,
...hardcodedAnthropicVariants[model.id],
});
// add the base model
acc.push(hardcodedModel);
} else {
// for day-0 support of new models, create a placeholder model using sensible defaults
const novelModel = llmsAntCreatePlaceholderModel(model);
// if (DEV_DEBUG_ANTHROPIC_MODELS) // kind of important...
console.log('[DEV] anthropic.router: new model found, please configure it:', novelModel.id);
acc.push(novelModel);
}
return acc;
}, [] as ModelDescriptionSchema[])
.map(llmsAntInjectWebSearchInterface);
// [DEV] check for obsoleted models (defined but no longer in API response)
llmsAntDevCheckForObsoletedModels_DEV(availableModels);
return models;
},
});
}
case 'gemini': {
return createDispatch({
fetchModels: async () => {
const { headers, url } = geminiAccess(access, null, GeminiWire_API_Models_List.getPath, false);
const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Gemini', signal });
const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models;
// [DEV] check for missing or superfluous models
geminiDevCheckForParserMisses_DEV(wireModels, detailedModels);
geminiDevCheckForSuperfluousModels_DEV(detailedModels.map((model: any) => model.name));
return detailedModels;
},
convertToDescriptions: (detailedModels) => {
// NOTE: no need to retrieve info for each of the models (e.g. /v1beta/model/gemini-pro),
// as the List API already has all the info on all the models
// first filter from the original list
const filteredModels = detailedModels.filter(geminiFilterModels);
// map to our output schema
const models = filteredModels
.map(geminiModelToModelDescription)
.filter(model => !!model)
.sort(geminiSortModels);
return geminiModelsAddVariants(models);
},
});
}
case 'ollama': {
return createDispatch({
fetchModels: async () => {
const { headers, url } = ollamaAccess(access, '/api/tags');
const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama', signal });
const models = wireOllamaListModelsSchema.parse(wireModels).models;
// retrieve info for each of the models
return await Promise.all(models.map(async (model) => {
// perform /api/show on each model to get detailed info
const { headers, url } = ollamaAccess(access, '/api/show');
const wireModelInfo = await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body: { 'name': model.name }, name: 'Ollama', signal });
const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo);
return { ...model, ...modelInfo };
}));
},
convertToDescriptions: (detailedModels) => {
return detailedModels.map((model) => {
// the model name is in the format "name:tag" (default tag = 'latest')
const [modelName, modelTag] = model.name.split(':');
// pretty label and description
const label = serverCapitalizeFirstLetter(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : '');
const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {};
let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and not used by nobody
// prepend the parameters count and quantization level
if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) {
let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : '';
if (model.details.quantization_level)
firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')');
if (model.size)
firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`;
if (baseModel.hasTools)
firstLine += ' [tools]';
if (baseModel.hasVision)
firstLine += ' [vision]';
description = firstLine + '\n\n' + description;
}
/* Find the context window from the 'num_ctx' line in the parameters string, if present
* - https://github.com/enricoros/big-AGI/issues/309
* - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096
* - We are tracking the Upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future
*/
let contextWindow = baseModel.contextWindow || 8192;
if (model.parameters) {
// split the parameters into lines, and find one called "num_ctx ...spaces... number"
const paramsNumCtx = model.parameters.split('\n').find((line) => line.startsWith('num_ctx '));
if (paramsNumCtx) {
const numCtxValue: string = paramsNumCtx.split(/\s+/)[1];
if (numCtxValue) {
const numCtxNumber: number = parseInt(numCtxValue);
if (!isNaN(numCtxNumber))
contextWindow = numCtxNumber;
}
}
}
// auto-detect interfaces from the hardcoded description (in turn parsed from the html page)
const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : [];
if (baseModel.hasTools)
interfaces.push(LLM_IF_OAI_Fn);
if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic
interfaces.push(LLM_IF_OAI_Vision);
// console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n');
return {
id: model.name,
label,
created: Date.parse(model.modified_at) ?? undefined,
updated: Date.parse(model.modified_at) ?? undefined,
description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown',
contextWindow,
...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}),
interfaces,
};
});
},
});
}
case 'perplexity':
// [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/getting-started/pricing#sonar-models-chat-completions)
return createDispatch({
  // no remote call: Perplexity does not expose a models-listing API (see link above)
  fetchModels: async () => null,
  // serve the hardcoded catalog, expanding each base model into its variants
  convertToDescriptions: () => perplexityHardcodedModelDescriptions().reduce(perplexityInjectVariants, []),
});
case 'xai':
// [xAI]: custom models listing
return createDispatch({
  // xAI has its own listing endpoint; the fetcher already returns full model descriptions
  fetchModels: async () => xaiFetchModelDescriptions(access),
  // nothing to map - just order the descriptions (NOTE: sorts the fetched array in place)
  convertToDescriptions: models => models.sort(xaiModelSort),
});
case 'alibaba':
case 'azure':
case 'deepseek':
case 'groq':
case 'lmstudio':
case 'localai':
case 'mistral':
case 'moonshot':
case 'openai':
case 'openpipe':
case 'openrouter':
case 'togetherai':
return createDispatch({
// [OpenAI-compatible dialects]: fetch openAI-style /v1/models API
fetchModels: async () => {
  // `access`, `dialect` and `signal` are captured from the enclosing listModels scope
  // openAIAccess builds the request URL and auth headers for this access configuration
  const { headers, url } = openAIAccess(access, null, '/v1/models');
  // throws a TRPCError (via fetchJsonOrTRPCThrow) on network/HTTP failure
  return fetchJsonOrTRPCThrow<OpenAIWire_API_Models_List.Response>({ url, headers, name: `OpenAI/${serverCapitalizeFirstLetter(dialect)}`, signal });
},
// OpenAI models conversions: dependent on the dialect
// Normalizes the wire /v1/models response (dedupe, sort by id), then maps it to
// ModelDescriptionSchema[] with the dialect-specific filter/map/sort pipeline.
convertToDescriptions: (openAIWireModelsResponse) => {

  // [Together] missing the .data property - so we have to do this early
  if (dialect === 'togetherai')
    return togetherAIModelsToModelDescriptions(openAIWireModelsResponse);

  // NOTE: we don't zod here as it would strip unknown properties needed for some dialects - so we proceed optimistically
  // let maybeModels = OpenAIWire_API_Models_List.Response_schema.parse(openAIWireModelsResponse).data || [];
  let maybeModels = openAIWireModelsResponse?.data || [];

  // de-duplicate by ids, keeping the first occurrence (can happen for local servers.. upstream bugs)
  // O(n) with a Set, rather than a findIndex-inside-filter O(n^2) scan
  const preCount = maybeModels.length;
  const seenModelIds = new Set<string>();
  maybeModels = maybeModels.filter(model => {
    if (seenModelIds.has(model.id)) return false;
    seenModelIds.add(model.id);
    return true;
  });
  if (preCount !== maybeModels.length)
    console.warn(`openai.router.listModels: removed ${preCount - maybeModels.length} duplicate models for dialect ${dialect}`);

  // sort by id
  maybeModels.sort((a, b) => a.id.localeCompare(b.id));

  // every dialect has a different way to enumerate models - we execute the mapping on the server side
  // NOTE: cases with local `const` declarations are braced to keep them block-scoped (lint: no-case-declarations)
  switch (dialect) {

    case 'alibaba':
      return maybeModels
        .filter(({ id }) => alibabaModelFilter(id))
        .map(({ id, created }) => alibabaModelToModelDescription(id, created))
        .sort(alibabaModelSort);

    case 'azure': {
      // Azure enumerates deployments, not models - parse those first
      const azureOpenAIDeployments = azureParseFromDeploymentsAPI(maybeModels);
      return azureOpenAIDeployments
        .filter(azureDeploymentFilter)
        .map(azureDeploymentToModelDescription)
        .sort(openAISortModels);
    }

    case 'deepseek':
      return maybeModels
        .filter(({ id }) => deepseekModelFilter(id))
        .map(({ id }) => deepseekModelToModelDescription(id))
        .sort(deepseekModelSort);

    case 'groq':
      return maybeModels
        .filter(groqModelFilter)
        .map(groqModelToModelDescription)
        .sort(groqModelSortFn);

    case 'lmstudio':
      return maybeModels
        .map(({ id }) => lmStudioModelToModelDescription(id));

    case 'localai':
      return maybeModels
        .map(({ id }) => localAIModelToModelDescription(id))
        .sort(localAIModelSortFn);

    case 'mistral':
      return mistralModels(maybeModels);

    case 'moonshot':
      return maybeModels
        .filter(moonshotModelFilter)
        .map(moonshotModelToModelDescription)
        .sort(moonshotModelSortFn);

    case 'openai': {
      // [ChutesAI] special case for model enumeration
      const oaiHost = access.oaiHost;
      if (chutesAIHeuristic(oaiHost))
        return chutesAIModelsToModelDescriptions(maybeModels);
      // [FireworksAI] special case for model enumeration
      if (fireworksAIHeuristic(oaiHost))
        return fireworksAIModelsToModelDescriptions(maybeModels);
      // [FastChat] make the best of the little info
      if (fastAPIHeuristic(maybeModels))
        return fastAPIModels(maybeModels);
      // [OpenAI or OpenAI-compatible]: chat-only models, custom sort, manual mapping
      const models = maybeModels
        // limit to only 'gpt' and 'non instruct' models
        .filter(openAIModelFilter)
        // to model description
        .map((model: any): ModelDescriptionSchema => openAIModelToModelDescription(model.id, model.created))
        // inject variants
        .reduce(openAIInjectVariants, [] as ModelDescriptionSchema[])
        // custom OpenAI sort
        .sort(openAISortModels);
      // [DEV] check for superfluous and missing models
      openaiDevCheckForModelsOverlap_DEV(maybeModels, models);
      return models;
    }

    case 'openpipe':
      return [
        ...maybeModels.map(openPipeModelToModelDescriptions),
        ...openPipeModelDescriptions().sort(openPipeModelSort),
      ];

    case 'openrouter':
      // openRouterStatTokenizers(maybeModels);
      return maybeModels
        .sort(openRouterModelFamilySortFn)
        .map(openRouterModelToModelDescription)
        .filter(desc => !!desc)
        .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);

    default: {
      // compile-time exhaustiveness: a new dialect above without a case here fails to typecheck
      const _exhaustiveCheck: never = dialect;
      throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unhandled dialect: ${dialect}` });
    }
  }
},
});
default:
const _exhaustiveCheck: never = dialect;
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unsupported dialect: ${dialect}` });
}
}
@@ -1,4 +1,4 @@
import type { ModelDescriptionSchema } from '../../llm.server.types';
import type { ModelDescriptionSchema } from './llm.server.types';
// -- Manual model mappings: types and helper --
+17 -94
View File
@@ -3,23 +3,18 @@ import { TRPCError } from '@trpc/server';
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
import { env } from '~/server/env';
import { fetchJsonOrTRPCThrow, fetchTextOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { capitalizeFirstLetter } from '~/common/util/textUtils';
import { fetchTextOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { serverCapitalizeFirstLetter } from '~/server/wire';
import { ListModelsResponse_schema } from '../llm.server.types';
import { fixupHost } from '../openai/openai.router';
import { listModelsRunDispatch } from '../listModels.dispatch';
import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models';
import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama.wiretypes';
import { fixupHost } from '~/modules/llms/server/openai/openai.router';
// Default hosts
// configuration
const DEFAULT_OLLAMA_HOST = 'http://127.0.0.1:11434';
// export const OLLAMA_PATH_CHAT = '/api/chat';
const OLLAMA_PATH_TAGS = '/api/tags';
const OLLAMA_PATH_SHOW = '/api/show';
// Mappers
@@ -84,15 +79,15 @@ export function ollamaCompletionPayload(model: OpenAIModelSchema, history: OpenA
};
}*/
async function ollamaGET<TOut extends object>(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = ollamaAccess(access, apiPath);
return await fetchJsonOrTRPCThrow<TOut>({ url, headers, name: 'Ollama' });
}
// async function ollamaGET<TOut extends object>(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
// const { headers, url } = ollamaAccess(access, apiPath);
// return await fetchJsonOrTRPCThrow<TOut>({ url, headers, name: 'Ollama' });
// }
async function ollamaPOST<TOut extends object, TPostBody extends object>(access: OllamaAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = ollamaAccess(access, apiPath);
return await fetchJsonOrTRPCThrow<TOut, TPostBody>({ url, method: 'POST', headers, body, name: 'Ollama' });
}
// async function ollamaPOST<TOut extends object, TPostBody extends object>(access: OllamaAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
// const { headers, url } = ollamaAccess(access, apiPath);
// return await fetchJsonOrTRPCThrow<TOut, TPostBody>({ url, method: 'POST', headers, body, name: 'Ollama' });
// }
// Input/Output Schemas
@@ -137,7 +132,7 @@ export const llmOllamaRouter = createTRPCRouter({
return {
pullableModels: Object.entries(OLLAMA_BASE_MODELS).map(([model_id, model]) => ({
id: model_id,
label: capitalizeFirstLetter(model_id),
label: serverCapitalizeFirstLetter(model_id),
tag: 'latest',
tags: model.tags?.length ? model.tags : [],
description: '', // model.description, // REMOVED description - bloated and not used by nobody
@@ -185,83 +180,11 @@ export const llmOllamaRouter = createTRPCRouter({
listModels: publicProcedure
.input(accessOnlySchema)
.output(ListModelsResponse_schema)
.query(async ({ input }) => {
.query(async ({ input, signal }) => {
// get the models
const wireModels = await ollamaGET(input.access, OLLAMA_PATH_TAGS);
let models = wireOllamaListModelsSchema.parse(wireModels).models;
const models = await listModelsRunDispatch(input.access, signal);
// retrieve info for each of the models (/api/show, post call, in parallel)
const detailedModels = await Promise.all(models.map(async model => {
const wireModelInfo = await ollamaPOST(input.access, { 'name': model.name }, OLLAMA_PATH_SHOW);
const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo);
return { ...model, ...modelInfo };
}));
return {
models: detailedModels.map(model => {
// the model name is in the format "name:tag" (default tag = 'latest')
const [modelName, modelTag] = model.name.split(':');
// pretty label and description
const label = capitalizeFirstLetter(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : '');
const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {};
let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and not used by nobody
// prepend the parameters count and quantization level
if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) {
let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : '';
if (model.details.quantization_level)
firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')');
if (model.size)
firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`;
if (baseModel.hasTools)
firstLine += ' [tools]';
if (baseModel.hasVision)
firstLine += ' [vision]';
description = firstLine + '\n\n' + description;
}
/* Find the context window from the 'num_ctx' line in the parameters string, if present
* - https://github.com/enricoros/big-AGI/issues/309
* - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096
* - We are tracking the Upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future
*/
let contextWindow = baseModel.contextWindow || 8192;
if (model.parameters) {
// split the parameters into lines, and find one called "num_ctx ...spaces... number"
const paramsNumCtx = model.parameters.split('\n').find(line => line.startsWith('num_ctx '));
if (paramsNumCtx) {
const numCtxValue: string = paramsNumCtx.split(/\s+/)[1];
if (numCtxValue) {
const numCtxNumber: number = parseInt(numCtxValue);
if (!isNaN(numCtxNumber))
contextWindow = numCtxNumber;
}
}
}
// auto-detect interfaces from the hardcoded description (in turn parsed from the html page)
const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : [];
if (baseModel.hasTools)
interfaces.push(LLM_IF_OAI_Fn);
if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic
interfaces.push(LLM_IF_OAI_Vision);
// console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n');
return {
id: model.name,
label,
created: Date.parse(model.modified_at) ?? undefined,
updated: Date.parse(model.modified_at) ?? undefined,
description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown',
contextWindow,
...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}),
interfaces,
};
}),
};
return { models };
}),
});
@@ -2,7 +2,7 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stor
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
// - Models & Pricing: https://www.alibabacloud.com/help/en/model-studio/models
// - Billing Guide: https://www.alibabacloud.com/help/en/model-studio/billing-for-model-studio
@@ -11,7 +11,7 @@ import type { ModelDescriptionSchema, RequestAccessValues } from '../../llm.serv
import type { OpenAIAccessSchema } from '../openai.router';
import { fixupHost } from '../openai.router';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
import { _fallbackOpenAIModel, _knownOpenAIChatModels } from './openai.models';
@@ -6,7 +6,7 @@ import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
export function chutesAIHeuristic(hostname: string) {
@@ -2,7 +2,7 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning }
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
const _knownDeepseekChatModels: ManualMappings = [
@@ -3,7 +3,7 @@ import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/w
import { DModelInterfaceV1, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
const _fastAPIKnownModels: ManualMappings = [
@@ -4,8 +4,8 @@ import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { wireFireworksAIListOutputSchema } from '../fireworksai.wiretypes';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
import { wireFireworksAIListOutputSchema } from '../wiretypes/fireworksai.wiretypes';
export function fireworksAIHeuristic(hostname: string) {
@@ -1,8 +1,8 @@
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { wireGroqModelsListOutputSchema } from '../groq.wiretypes';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
import { wireGroqModelsListOutputSchema } from '../wiretypes/groq.wiretypes';
/**
@@ -1,7 +1,7 @@
import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping } from './models.data';
import { fromManualMapping } from '../../models.mappings';
export function lmStudioModelToModelDescription(modelId: string): ModelDescriptionSchema {
@@ -1,8 +1,9 @@
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { capitalizeFirstLetter } from '~/common/util/textUtils';
import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, type ManualMappings } from './models.data';
import { fromManualMapping, type ManualMappings } from '../../models.mappings';
// [LocalAI]
@@ -37,7 +38,7 @@ export function localAIModelToModelDescription(modelId: string): ModelDescriptio
.replace(' Q4_K_M', ' (Q4_K_M)')
.replace(' F16', ' (F16)')
.split(' ')
.map(capitalizeFirstLetter)
.map(serverCapitalizeFirstLetter)
.join(' ');
const description = `LocalAI model. File: ${modelId}`;
@@ -3,7 +3,7 @@ import * as z from 'zod/v4';
import { LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
const IF_K2 = [
@@ -4,7 +4,7 @@ import { DModelInterfaceV1, LLM_IF_HOTFIX_NoTemperature, LLM_IF_HOTFIX_StripImag
import { Release } from '~/common/app.release';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, KnownModel, ManualMappings } from './models.data';
import { fromManualMapping, KnownModel, ManualMappings } from '../../models.mappings';
// OpenAI Model Variants
@@ -1,8 +1,8 @@
import type { ModelDescriptionSchema } from '~/modules/llms/server/llm.server.types';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { _knownOpenAIChatModels } from '~/modules/llms/server/openai/models/openai.models';
import { wireOpenPipeModelOutputSchema } from '~/modules/llms/server/openai/openpipe.wiretypes';
import { fromManualMapping, KnownModel } from '~/modules/llms/server/openai/models/models.data';
import { wireOpenPipeModelOutputSchema } from '~/modules/llms/server/openai/wiretypes/openpipe.wiretypes';
import { fromManualMapping, KnownModel } from '~/modules/llms/server/models.mappings';
const _knownOpenPipeChatModels: ModelDescriptionSchema[] = [
@@ -3,8 +3,8 @@ import * as z from 'zod/v4';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping } from './models.data';
import { wireOpenrouterModelsListOutputSchema } from '../openrouter.wiretypes';
import { fromManualMapping } from '../../models.mappings';
import { wireOpenrouterModelsListOutputSchema } from '../wiretypes/openrouter.wiretypes';
// configuration
@@ -144,7 +144,7 @@ export function perplexityInjectVariants(models: ModelDescriptionSchema[], model
return models;
}
export function perplexityAIModelDescriptions() {
export function perplexityHardcodedModelDescriptions() {
// Returns the list of known Perplexity models
return _knownPerplexityChatModels;
}
@@ -1,8 +1,8 @@
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { wireTogetherAIListOutputSchema } from '../togetherai.wiretypes';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
import { wireTogetherAIListOutputSchema } from '../wiretypes/togetherai.wiretypes';
// Note: 2025-01-28 - we used to have hardcoded models here, but now we have a dynamic
@@ -5,7 +5,7 @@ import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, KnownModel, ManualMappings } from './models.data';
import { fromManualMapping, KnownModel, ManualMappings } from '../../models.mappings';
import { openAIAccess, OpenAIAccessSchema } from '../openai.router';
@@ -179,7 +179,7 @@ const _knownXAIChatModels: ManualMappings = [
// xAI Model Descriptions
export async function xaiModelDescriptions(access: OpenAIAccessSchema): Promise<ModelDescriptionSchema[]> {
export async function xaiFetchModelDescriptions(access: OpenAIAccessSchema): Promise<ModelDescriptionSchema[]> {
// List models
const { headers, url } = openAIAccess(access, null, '/v1/language-models');
+5 -156
View File
@@ -7,31 +7,15 @@ import { fetchJsonOrTRPCThrow, TRPCFetcherError } from '~/server/trpc/trpc.route
import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { T2ICreateImageAsyncStreamOp } from '~/modules/t2i/t2i.server';
import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
import { heartbeatsWhileAwaiting } from '~/modules/aix/server/dispatch/heartbeatsWhileAwaiting';
import { Brand } from '~/common/app.config';
import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
import { ListModelsResponse_schema, ModelDescriptionSchema, RequestAccessValues } from '../llm.server.types';
import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './models/alibaba.models';
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureOpenAIAccess, azureParseFromDeploymentsAPI } from './models/azure.models';
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './models/chutesai.models';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
import { fastAPIHeuristic, fastAPIModels } from './models/fastapi.models';
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './models/fireworksai.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './models/groq.models';
import { lmStudioModelToModelDescription } from './models/lmstudio.models';
import { localAIModelSortFn, localAIModelToModelDescription } from './models/localai.models';
import { mistralModels } from './models/mistral.models';
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './models/moonshot.models';
import { openaiDevCheckForModelsOverlap_DEV, openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './models/openpipe.models';
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models';
import { perplexityAIModelDescriptions, perplexityInjectVariants } from './models/perplexity.models';
import { togetherAIModelsToModelDescriptions } from './models/together.models';
import { wireLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
import { xaiModelDescriptions, xaiModelSort } from './models/xai.models';
import { azureOpenAIAccess } from './models/azure.models';
import { listModelsRunDispatch } from '../listModels.dispatch';
import { wireLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './wiretypes/localai.wiretypes';
const openAIDialects = z.enum([
@@ -181,142 +165,7 @@ export const llmOpenAIRouter = createTRPCRouter({
.query(async ({ input: { access }, signal }): Promise<{ models: ModelDescriptionSchema[] }> => {
let models: ModelDescriptionSchema[];
// [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/guides/model-cards)
if (access.dialect === 'perplexity') {
models = perplexityAIModelDescriptions()
.reduce(perplexityInjectVariants, [] as ModelDescriptionSchema[]);
return { models };
}
// [xAI]: custom models listing
if (access.dialect === 'xai')
return { models: (await xaiModelDescriptions(access)).sort(xaiModelSort) };
// [OpenAI-dialects]: fetch openAI-style for all but Azure (will be then used in each dialect)
const openAIWireModelsResponse = await openaiGETOrThrow<OpenAIWire_API_Models_List.Response>(access, '/v1/models', signal);
// [Together] missing the .data property
if (access.dialect === 'togetherai')
return { models: togetherAIModelsToModelDescriptions(openAIWireModelsResponse) };
let openAIModels = openAIWireModelsResponse?.data || [];
// de-duplicate by ids (can happen for local servers.. upstream bugs)
const preCount = openAIModels.length;
openAIModels = openAIModels.filter((model, index) => openAIModels.findIndex(m => m.id === model.id) === index);
if (preCount !== openAIModels.length)
console.warn(`openai.router.listModels: removed ${preCount - openAIModels.length} duplicate models for dialect ${access.dialect}`);
// sort by id
openAIModels.sort((a, b) => a.id.localeCompare(b.id));
// every dialect has a different way to enumerate models - we execute the mapping on the server side
switch (access.dialect) {
case 'alibaba':
models = openAIModels
.filter(({ id }) => alibabaModelFilter(id))
.map(({ id, created }) => alibabaModelToModelDescription(id, created))
.sort(alibabaModelSort);
break;
case 'azure':
const azureOpenAIDeployments = azureParseFromDeploymentsAPI(openAIModels);
models = azureOpenAIDeployments
.filter(azureDeploymentFilter)
.map(azureDeploymentToModelDescription)
.sort(openAISortModels);
break;
case 'deepseek':
models = openAIModels
.filter(({ id }) => deepseekModelFilter(id))
.map(({ id }) => deepseekModelToModelDescription(id))
.sort(deepseekModelSort);
break;
case 'groq':
models = openAIModels
.filter(groqModelFilter)
.map(groqModelToModelDescription)
.sort(groqModelSortFn);
break;
case 'lmstudio':
models = openAIModels
.map(({ id }) => lmStudioModelToModelDescription(id));
break;
// [LocalAI]: map id to label
case 'localai':
models = openAIModels
.map(({ id }) => localAIModelToModelDescription(id))
.sort(localAIModelSortFn);
break;
case 'mistral':
models = mistralModels(openAIModels);
break;
case 'moonshot':
models = openAIModels
.filter(moonshotModelFilter)
.map(moonshotModelToModelDescription)
.sort(moonshotModelSortFn);
break;
// [OpenAI]: chat-only models, custom sort, manual mapping
case 'openai':
// [ChutesAI] special case for model enumeration
if (chutesAIHeuristic(access.oaiHost))
return { models: chutesAIModelsToModelDescriptions(openAIModels) };
// [FireworksAI] special case for model enumeration
if (fireworksAIHeuristic(access.oaiHost))
return { models: fireworksAIModelsToModelDescriptions(openAIModels) };
// [FastChat] make the best of the little info
if (fastAPIHeuristic(openAIModels))
return { models: fastAPIModels(openAIModels) };
models = openAIModels
// limit to only 'gpt' and 'non instruct' models
.filter(openAIModelFilter)
// to model description
.map((model): ModelDescriptionSchema => openAIModelToModelDescription(model.id, model.created))
// inject variants
.reduce(openAIInjectVariants, [] as ModelDescriptionSchema[])
// custom OpenAI sort
.sort(openAISortModels);
// [DEV] check for superfluous and missing models
openaiDevCheckForModelsOverlap_DEV(openAIWireModelsResponse, models);
break;
case 'openpipe':
models = [
...openAIModels.map(openPipeModelToModelDescriptions),
...openPipeModelDescriptions().sort(openPipeModelSort),
];
break;
case 'openrouter':
// openRouterStatTokenizers(openAIModels);
models = openAIModels
.sort(openRouterModelFamilySortFn)
.map(openRouterModelToModelDescription)
.filter(desc => !!desc)
.reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);
break;
}
const models = await listModelsRunDispatch(access, signal);
return { models };
}),