diff --git a/.claude/commands/llms/update-models-alibaba.md b/.claude/commands/llms/update-models-alibaba.md index 3f7566b46..33265ef59 100644 --- a/.claude/commands/llms/update-models-alibaba.md +++ b/.claude/commands/llms/update-models-alibaba.md @@ -4,7 +4,7 @@ description: Update Alibaba model definitions with latest pricing and capabiliti Update `src/modules/llms/server/openai/models/alibaba.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. **Primary Sources:** - Models & Pricing: https://www.alibabacloud.com/help/en/model-studio/models diff --git a/.claude/commands/llms/update-models-anthropic.md b/.claude/commands/llms/update-models-anthropic.md index d8db182c7..7e4b59c31 100644 --- a/.claude/commands/llms/update-models-anthropic.md +++ b/.claude/commands/llms/update-models-anthropic.md @@ -4,7 +4,7 @@ description: Update Anthropic model definitions with latest pricing and capabili Update `src/modules/llms/server/anthropic/anthropic.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. 
**Primary Sources:** - Models: https://docs.claude.com/en/docs/about-claude/models/overview diff --git a/.claude/commands/llms/update-models-deepseek.md b/.claude/commands/llms/update-models-deepseek.md index 0d02ae3b7..1f965a544 100644 --- a/.claude/commands/llms/update-models-deepseek.md +++ b/.claude/commands/llms/update-models-deepseek.md @@ -4,7 +4,7 @@ description: Update DeepSeek model definitions with latest pricing and capabilit Update `src/modules/llms/server/openai/models/deepseek.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. **Primary Sources:** - Pricing: https://api-docs.deepseek.com/quick_start/pricing diff --git a/.claude/commands/llms/update-models-gemini.md b/.claude/commands/llms/update-models-gemini.md index c160ffeae..5a5f2198a 100644 --- a/.claude/commands/llms/update-models-gemini.md +++ b/.claude/commands/llms/update-models-gemini.md @@ -4,7 +4,7 @@ description: Update Gemini model definitions with latest pricing and capabilitie Update `src/modules/llms/server/gemini/gemini.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.types.ts`, `src/modules/llms/server/llm.server.types.ts`, and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.types.ts`, `src/modules/llms/server/llm.server.types.ts`, and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. 
**Primary Sources:** - Models: https://ai.google.dev/gemini-api/docs/models diff --git a/.claude/commands/llms/update-models-groq.md b/.claude/commands/llms/update-models-groq.md index 12a117dd4..25d873a0f 100644 --- a/.claude/commands/llms/update-models-groq.md +++ b/.claude/commands/llms/update-models-groq.md @@ -4,7 +4,7 @@ description: Update Groq model definitions with latest pricing and capabilities Update `src/modules/llms/server/openai/models/groq.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. **Primary Sources:** - Models: https://console.groq.com/docs/models diff --git a/.claude/commands/llms/update-models-kimi.md b/.claude/commands/llms/update-models-kimi.md index 62ff92b83..0fb827297 100644 --- a/.claude/commands/llms/update-models-kimi.md +++ b/.claude/commands/llms/update-models-kimi.md @@ -4,7 +4,7 @@ description: Update Kimi model definitions with latest pricing and capabilities Update `src/modules/llms/server/openai/models/moonshot.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. 
**Primary Sources:** - Pricing: https://platform.moonshot.ai/docs/pricing/chat diff --git a/.claude/commands/llms/update-models-mistral.md b/.claude/commands/llms/update-models-mistral.md index 210d6d0a1..0c2f63bb6 100644 --- a/.claude/commands/llms/update-models-mistral.md +++ b/.claude/commands/llms/update-models-mistral.md @@ -4,7 +4,7 @@ description: Update Mistral model definitions with latest pricing and capabiliti Update `src/modules/llms/server/openai/models/mistral.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. **Primary Sources:** - Models: https://docs.mistral.ai/getting-started/models/models_overview/ diff --git a/.claude/commands/llms/update-models-ollama.md b/.claude/commands/llms/update-models-ollama.md index d4019f89e..fb1986da9 100644 --- a/.claude/commands/llms/update-models-ollama.md +++ b/.claude/commands/llms/update-models-ollama.md @@ -4,7 +4,7 @@ description: Update Ollama model definitions with latest featured models Update `src/modules/llms/server/ollama/ollama.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. 
**Automated Workflow:** ```bash diff --git a/.claude/commands/llms/update-models-openai.md b/.claude/commands/llms/update-models-openai.md index c0963dd35..2e2ff85c6 100644 --- a/.claude/commands/llms/update-models-openai.md +++ b/.claude/commands/llms/update-models-openai.md @@ -4,7 +4,7 @@ description: Update OpenAI model definitions with latest pricing and capabilitie Update `src/modules/llms/server/openai/models/openai.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. **Manual hint:** For pricing page, expand all tables before copying content. diff --git a/.claude/commands/llms/update-models-openpipe.md b/.claude/commands/llms/update-models-openpipe.md index 61f5478c1..5c4974ee8 100644 --- a/.claude/commands/llms/update-models-openpipe.md +++ b/.claude/commands/llms/update-models-openpipe.md @@ -4,7 +4,7 @@ description: Update OpenPipe model definitions with latest pricing and capabilit Update `src/modules/llms/server/openai/models/openpipe.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. 
**Primary Sources:** - Base Models: https://docs.openpipe.ai/base-models diff --git a/.claude/commands/llms/update-models-perplexity.md b/.claude/commands/llms/update-models-perplexity.md index 61e53b54c..34deda12b 100644 --- a/.claude/commands/llms/update-models-perplexity.md +++ b/.claude/commands/llms/update-models-perplexity.md @@ -4,7 +4,7 @@ description: Update Perplexity model definitions with latest pricing and capabil Update `src/modules/llms/server/openai/models/perplexity.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. **Primary Sources:** - Models: https://docs.perplexity.ai/getting-started/models diff --git a/.claude/commands/llms/update-models-xai.md b/.claude/commands/llms/update-models-xai.md index 5df7f9011..ece880f20 100644 --- a/.claude/commands/llms/update-models-xai.md +++ b/.claude/commands/llms/update-models-xai.md @@ -4,7 +4,7 @@ description: Update xAI model definitions with latest pricing and capabilities Update `src/modules/llms/server/openai/models/xai.models.ts` with latest model definitions. -Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code. +Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code. 
**Primary Sources:** - Models & Pricing: https://docs.x.ai/docs/models?cluster=us-east-1#detailed-pricing-for-all-grok-models diff --git a/docs/config-local-localai.md b/docs/config-local-localai.md index 0bab8d45f..de2e92e4c 100644 --- a/docs/config-local-localai.md +++ b/docs/config-local-localai.md @@ -54,7 +54,7 @@ If the running LocalAI instance is configured with a [Model Gallery](https://loc At the time of writing, LocalAI does not publish the model `context window size`. Every model is assumed to be capable of chatting, and with a context window of 4096 tokens. -Please update the [src/modules/llms/transports/server/openai/models/models.data.ts](../src/modules/llms/server/openai/models/models.data.ts) +Please update the [src/modules/llms/server/models.mappings.ts](../src/modules/llms/server/models.mappings.ts) file with the mapping information between LocalAI model IDs and names/descriptions/tokens, etc. # 🤝 Support diff --git a/src/modules/llms/server/anthropic/anthropic.models.ts b/src/modules/llms/server/anthropic/anthropic.models.ts index b8f0aef30..d472742ff 100644 --- a/src/modules/llms/server/anthropic/anthropic.models.ts +++ b/src/modules/llms/server/anthropic/anthropic.models.ts @@ -1,8 +1,15 @@ -import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; +import * as z from 'zod/v4'; + +import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types'; +import { Release } from '~/common/app.release'; import type { ModelDescriptionSchema } from '../llm.server.types'; +// configuration +export const DEV_DEBUG_ANTHROPIC_MODELS = Release.IsNodeDevBuild; + + const ANT_PAR_WEB: ModelDescriptionSchema['parameterSpecs'] = [ { paramId: 'llmVndAntWebSearch' }, { paramId: 'llmVndAntWebFetch' }, @@ -239,3 +246,78 @@ export const hardcodedAnthropicModels: 
// -- Wire Types --

/**
 * Namespace for the Anthropic API Models List response schema.
 * NOTE: not merged into AIX because of possible circular dependency issues - future work.
 */
export namespace AnthropicWire_API_Models_List {

  /** One entry of the list response, as validated by `ModelObject_schema` (all fields are strings on the wire). */
  export type ModelObject = z.infer<typeof ModelObject_schema>;
  const ModelObject_schema = z.object({
    type: z.literal('model'),
    id: z.string(),
    display_name: z.string(),
    created_at: z.string(),
  });

  /** The list response: the model entries plus the pagination fields (`has_more`, `first_id`, `last_id`). */
  export const Response_schema = z.object({
    data: z.array(ModelObject_schema),
    has_more: z.boolean(),
    first_id: z.string().nullable(),
    last_id: z.string().nullable(),
  });

}


// -- Helper Functions --

/**
 * DEV: Checks for obsoleted models that are defined in hardcodedAnthropicModels but no longer present in the API.
 * Similar to Gemini's geminiDevCheckForSuperfluousModels_DEV.
 *
 * Only logs (never throws, never mutates), and only when DEV_DEBUG_ANTHROPIC_MODELS is set.
 *
 * @param availableModels - the model entries returned by the Anthropic models list API
 */
export function llmsAntDevCheckForObsoletedModels_DEV(availableModels: AnthropicWire_API_Models_List.ModelObject[]): void {
  if (!DEV_DEBUG_ANTHROPIC_MODELS)
    return;

  // ids the API still serves; anything we define beyond these is a stale definition
  const apiModelIds = new Set(availableModels.map(m => m.id));
  const obsoletedIds = hardcodedAnthropicModels
    .filter(m => !apiModelIds.has(m.id))
    .map(m => m.id);

  if (obsoletedIds.length > 0)
    console.log(`[DEV] Anthropic: obsoleted model definitions: [ ${obsoletedIds.join(', ')} ]`);
}

/**
 * Create a placeholder ModelDescriptionSchema for Anthropic models not in the hardcoded list.
 * Uses sensible defaults with the newest available interfaces for day-0 support.
 *
 * @param model - an entry of the Anthropic models list response that has no hardcoded definition
 * @returns a description with generic defaults (200000 context window, 8192 max completion tokens,
 *          Chat/Vision/Fn/PromptCaching interfaces); pricing and benchmark are intentionally left unset
 */
export function llmsAntCreatePlaceholderModel(model: AnthropicWire_API_Models_List.ModelObject): ModelDescriptionSchema {
  // `created_at` is only validated as a string, so an unparsable value would turn into NaN here:
  // omit `created` in that case instead of emitting a NaN timestamp.
  // NOTE(review): assumes `created` is optional in ModelDescriptionSchema (other call sites leave it unset) - confirm
  const createdSeconds = Math.round(new Date(model.created_at).getTime() / 1000);
  return {
    id: model.id,
    label: model.display_name,
    ...(Number.isFinite(createdSeconds) ? { created: createdSeconds } : {}),
    description: 'Newest model, description not available yet.',
    contextWindow: 200000,
    maxCompletionTokens: 8192,
    trainingDataCutoff: 'Latest',
    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
    // chatPrice: ...
    // benchmark: ...
  };
}

/**
 * Injects the LLM_IF_Tools_WebSearch interface for models that have web search/fetch parameters.
 * This allows the UI to show the web search indicator automatically based on model capabilities.
 *
 * @param model - the model description to inspect; it is never mutated
 * @returns the same object when nothing needs to change, otherwise a shallow copy with the interface appended
 */
export function llmsAntInjectWebSearchInterface(model: ModelDescriptionSchema): ModelDescriptionSchema {
  const hasWebParams = model.parameterSpecs?.some(({ paramId }) =>
    paramId === 'llmVndAntWebSearch' || paramId === 'llmVndAntWebFetch',
  );
  if (!hasWebParams)
    return model;

  // guard the spread the same way the membership test is guarded, so a missing list cannot throw
  const currentInterfaces = model.interfaces ?? [];
  if (currentInterfaces.includes(LLM_IF_Tools_WebSearch))
    return model;

  return {
    ...model,
    interfaces: [...currentInterfaces, LLM_IF_Tools_WebSearch],
  };
}
- * This allows the UI to show the web search indicator automatically based on model capabilities. - */ -function _injectWebSearchInterface(model: ModelDescriptionSchema): ModelDescriptionSchema { - const hasWebParams = model.parameterSpecs?.some(spec => - spec.paramId === 'llmVndAntWebSearch' || spec.paramId === 'llmVndAntWebFetch' - ); - return (hasWebParams && !model.interfaces?.includes(LLM_IF_Tools_WebSearch)) ? { - ...model, - interfaces: [...model.interfaces, LLM_IF_Tools_WebSearch], - } : model; -} - - // Router export const llmAnthropicRouter = createTRPCRouter({ @@ -210,81 +183,9 @@ export const llmAnthropicRouter = createTRPCRouter({ listModels: publicProcedure .input(listModelsInputSchema) .output(ListModelsResponse_schema) - .query(async ({ input: { access } }) => { + .query(async ({ input: { access }, signal }) => { - // get the models - const wireModels = await anthropicGETOrThrow(access, '/v1/models?limit=1000'); - const { data: availableModels } = AnthropicWire_API_Models_List.Response_schema.parse(wireModels); - - // sort by: family (desc) > class (desc) > date (desc) -- Future NOTE: -5- will match -4-5- and -3-5-.. figure something else out - const familyPrecedence = ['-4-7-', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-']; - const classPrecedence = ['-opus-', '-sonnet-', '-haiku-']; - - const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f)); - const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c)); - - // cast the models to the common schema - const models = availableModels - .sort((a, b) => { - const familyA = getFamilyIdx(a.id); - const familyB = getFamilyIdx(b.id); - const classA = getClassIdx(a.id); - const classB = getClassIdx(b.id); - - // family desc (lower index = better, -1 = unknown goes last) - if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB); - // class desc - if (classA !== classB) return (classA === -1 ? 
999 : classA) - (classB === -1 ? 999 : classB); - // date desc (newer first) - string comparison works since format is YYYYMMDD - return b.id.localeCompare(a.id); - }) - .reduce((acc, model) => { - - // find the model description - const hardcodedModel = hardcodedAnthropicModels.find(m => m.id === model.id); - if (hardcodedModel) { - - // update creation date - if (!hardcodedModel.created && model.created_at) - hardcodedModel.created = roundTime(model.created_at); - - // add FIRST a thinking variant, if defined - if (hardcodedAnthropicVariants[model.id]) - acc.push({ - ...hardcodedModel, - ...hardcodedAnthropicVariants[model.id], - }); - - // add the base model - acc.push(hardcodedModel); - - } else { - - // for day-0 support of new models, create a placeholder model using sensible defaults - const novelModel = _createPlaceholderModel(model); - // if (DEV_DEBUG_ANTHROPIC_MODELS) // kind of important... - console.log('[DEV] anthropic.router: new model found, please configure it:', novelModel.id); - acc.push(novelModel); - - } - - return acc; - }, [] as ModelDescriptionSchema[]) - .map(_injectWebSearchInterface); - - // developers warning for obsoleted models (we have them, but they are not in the API response anymore) - if (DEV_DEBUG_ANTHROPIC_MODELS) { - const apiModelIds = new Set(availableModels.map(m => m.id)); - const additionalModels = hardcodedAnthropicModels.filter(m => !apiModelIds.has(m.id)); - if (additionalModels.length > 0) - console.log('[DEV] anthropic.router: obsoleted models:', additionalModels.map(m => m.id).join(', ')); - } - - // additionalModels.forEach(m => { - // m.label += ' (Removed)'; - // m.isLegacy = true; - // }); - // models.push(...additionalModels); + const models = await listModelsRunDispatch(access, signal); return { models }; }), @@ -328,47 +229,3 @@ export const llmAnthropicRouter = createTRPCRouter({ }), }); - - -/** - * Create a placeholder ModelDescriptionSchema for models not in the hardcoded list, - * using sensible defaults 
with the newest available interfaces. - */ -function _createPlaceholderModel(model: AnthropicWire_API_Models_List.ModelObject): ModelDescriptionSchema { - return { - id: model.id, - label: model.display_name, - created: Math.round(new Date(model.created_at).getTime() / 1000), - description: 'Newest model, description not available yet.', - contextWindow: 200000, - maxCompletionTokens: 8192, - trainingDataCutoff: 'Latest', - interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching], - // chatPrice: ... - // benchmark: ... - }; -} - -/** - * Namespace for the Anthropic API Models List response schema. - * NOTE: not merged into AIX because of possible circular dependency issues - future work. - */ -namespace AnthropicWire_API_Models_List { - - export type ModelObject = z.infer; - const ModelObject_schema = z.object({ - type: z.literal('model'), - id: z.string(), - display_name: z.string(), - created_at: z.string(), - }); - - export type Response = z.infer; - export const Response_schema = z.object({ - data: z.array(ModelObject_schema), - has_more: z.boolean(), - first_id: z.string().nullable(), - last_id: z.string().nullable(), - }); - -} diff --git a/src/modules/llms/server/gemini/gemini.router.ts b/src/modules/llms/server/gemini/gemini.router.ts index 93bb93473..9d8d8130a 100644 --- a/src/modules/llms/server/gemini/gemini.router.ts +++ b/src/modules/llms/server/gemini/gemini.router.ts @@ -7,11 +7,11 @@ import packageJson from '../../../../../package.json'; import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server'; import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers'; -import { GeminiWire_API_Models_List, GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes'; +import { GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes'; import { ListModelsResponse_schema } from '../llm.server.types'; -import { geminiDevCheckForParserMisses_DEV, 
geminiDevCheckForSuperfluousModels_DEV, geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels } from './gemini.models'; -import { fixupHost } from '~/modules/llms/server/openai/openai.router'; +import { fixupHost } from '../openai/openai.router'; +import { listModelsRunDispatch } from '../listModels.dispatch'; // Default hosts @@ -93,29 +93,11 @@ export const llmGeminiRouter = createTRPCRouter({ listModels: publicProcedure .input(accessOnlySchema) .output(ListModelsResponse_schema) - .query(async ({ input }) => { + .query(async ({ input, signal }) => { - // get the models - const wireModels = await geminiGET(input.access, null, GeminiWire_API_Models_List.getPath, false); - const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models; - geminiDevCheckForParserMisses_DEV(wireModels, detailedModels); - geminiDevCheckForSuperfluousModels_DEV(detailedModels.map(model => model.name)); + const models = await listModelsRunDispatch(input.access, signal); - // NOTE: no need to retrieve info for each of the models (e.g. 
/v1beta/model/gemini-pro)., - // as the List API already all the info on all the models - - // first filter from the original list - const filteredModels = detailedModels.filter(geminiFilterModels); - - // map to our output schema - const models = filteredModels - .map(geminiModelToModelDescription) - .filter(model => !!model) - .sort(geminiSortModels); - - return { - models: geminiModelsAddVariants(models), - }; + return { models }; }), }); diff --git a/src/modules/llms/server/listModels.dispatch.ts b/src/modules/llms/server/listModels.dispatch.ts new file mode 100644 index 000000000..1ae99958a --- /dev/null +++ b/src/modules/llms/server/listModels.dispatch.ts @@ -0,0 +1,425 @@ +import { TRPCError } from '@trpc/server'; + +import type { AixAPI_Access } from '~/modules/aix/server/api/aix.wiretypes'; + +import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; + +import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers'; +import { serverCapitalizeFirstLetter } from '~/server/wire'; + +import type { ModelDescriptionSchema } from './llm.server.types'; + + +// protocol: Anthropic +import { AnthropicWire_API_Models_List, hardcodedAnthropicModels, hardcodedAnthropicVariants, llmsAntCreatePlaceholderModel, llmsAntDevCheckForObsoletedModels_DEV, llmsAntInjectWebSearchInterface } from './anthropic/anthropic.models'; +import { anthropicAccess } from './anthropic/anthropic.router'; + +// protocol: Gemini +import { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes'; +import { geminiAccess } from './gemini/gemini.router'; +import { geminiDevCheckForParserMisses_DEV, geminiDevCheckForSuperfluousModels_DEV, geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels } from './gemini/gemini.models'; + +// protocol: Ollama +import { OLLAMA_BASE_MODELS } from './ollama/ollama.models'; +import { ollamaAccess } from './ollama/ollama.router'; +import { 
wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama/ollama.wiretypes'; + +// protocol: OpenAI-compatible +import { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes'; +import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './openai/models/alibaba.models'; +import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './openai/models/azure.models'; +import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './openai/models/chutesai.models'; +import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './openai/models/deepseek.models'; +import { fastAPIHeuristic, fastAPIModels } from './openai/models/fastapi.models'; +import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './openai/models/fireworksai.models'; +import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './openai/models/groq.models'; +import { lmStudioModelToModelDescription } from './openai/models/lmstudio.models'; +import { localAIModelSortFn, localAIModelToModelDescription } from './openai/models/localai.models'; +import { mistralModels } from './openai/models/mistral.models'; +import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './openai/models/moonshot.models'; +import { openAIAccess } from './openai/openai.router'; +import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './openai/models/openpipe.models'; +import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './openai/models/openrouter.models'; +import { openaiDevCheckForModelsOverlap_DEV, openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './openai/models/openai.models'; +import { perplexityHardcodedModelDescriptions, perplexityInjectVariants } from 
// -- Dispatch types --

/**
 * A vendor-specific strategy for listing models, split in two steps:
 *  - `fetchModels` retrieves (and validates) the vendor's wire response;
 *  - `convertToDescriptions` maps that wire response to the common ModelDescriptionSchema list.
 *
 * `TWireModels` ties the two steps together: whatever `fetchModels` resolves to is what
 * `convertToDescriptions` receives.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the default must accept any vendor's wire type when the parameter is omitted
export type ListModelsDispatch<TWireModels = any> = {
  fetchModels: () => Promise<TWireModels>;
  convertToDescriptions: (wireModels: TWireModels) => ModelDescriptionSchema[];
};

/**
 * Helper to create a dispatch with proper type inference.
 * TypeScript will infer TWireModels from fetchModels return type and enforce it in convertToDescriptions.
 *
 * @param dispatch - the two-step strategy; returned as-is (identity at runtime)
 */
function createDispatch<TWireModels>(dispatch: ListModelsDispatch<TWireModels>): ListModelsDispatch<TWireModels> {
  return dispatch;
}


// -- Specialized Implementations -- Core of Server-side LLM Model Listing abstraction --

/**
 * Lists the models for the vendor identified by `access`: builds the vendor dispatch,
 * fetches the wire models, then converts them to model descriptions.
 *
 * @param access - vendor access configuration, passed through to `_listModelsCreateDispatch`
 * @param signal - optional abort signal, passed through to `_listModelsCreateDispatch`
 * @returns the model descriptions produced by the vendor's `convertToDescriptions`
 */
export async function listModelsRunDispatch(access: AixAPI_Access, signal?: AbortSignal): Promise<ModelDescriptionSchema[]> {
  const dispatch = _listModelsCreateDispatch(access, signal);
  const wireModels = await dispatch.fetchModels();
  return dispatch.convertToDescriptions(wireModels);
}
figure something else out + const familyPrecedence = ['-4-7-', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-']; + const classPrecedence = ['-opus-', '-sonnet-', '-haiku-']; + + const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f)); + const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c)); + + // cast the models to the common schema + const models = availableModels + .sort((a, b) => { + const familyA = getFamilyIdx(a.id); + const familyB = getFamilyIdx(b.id); + const classA = getClassIdx(a.id); + const classB = getClassIdx(b.id); + + // family desc (lower index = better, -1 = unknown goes last) + if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB); + // class desc + if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB); + // date desc (newer first) - string comparison works since format is YYYYMMDD + return b.id.localeCompare(a.id); + }) + .reduce((acc: ModelDescriptionSchema[], model) => { + // find the model description + const hardcodedModel = hardcodedAnthropicModels.find(m => m.id === model.id); + if (hardcodedModel) { + + // update creation date + function roundTime(date: string) { + return Math.round(new Date(date).getTime() / 1000); + } + + if (!hardcodedModel.created && model.created_at) + hardcodedModel.created = roundTime(model.created_at); + + // add FIRST a thinking variant, if defined + if (hardcodedAnthropicVariants[model.id]) + acc.push({ + ...hardcodedModel, + ...hardcodedAnthropicVariants[model.id], + }); + + // add the base model + acc.push(hardcodedModel); + } else { + // for day-0 support of new models, create a placeholder model using sensible defaults + const novelModel = llmsAntCreatePlaceholderModel(model); + // if (DEV_DEBUG_ANTHROPIC_MODELS) // kind of important... 
+ console.log('[DEV] anthropic.router: new model found, please configure it:', novelModel.id); + acc.push(novelModel); + } + return acc; + }, [] as ModelDescriptionSchema[]) + .map(llmsAntInjectWebSearchInterface); + + // [DEV] check for obsoleted models (defined but no longer in API response) + llmsAntDevCheckForObsoletedModels_DEV(availableModels); + + return models; + }, + }); + } + + case 'gemini': { + return createDispatch({ + fetchModels: async () => { + const { headers, url } = geminiAccess(access, null, GeminiWire_API_Models_List.getPath, false); + const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Gemini', signal }); + const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models; + + // [DEV] check for missing or superfluous models + geminiDevCheckForParserMisses_DEV(wireModels, detailedModels); + geminiDevCheckForSuperfluousModels_DEV(detailedModels.map((model: any) => model.name)); + + return detailedModels; + }, + convertToDescriptions: (detailedModels) => { + // NOTE: no need to retrieve info for each of the models (e.g. 
/v1beta/model/gemini-pro), + // as the List API already has all the info on all the models + + // first filter from the original list + const filteredModels = detailedModels.filter(geminiFilterModels); + + // map to our output schema + const models = filteredModels + .map(geminiModelToModelDescription) + .filter(model => !!model) + .sort(geminiSortModels); + return geminiModelsAddVariants(models); + }, + }); + } + + case 'ollama': { + return createDispatch({ + fetchModels: async () => { + const { headers, url } = ollamaAccess(access, '/api/tags'); + const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama', signal }); + const models = wireOllamaListModelsSchema.parse(wireModels).models; + + // retrieve info for each of the models + return await Promise.all(models.map(async (model) => { + + // perform /api/show on each model to get detailed info + const { headers, url } = ollamaAccess(access, '/api/show'); + const wireModelInfo = await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body: { 'name': model.name }, name: 'Ollama', signal }); + + const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo); + return { ...model, ...modelInfo }; + })); + }, + convertToDescriptions: (detailedModels) => { + return detailedModels.map((model) => { + // the model name is in the format "name:tag" (default tag = 'latest') + const [modelName, modelTag] = model.name.split(':'); + + // pretty label and description + const label = serverCapitalizeFirstLetter(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : ''); + const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {}; + let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and not used by nobody + + // prepend the parameters count and quantization level + if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) { + let firstLine = model.details.parameter_size ? 
`${model.details.parameter_size} parameters ` : ''; + if (model.details.quantization_level) + firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')'); + if (model.size) + firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`; + if (baseModel.hasTools) + firstLine += ' [tools]'; + if (baseModel.hasVision) + firstLine += ' [vision]'; + description = firstLine + '\n\n' + description; + } + + /* Find the context window from the 'num_ctx' line in the parameters string, if present + * - https://github.com/enricoros/big-AGI/issues/309 + * - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096 + * - We are tracking the Upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future + */ + let contextWindow = baseModel.contextWindow || 8192; + if (model.parameters) { + // split the parameters into lines, and find one called "num_ctx ...spaces... number" + const paramsNumCtx = model.parameters.split('\n').find((line) => line.startsWith('num_ctx ')); + if (paramsNumCtx) { + const numCtxValue: string = paramsNumCtx.split(/\s+/)[1]; + if (numCtxValue) { + const numCtxNumber: number = parseInt(numCtxValue); + if (!isNaN(numCtxNumber)) + contextWindow = numCtxNumber; + } + } + } + + // auto-detect interfaces from the hardcoded description (in turn parsed from the html page) + const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : []; + if (baseModel.hasTools) + interfaces.push(LLM_IF_OAI_Fn); + if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic + interfaces.push(LLM_IF_OAI_Vision); + + // console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n'); + + return { + id: model.name, + label, + created: Date.parse(model.modified_at) ?? undefined, + updated: Date.parse(model.modified_at) ?? undefined, + description: description, // description: (model.license ? 
`License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown', + contextWindow, + ...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}), + interfaces, + }; + }); + }, + }); + } + + case 'perplexity': + // [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/getting-started/pricing#sonar-models-chat-completions) + return createDispatch({ + fetchModels: async () => null, + convertToDescriptions: () => perplexityHardcodedModelDescriptions().reduce(perplexityInjectVariants, []), + }); + + case 'xai': + // [xAI]: custom models listing + return createDispatch({ + fetchModels: async () => xaiFetchModelDescriptions(access), + convertToDescriptions: models => models.sort(xaiModelSort), + }); + + case 'alibaba': + case 'azure': + case 'deepseek': + case 'groq': + case 'lmstudio': + case 'localai': + case 'mistral': + case 'moonshot': + case 'openai': + case 'openpipe': + case 'openrouter': + case 'togetherai': + return createDispatch({ + + // [OpenAI-compatible dialects]: fetch openAI-style /v1/models API + fetchModels: async () => { + const { headers, url } = openAIAccess(access, null, '/v1/models'); + return fetchJsonOrTRPCThrow({ url, headers, name: `OpenAI/${serverCapitalizeFirstLetter(dialect)}`, signal }); + }, + + // OpenAI models conversions: dependent on the dialect + convertToDescriptions: (openAIWireModelsResponse) => { + + // [Together] missing the .data property - so we have to do this early + if (dialect === 'togetherai') + return togetherAIModelsToModelDescriptions(openAIWireModelsResponse); + + // NOTE: we don't zod here as it would strip unknown properties needed for some dialects - so we proceed optimistically + // let maybeModels = OpenAIWire_API_Models_List.Response_schema.parse(openAIWireModelsResponse).data || []; + let maybeModels = openAIWireModelsResponse?.data || []; + + // de-duplicate by ids (can happen for local servers.. 
upstream bugs) + const preCount = maybeModels.length; + maybeModels = maybeModels.filter((model, index) => maybeModels.findIndex(m => m.id === model.id) === index); + if (preCount !== maybeModels.length) + console.warn(`openai.router.listModels: removed ${preCount - maybeModels.length} duplicate models for dialect ${dialect}`); + + // sort by id + maybeModels.sort((a, b) => a.id.localeCompare(b.id)); + + // every dialect has a different way to enumerate models - we execute the mapping on the server side + switch (dialect) { + case 'alibaba': + return maybeModels + .filter(({ id }) => alibabaModelFilter(id)) + .map(({ id, created }) => alibabaModelToModelDescription(id, created)) + .sort(alibabaModelSort); + + case 'azure': + const azureOpenAIDeployments = azureParseFromDeploymentsAPI(maybeModels); + return azureOpenAIDeployments + .filter(azureDeploymentFilter) + .map(azureDeploymentToModelDescription) + .sort(openAISortModels); + + case 'deepseek': + return maybeModels + .filter(({ id }) => deepseekModelFilter(id)) + .map(({ id }) => deepseekModelToModelDescription(id)) + .sort(deepseekModelSort); + + case 'groq': + return maybeModels + .filter(groqModelFilter) + .map(groqModelToModelDescription) + .sort(groqModelSortFn); + + case 'lmstudio': + return maybeModels + .map(({ id }) => lmStudioModelToModelDescription(id)); + + case 'localai': + return maybeModels + .map(({ id }) => localAIModelToModelDescription(id)) + .sort(localAIModelSortFn); + + case 'mistral': + return mistralModels(maybeModels); + + case 'moonshot': + return maybeModels + .filter(moonshotModelFilter) + .map(moonshotModelToModelDescription) + .sort(moonshotModelSortFn); + + case 'openai': + // [ChutesAI] special case for model enumeration + const oaiHost = access.oaiHost; + if (chutesAIHeuristic(oaiHost)) + return chutesAIModelsToModelDescriptions(maybeModels); + + // [FireworksAI] special case for model enumeration + if (fireworksAIHeuristic(oaiHost)) + return 
fireworksAIModelsToModelDescriptions(maybeModels); + + // [FastChat] make the best of the little info + if (fastAPIHeuristic(maybeModels)) + return fastAPIModels(maybeModels); + + // [OpenAI or OpenAI-compatible]: chat-only models, custom sort, manual mapping + const models = maybeModels + // limit to only 'gpt' and 'non instruct' models + .filter(openAIModelFilter) + // to model description + .map((model: any): ModelDescriptionSchema => openAIModelToModelDescription(model.id, model.created)) + // inject variants + .reduce(openAIInjectVariants, [] as ModelDescriptionSchema[]) + // custom OpenAI sort + .sort(openAISortModels); + + // [DEV] check for superfluous and missing models + openaiDevCheckForModelsOverlap_DEV(maybeModels, models); + return models; + + case 'openpipe': + return [ + ...maybeModels.map(openPipeModelToModelDescriptions), + ...openPipeModelDescriptions().sort(openPipeModelSort), + ]; + + case 'openrouter': + // openRouterStatTokenizers(maybeModels); + return maybeModels + .sort(openRouterModelFamilySortFn) + .map(openRouterModelToModelDescription) + .filter(desc => !!desc) + .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]); + + default: + const _exhaustiveCheck: never = dialect; + throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unhandled dialect: ${dialect}` }); + } + }, + }); + + default: + const _exhaustiveCheck: never = dialect; + throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unsupported dialect: ${dialect}` }); + } +} diff --git a/src/modules/llms/server/openai/models.cba.ts b/src/modules/llms/server/models.cba.ts similarity index 100% rename from src/modules/llms/server/openai/models.cba.ts rename to src/modules/llms/server/models.cba.ts diff --git a/src/modules/llms/server/openai/models/models.data.ts b/src/modules/llms/server/models.mappings.ts similarity index 98% rename from src/modules/llms/server/openai/models/models.data.ts rename to src/modules/llms/server/models.mappings.ts index 
f52bf9403..b22be433f 100644 --- a/src/modules/llms/server/openai/models/models.data.ts +++ b/src/modules/llms/server/models.mappings.ts @@ -1,4 +1,4 @@ -import type { ModelDescriptionSchema } from '../../llm.server.types'; +import type { ModelDescriptionSchema } from './llm.server.types'; // -- Manual model mappings: types and helper -- diff --git a/src/modules/llms/server/ollama/ollama.router.ts b/src/modules/llms/server/ollama/ollama.router.ts index 08f3fff86..37fc1394d 100644 --- a/src/modules/llms/server/ollama/ollama.router.ts +++ b/src/modules/llms/server/ollama/ollama.router.ts @@ -3,23 +3,18 @@ import { TRPCError } from '@trpc/server'; import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server'; import { env } from '~/server/env'; -import { fetchJsonOrTRPCThrow, fetchTextOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers'; - -import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; -import { capitalizeFirstLetter } from '~/common/util/textUtils'; +import { fetchTextOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers'; +import { serverCapitalizeFirstLetter } from '~/server/wire'; import { ListModelsResponse_schema } from '../llm.server.types'; +import { fixupHost } from '../openai/openai.router'; +import { listModelsRunDispatch } from '../listModels.dispatch'; import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models'; -import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama.wiretypes'; -import { fixupHost } from '~/modules/llms/server/openai/openai.router'; -// Default hosts +// configuration const DEFAULT_OLLAMA_HOST = 'http://127.0.0.1:11434'; -// export const OLLAMA_PATH_CHAT = '/api/chat'; -const OLLAMA_PATH_TAGS = '/api/tags'; -const OLLAMA_PATH_SHOW = '/api/show'; // Mappers @@ -84,15 +79,15 @@ export function ollamaCompletionPayload(model: OpenAIModelSchema, history: OpenA }; }*/ -async function ollamaGET(access: OllamaAccessSchema, apiPath: 
string /*, signal?: AbortSignal*/): Promise { - const { headers, url } = ollamaAccess(access, apiPath); - return await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama' }); -} +// async function ollamaGET(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise { +// const { headers, url } = ollamaAccess(access, apiPath); +// return await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama' }); +// } -async function ollamaPOST(access: OllamaAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise { - const { headers, url } = ollamaAccess(access, apiPath); - return await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body, name: 'Ollama' }); -} +// async function ollamaPOST(access: OllamaAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise { +// const { headers, url } = ollamaAccess(access, apiPath); +// return await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body, name: 'Ollama' }); +// } // Input/Output Schemas @@ -137,7 +132,7 @@ export const llmOllamaRouter = createTRPCRouter({ return { pullableModels: Object.entries(OLLAMA_BASE_MODELS).map(([model_id, model]) => ({ id: model_id, - label: capitalizeFirstLetter(model_id), + label: serverCapitalizeFirstLetter(model_id), tag: 'latest', tags: model.tags?.length ? 
model.tags : [], description: '', // model.description, // REMOVED description - bloated and not used by nobody @@ -185,83 +180,11 @@ export const llmOllamaRouter = createTRPCRouter({ listModels: publicProcedure .input(accessOnlySchema) .output(ListModelsResponse_schema) - .query(async ({ input }) => { + .query(async ({ input, signal }) => { - // get the models - const wireModels = await ollamaGET(input.access, OLLAMA_PATH_TAGS); - let models = wireOllamaListModelsSchema.parse(wireModels).models; + const models = await listModelsRunDispatch(input.access, signal); - // retrieve info for each of the models (/api/show, post call, in parallel) - const detailedModels = await Promise.all(models.map(async model => { - const wireModelInfo = await ollamaPOST(input.access, { 'name': model.name }, OLLAMA_PATH_SHOW); - const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo); - return { ...model, ...modelInfo }; - })); - - return { - models: detailedModels.map(model => { - // the model name is in the format "name:tag" (default tag = 'latest') - const [modelName, modelTag] = model.name.split(':'); - - // pretty label and description - const label = capitalizeFirstLetter(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : ''); - const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {}; - let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and not used by nobody - - // prepend the parameters count and quantization level - if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) { - let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : ''; - if (model.details.quantization_level) - firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? 
`, ${model.details.format})` : ')'); - if (model.size) - firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`; - if (baseModel.hasTools) - firstLine += ' [tools]'; - if (baseModel.hasVision) - firstLine += ' [vision]'; - description = firstLine + '\n\n' + description; - } - - /* Find the context window from the 'num_ctx' line in the parameters string, if present - * - https://github.com/enricoros/big-AGI/issues/309 - * - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096 - * - We are tracking the Upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future - */ - let contextWindow = baseModel.contextWindow || 8192; - if (model.parameters) { - // split the parameters into lines, and find one called "num_ctx ...spaces... number" - const paramsNumCtx = model.parameters.split('\n').find(line => line.startsWith('num_ctx ')); - if (paramsNumCtx) { - const numCtxValue: string = paramsNumCtx.split(/\s+/)[1]; - if (numCtxValue) { - const numCtxNumber: number = parseInt(numCtxValue); - if (!isNaN(numCtxNumber)) - contextWindow = numCtxNumber; - } - } - } - - // auto-detect interfaces from the hardcoded description (in turn parsed from the html page) - const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : []; - if (baseModel.hasTools) - interfaces.push(LLM_IF_OAI_Fn); - if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic - interfaces.push(LLM_IF_OAI_Vision); - - // console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n'); - - return { - id: model.name, - label, - created: Date.parse(model.modified_at) ?? undefined, - updated: Date.parse(model.modified_at) ?? undefined, - description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown', - contextWindow, - ...(contextWindow ? 
{ maxCompletionTokens: Math.round(contextWindow / 2) } : {}), - interfaces, - }; - }), - }; + return { models }; }), }); diff --git a/src/modules/llms/server/openai/models/alibaba.models.ts b/src/modules/llms/server/openai/models/alibaba.models.ts index 0e400c6fa..df17335cd 100644 --- a/src/modules/llms/server/openai/models/alibaba.models.ts +++ b/src/modules/llms/server/openai/models/alibaba.models.ts @@ -2,7 +2,7 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stor import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, ManualMappings } from './models.data'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; // - Models & Pricing: https://www.alibabacloud.com/help/en/model-studio/models // - Billing Guide: https://www.alibabacloud.com/help/en/model-studio/billing-for-model-studio diff --git a/src/modules/llms/server/openai/models/azure.models.ts b/src/modules/llms/server/openai/models/azure.models.ts index 904cf90fc..48657b1b8 100644 --- a/src/modules/llms/server/openai/models/azure.models.ts +++ b/src/modules/llms/server/openai/models/azure.models.ts @@ -11,7 +11,7 @@ import type { ModelDescriptionSchema, RequestAccessValues } from '../../llm.serv import type { OpenAIAccessSchema } from '../openai.router'; import { fixupHost } from '../openai.router'; -import { fromManualMapping, ManualMappings } from './models.data'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; import { _fallbackOpenAIModel, _knownOpenAIChatModels } from './openai.models'; diff --git a/src/modules/llms/server/openai/models/chutesai.models.ts b/src/modules/llms/server/openai/models/chutesai.models.ts index abc452c79..3410f3433 100644 --- a/src/modules/llms/server/openai/models/chutesai.models.ts +++ b/src/modules/llms/server/openai/models/chutesai.models.ts @@ -6,7 +6,7 @@ import { serverCapitalizeFirstLetter } from '~/server/wire'; import type { 
ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, ManualMappings } from './models.data'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; export function chutesAIHeuristic(hostname: string) { diff --git a/src/modules/llms/server/openai/models/deepseek.models.ts b/src/modules/llms/server/openai/models/deepseek.models.ts index dbd0e4dbe..e6a6c1ac7 100644 --- a/src/modules/llms/server/openai/models/deepseek.models.ts +++ b/src/modules/llms/server/openai/models/deepseek.models.ts @@ -2,7 +2,7 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning } import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, ManualMappings } from './models.data'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; const _knownDeepseekChatModels: ManualMappings = [ diff --git a/src/modules/llms/server/openai/models/fastapi.models.ts b/src/modules/llms/server/openai/models/fastapi.models.ts index 0677114e8..f0baf94b2 100644 --- a/src/modules/llms/server/openai/models/fastapi.models.ts +++ b/src/modules/llms/server/openai/models/fastapi.models.ts @@ -3,7 +3,7 @@ import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/w import { DModelInterfaceV1, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, ManualMappings } from './models.data'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; const _fastAPIKnownModels: ManualMappings = [ diff --git a/src/modules/llms/server/openai/models/fireworksai.models.ts b/src/modules/llms/server/openai/models/fireworksai.models.ts index 4b0541a03..b04677e4d 100644 --- a/src/modules/llms/server/openai/models/fireworksai.models.ts +++ b/src/modules/llms/server/openai/models/fireworksai.models.ts @@ -4,8 +4,8 @@ import { 
serverCapitalizeFirstLetter } from '~/server/wire'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, ManualMappings } from './models.data'; -import { wireFireworksAIListOutputSchema } from '../fireworksai.wiretypes'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; +import { wireFireworksAIListOutputSchema } from '../wiretypes/fireworksai.wiretypes'; export function fireworksAIHeuristic(hostname: string) { diff --git a/src/modules/llms/server/openai/models/groq.models.ts b/src/modules/llms/server/openai/models/groq.models.ts index acd956bb4..8a80acc2a 100644 --- a/src/modules/llms/server/openai/models/groq.models.ts +++ b/src/modules/llms/server/openai/models/groq.models.ts @@ -1,8 +1,8 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn } from '~/common/stores/llms/llms.types'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, ManualMappings } from './models.data'; -import { wireGroqModelsListOutputSchema } from '../groq.wiretypes'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; +import { wireGroqModelsListOutputSchema } from '../wiretypes/groq.wiretypes'; /** diff --git a/src/modules/llms/server/openai/models/lmstudio.models.ts b/src/modules/llms/server/openai/models/lmstudio.models.ts index 7943c23b3..579d60708 100644 --- a/src/modules/llms/server/openai/models/lmstudio.models.ts +++ b/src/modules/llms/server/openai/models/lmstudio.models.ts @@ -1,7 +1,7 @@ import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping } from './models.data'; +import { fromManualMapping } from '../../models.mappings'; export function lmStudioModelToModelDescription(modelId: string): ModelDescriptionSchema { diff --git a/src/modules/llms/server/openai/models/localai.models.ts b/src/modules/llms/server/openai/models/localai.models.ts 
index 32bc9d1d3..e6ade0958 100644 --- a/src/modules/llms/server/openai/models/localai.models.ts +++ b/src/modules/llms/server/openai/models/localai.models.ts @@ -1,8 +1,9 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; -import { capitalizeFirstLetter } from '~/common/util/textUtils'; + +import { serverCapitalizeFirstLetter } from '~/server/wire'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, type ManualMappings } from './models.data'; +import { fromManualMapping, type ManualMappings } from '../../models.mappings'; // [LocalAI] @@ -37,7 +38,7 @@ export function localAIModelToModelDescription(modelId: string): ModelDescriptio .replace(' Q4_K_M', ' (Q4_K_M)') .replace(' F16', ' (F16)') .split(' ') - .map(capitalizeFirstLetter) + .map(serverCapitalizeFirstLetter) .join(' '); const description = `LocalAI model. File: ${modelId}`; diff --git a/src/modules/llms/server/openai/models/moonshot.models.ts b/src/modules/llms/server/openai/models/moonshot.models.ts index 1930ab2d2..cd160c4cb 100644 --- a/src/modules/llms/server/openai/models/moonshot.models.ts +++ b/src/modules/llms/server/openai/models/moonshot.models.ts @@ -3,7 +3,7 @@ import * as z from 'zod/v4'; import { LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, ManualMappings } from './models.data'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; const IF_K2 = [ diff --git a/src/modules/llms/server/openai/models/openai.models.ts b/src/modules/llms/server/openai/models/openai.models.ts index 9462246a8..e993cf128 100644 --- a/src/modules/llms/server/openai/models/openai.models.ts +++ 
b/src/modules/llms/server/openai/models/openai.models.ts @@ -4,7 +4,7 @@ import { DModelInterfaceV1, LLM_IF_HOTFIX_NoTemperature, LLM_IF_HOTFIX_StripImag import { Release } from '~/common/app.release'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, KnownModel, ManualMappings } from './models.data'; +import { fromManualMapping, KnownModel, ManualMappings } from '../../models.mappings'; // OpenAI Model Variants diff --git a/src/modules/llms/server/openai/models/openpipe.models.ts b/src/modules/llms/server/openai/models/openpipe.models.ts index 1272d8f50..7ad47d4b1 100644 --- a/src/modules/llms/server/openai/models/openpipe.models.ts +++ b/src/modules/llms/server/openai/models/openpipe.models.ts @@ -1,8 +1,8 @@ import type { ModelDescriptionSchema } from '~/modules/llms/server/llm.server.types'; import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; import { _knownOpenAIChatModels } from '~/modules/llms/server/openai/models/openai.models'; -import { wireOpenPipeModelOutputSchema } from '~/modules/llms/server/openai/openpipe.wiretypes'; -import { fromManualMapping, KnownModel } from '~/modules/llms/server/openai/models/models.data'; +import { wireOpenPipeModelOutputSchema } from '~/modules/llms/server/openai/wiretypes/openpipe.wiretypes'; +import { fromManualMapping, KnownModel } from '~/modules/llms/server/models.mappings'; const _knownOpenPipeChatModels: ModelDescriptionSchema[] = [ diff --git a/src/modules/llms/server/openai/models/openrouter.models.ts b/src/modules/llms/server/openai/models/openrouter.models.ts index 5b50c93b5..73030bd7d 100644 --- a/src/modules/llms/server/openai/models/openrouter.models.ts +++ b/src/modules/llms/server/openai/models/openrouter.models.ts @@ -3,8 +3,8 @@ import * as z from 'zod/v4'; import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, 
LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image } from '~/common/stores/llms/llms.types'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping } from './models.data'; -import { wireOpenrouterModelsListOutputSchema } from '../openrouter.wiretypes'; +import { fromManualMapping } from '../../models.mappings'; +import { wireOpenrouterModelsListOutputSchema } from '../wiretypes/openrouter.wiretypes'; // configuration diff --git a/src/modules/llms/server/openai/models/perplexity.models.ts b/src/modules/llms/server/openai/models/perplexity.models.ts index 7c7421a6c..945fdd90b 100644 --- a/src/modules/llms/server/openai/models/perplexity.models.ts +++ b/src/modules/llms/server/openai/models/perplexity.models.ts @@ -144,7 +144,7 @@ export function perplexityInjectVariants(models: ModelDescriptionSchema[], model return models; } -export function perplexityAIModelDescriptions() { +export function perplexityHardcodedModelDescriptions() { // Returns the list of known Perplexity models return _knownPerplexityChatModels; } diff --git a/src/modules/llms/server/openai/models/together.models.ts b/src/modules/llms/server/openai/models/together.models.ts index fe5e43bbf..94a1f7036 100644 --- a/src/modules/llms/server/openai/models/together.models.ts +++ b/src/modules/llms/server/openai/models/together.models.ts @@ -1,8 +1,8 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, ManualMappings } from './models.data'; -import { wireTogetherAIListOutputSchema } from '../togetherai.wiretypes'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; +import { wireTogetherAIListOutputSchema } from '../wiretypes/togetherai.wiretypes'; // Note: 2025-01-28 - we used to have harcoded models here, but now we have a dynamic diff --git a/src/modules/llms/server/openai/models/xai.models.ts 
b/src/modules/llms/server/openai/models/xai.models.ts index 72229d33d..0881bbb6e 100644 --- a/src/modules/llms/server/openai/models/xai.models.ts +++ b/src/modules/llms/server/openai/models/xai.models.ts @@ -5,7 +5,7 @@ import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers'; import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types'; import type { ModelDescriptionSchema } from '../../llm.server.types'; -import { fromManualMapping, KnownModel, ManualMappings } from './models.data'; +import { fromManualMapping, KnownModel, ManualMappings } from '../../models.mappings'; import { openAIAccess, OpenAIAccessSchema } from '../openai.router'; @@ -179,7 +179,7 @@ const _knownXAIChatModels: ManualMappings = [ // xAI Model Descriptions -export async function xaiModelDescriptions(access: OpenAIAccessSchema): Promise { +export async function xaiFetchModelDescriptions(access: OpenAIAccessSchema): Promise { // List models const { headers, url } = openAIAccess(access, null, '/v1/language-models'); diff --git a/src/modules/llms/server/openai/openai.router.ts b/src/modules/llms/server/openai/openai.router.ts index 68ea5936d..94d3bc4e6 100644 --- a/src/modules/llms/server/openai/openai.router.ts +++ b/src/modules/llms/server/openai/openai.router.ts @@ -7,31 +7,15 @@ import { fetchJsonOrTRPCThrow, TRPCFetcherError } from '~/server/trpc/trpc.route import { serverCapitalizeFirstLetter } from '~/server/wire'; import type { T2ICreateImageAsyncStreamOp } from '~/modules/t2i/t2i.server'; +import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes'; import { heartbeatsWhileAwaiting } from '~/modules/aix/server/dispatch/heartbeatsWhileAwaiting'; import { Brand } from '~/common/app.config'; -import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, 
OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes'; - import { ListModelsResponse_schema, ModelDescriptionSchema, RequestAccessValues } from '../llm.server.types'; -import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './models/alibaba.models'; -import { azureDeploymentFilter, azureDeploymentToModelDescription, azureOpenAIAccess, azureParseFromDeploymentsAPI } from './models/azure.models'; -import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './models/chutesai.models'; -import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models'; -import { fastAPIHeuristic, fastAPIModels } from './models/fastapi.models'; -import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './models/fireworksai.models'; -import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './models/groq.models'; -import { lmStudioModelToModelDescription } from './models/lmstudio.models'; -import { localAIModelSortFn, localAIModelToModelDescription } from './models/localai.models'; -import { mistralModels } from './models/mistral.models'; -import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './models/moonshot.models'; -import { openaiDevCheckForModelsOverlap_DEV, openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models'; -import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './models/openpipe.models'; -import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models'; -import { perplexityAIModelDescriptions, perplexityInjectVariants } from './models/perplexity.models'; -import { togetherAIModelsToModelDescriptions } from './models/together.models'; -import { wireLocalAIModelsApplyOutputSchema, 
wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes'; -import { xaiModelDescriptions, xaiModelSort } from './models/xai.models'; +import { azureOpenAIAccess } from './models/azure.models'; +import { listModelsRunDispatch } from '../listModels.dispatch'; +import { wireLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './wiretypes/localai.wiretypes'; const openAIDialects = z.enum([ @@ -181,142 +165,7 @@ export const llmOpenAIRouter = createTRPCRouter({ .query(async ({ input: { access }, signal }): Promise<{ models: ModelDescriptionSchema[] }> => { - let models: ModelDescriptionSchema[]; - - // [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/guides/model-cards) - if (access.dialect === 'perplexity') { - models = perplexityAIModelDescriptions() - .reduce(perplexityInjectVariants, [] as ModelDescriptionSchema[]); - return { models }; - } - - // [xAI]: custom models listing - if (access.dialect === 'xai') - return { models: (await xaiModelDescriptions(access)).sort(xaiModelSort) }; - - // [OpenAI-dialects]: fetch openAI-style for all but Azure (will be then used in each dialect) - const openAIWireModelsResponse = await openaiGETOrThrow(access, '/v1/models', signal); - - // [Together] missing the .data property - if (access.dialect === 'togetherai') - return { models: togetherAIModelsToModelDescriptions(openAIWireModelsResponse) }; - - let openAIModels = openAIWireModelsResponse?.data || []; - - // de-duplicate by ids (can happen for local servers.. 
upstream bugs) - const preCount = openAIModels.length; - openAIModels = openAIModels.filter((model, index) => openAIModels.findIndex(m => m.id === model.id) === index); - if (preCount !== openAIModels.length) - console.warn(`openai.router.listModels: removed ${preCount - openAIModels.length} duplicate models for dialect ${access.dialect}`); - - // sort by id - openAIModels.sort((a, b) => a.id.localeCompare(b.id)); - - // every dialect has a different way to enumerate models - we execute the mapping on the server side - switch (access.dialect) { - - case 'alibaba': - models = openAIModels - .filter(({ id }) => alibabaModelFilter(id)) - .map(({ id, created }) => alibabaModelToModelDescription(id, created)) - .sort(alibabaModelSort); - break; - - case 'azure': - const azureOpenAIDeployments = azureParseFromDeploymentsAPI(openAIModels); - models = azureOpenAIDeployments - .filter(azureDeploymentFilter) - .map(azureDeploymentToModelDescription) - .sort(openAISortModels); - break; - - case 'deepseek': - models = openAIModels - .filter(({ id }) => deepseekModelFilter(id)) - .map(({ id }) => deepseekModelToModelDescription(id)) - .sort(deepseekModelSort); - break; - - case 'groq': - models = openAIModels - .filter(groqModelFilter) - .map(groqModelToModelDescription) - .sort(groqModelSortFn); - break; - - case 'lmstudio': - models = openAIModels - .map(({ id }) => lmStudioModelToModelDescription(id)); - break; - - // [LocalAI]: map id to label - case 'localai': - models = openAIModels - .map(({ id }) => localAIModelToModelDescription(id)) - .sort(localAIModelSortFn); - break; - - case 'mistral': - models = mistralModels(openAIModels); - break; - - case 'moonshot': - models = openAIModels - .filter(moonshotModelFilter) - .map(moonshotModelToModelDescription) - .sort(moonshotModelSortFn); - break; - - // [OpenAI]: chat-only models, custom sort, manual mapping - case 'openai': - - // [ChutesAI] special case for model enumeration - if (chutesAIHeuristic(access.oaiHost)) - 
return { models: chutesAIModelsToModelDescriptions(openAIModels) }; - - // [FireworksAI] special case for model enumeration - if (fireworksAIHeuristic(access.oaiHost)) - return { models: fireworksAIModelsToModelDescriptions(openAIModels) }; - - // [FastChat] make the best of the little info - if (fastAPIHeuristic(openAIModels)) - return { models: fastAPIModels(openAIModels) }; - - models = openAIModels - - // limit to only 'gpt' and 'non instruct' models - .filter(openAIModelFilter) - - // to model description - .map((model): ModelDescriptionSchema => openAIModelToModelDescription(model.id, model.created)) - - // inject variants - .reduce(openAIInjectVariants, [] as ModelDescriptionSchema[]) - - // custom OpenAI sort - .sort(openAISortModels); - - // [DEV] check for superfluous and missing models - openaiDevCheckForModelsOverlap_DEV(openAIWireModelsResponse, models); - break; - - case 'openpipe': - models = [ - ...openAIModels.map(openPipeModelToModelDescriptions), - ...openPipeModelDescriptions().sort(openPipeModelSort), - ]; - break; - - case 'openrouter': - // openRouterStatTokenizers(openAIModels); - models = openAIModels - .sort(openRouterModelFamilySortFn) - .map(openRouterModelToModelDescription) - .filter(desc => !!desc) - .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]); - break; - - } + const models = await listModelsRunDispatch(access, signal); return { models }; }), diff --git a/src/modules/llms/server/openai/fireworksai.wiretypes.ts b/src/modules/llms/server/openai/wiretypes/fireworksai.wiretypes.ts similarity index 100% rename from src/modules/llms/server/openai/fireworksai.wiretypes.ts rename to src/modules/llms/server/openai/wiretypes/fireworksai.wiretypes.ts diff --git a/src/modules/llms/server/openai/groq.wiretypes.ts b/src/modules/llms/server/openai/wiretypes/groq.wiretypes.ts similarity index 100% rename from src/modules/llms/server/openai/groq.wiretypes.ts rename to src/modules/llms/server/openai/wiretypes/groq.wiretypes.ts 
diff --git a/src/modules/llms/server/openai/localai.wiretypes.ts b/src/modules/llms/server/openai/wiretypes/localai.wiretypes.ts similarity index 100% rename from src/modules/llms/server/openai/localai.wiretypes.ts rename to src/modules/llms/server/openai/wiretypes/localai.wiretypes.ts diff --git a/src/modules/llms/server/openai/openpipe.wiretypes.ts b/src/modules/llms/server/openai/wiretypes/openpipe.wiretypes.ts similarity index 100% rename from src/modules/llms/server/openai/openpipe.wiretypes.ts rename to src/modules/llms/server/openai/wiretypes/openpipe.wiretypes.ts diff --git a/src/modules/llms/server/openai/openrouter.wiretypes.ts b/src/modules/llms/server/openai/wiretypes/openrouter.wiretypes.ts similarity index 100% rename from src/modules/llms/server/openai/openrouter.wiretypes.ts rename to src/modules/llms/server/openai/wiretypes/openrouter.wiretypes.ts diff --git a/src/modules/llms/server/openai/togetherai.wiretypes.ts b/src/modules/llms/server/openai/wiretypes/togetherai.wiretypes.ts similarity index 100% rename from src/modules/llms/server/openai/togetherai.wiretypes.ts rename to src/modules/llms/server/openai/wiretypes/togetherai.wiretypes.ts