LLMs: Central Dispatch

This commit is contained in:
Enrico Ros
2025-11-17 03:29:40 -08:00
parent ef0ff55f1f
commit 3fa3bb5d03
43 changed files with 580 additions and 461 deletions
@@ -4,7 +4,7 @@ description: Update Alibaba model definitions with latest pricing and capabiliti
Update `src/modules/llms/server/openai/models/alibaba.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models & Pricing: https://www.alibabacloud.com/help/en/model-studio/models
@@ -4,7 +4,7 @@ description: Update Anthropic model definitions with latest pricing and capabili
Update `src/modules/llms/server/anthropic/anthropic.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://docs.claude.com/en/docs/about-claude/models/overview
@@ -4,7 +4,7 @@ description: Update DeepSeek model definitions with latest pricing and capabilit
Update `src/modules/llms/server/openai/models/deepseek.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Pricing: https://api-docs.deepseek.com/quick_start/pricing
@@ -4,7 +4,7 @@ description: Update Gemini model definitions with latest pricing and capabilitie
Update `src/modules/llms/server/gemini/gemini.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.types.ts`, `src/modules/llms/server/llm.server.types.ts`, and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.types.ts`, `src/modules/llms/server/llm.server.types.ts`, and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://ai.google.dev/gemini-api/docs/models
+1 -1
View File
@@ -4,7 +4,7 @@ description: Update Groq model definitions with latest pricing and capabilities
Update `src/modules/llms/server/openai/models/groq.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://console.groq.com/docs/models
+1 -1
View File
@@ -4,7 +4,7 @@ description: Update Kimi model definitions with latest pricing and capabilities
Update `src/modules/llms/server/openai/models/moonshot.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Pricing: https://platform.moonshot.ai/docs/pricing/chat
@@ -4,7 +4,7 @@ description: Update Mistral model definitions with latest pricing and capabiliti
Update `src/modules/llms/server/openai/models/mistral.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://docs.mistral.ai/getting-started/models/models_overview/
@@ -4,7 +4,7 @@ description: Update Ollama model definitions with latest featured models
Update `src/modules/llms/server/ollama/ollama.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Automated Workflow:**
```bash
@@ -4,7 +4,7 @@ description: Update OpenAI model definitions with latest pricing and capabilitie
Update `src/modules/llms/server/openai/models/openai.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Manual hint:** For pricing page, expand all tables before copying content.
@@ -4,7 +4,7 @@ description: Update OpenPipe model definitions with latest pricing and capabilit
Update `src/modules/llms/server/openai/models/openpipe.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Base Models: https://docs.openpipe.ai/base-models
@@ -4,7 +4,7 @@ description: Update Perplexity model definitions with latest pricing and capabil
Update `src/modules/llms/server/openai/models/perplexity.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models: https://docs.perplexity.ai/getting-started/models
+1 -1
View File
@@ -4,7 +4,7 @@ description: Update xAI model definitions with latest pricing and capabilities
Update `src/modules/llms/server/openai/models/xai.models.ts` with latest model definitions.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
**Primary Sources:**
- Models & Pricing: https://docs.x.ai/docs/models?cluster=us-east-1#detailed-pricing-for-all-grok-models
+1 -1
View File
@@ -54,7 +54,7 @@ If the running LocalAI instance is configured with a [Model Gallery](https://loc
At the time of writing, LocalAI does not publish the model `context window size`.
Every model is assumed to be capable of chatting, and with a context window of 4096 tokens.
Please update the [src/modules/llms/transports/server/openai/models/models.data.ts](../src/modules/llms/server/openai/models/models.data.ts)
Please update the [src/modules/llms/server/models.mappings.ts](../src/modules/llms/server/models.mappings.ts)
file with the mapping information between LocalAI model IDs and names/descriptions/tokens, etc.
# 🤝 Support
@@ -1,8 +1,15 @@
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import * as z from 'zod/v4';
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
import { Release } from '~/common/app.release';
import type { ModelDescriptionSchema } from '../llm.server.types';
// configuration
export const DEV_DEBUG_ANTHROPIC_MODELS = Release.IsNodeDevBuild;
const ANT_PAR_WEB: ModelDescriptionSchema['parameterSpecs'] = [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
@@ -239,3 +246,78 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
// retired: 'claude-2.1'
// retired: 'claude-2.0'
];
// -- Wire Types --

/**
 * Namespace for the Anthropic API Models List response schema.
 * NOTE: not merged into AIX because of possible circular dependency issues - future work.
 */
export namespace AnthropicWire_API_Models_List {

  export type ModelObject = z.infer<typeof ModelObject_schema>;
  const ModelObject_schema = z.object({
    type: z.literal('model'),
    id: z.string(),
    display_name: z.string(),
    created_at: z.string(),
  });

  // exported so callers can type the whole paginated response, not just the items
  // (this alias existed in the pre-refactor namespace and was dropped in the move)
  export type Response = z.infer<typeof Response_schema>;
  export const Response_schema = z.object({
    data: z.array(ModelObject_schema),
    has_more: z.boolean(),
    first_id: z.string().nullable(),
    last_id: z.string().nullable(),
  });

}
// -- Helper Functions --

/**
 * DEV: warns about models we still hardcode but that the Anthropic API no longer lists.
 * Mirrors Gemini's geminiDevCheckForSuperfluousModels_DEV. No-op outside dev builds.
 */
export function llmsAntDevCheckForObsoletedModels_DEV(availableModels: AnthropicWire_API_Models_List.ModelObject[]): void {
  if (!DEV_DEBUG_ANTHROPIC_MODELS) return;
  const liveModelIds = new Set(availableModels.map(({ id }) => id));
  const staleDefinitions = hardcodedAnthropicModels.filter(({ id }) => !liveModelIds.has(id));
  if (staleDefinitions.length > 0)
    console.log(`[DEV] Anthropic: obsoleted model definitions: [ ${staleDefinitions.map(m => m.id).join(', ')} ]`);
}
/**
 * Builds a default ModelDescriptionSchema for an Anthropic model we do not have hardcoded,
 * so brand-new models are usable on day 0 with sensible capability defaults.
 */
export function llmsAntCreatePlaceholderModel(model: AnthropicWire_API_Models_List.ModelObject): ModelDescriptionSchema {
  // unix seconds, as used by the rest of the model descriptions
  const createdUnixSeconds = Math.round(new Date(model.created_at).getTime() / 1000);
  // assume the newest models support the full current interface set
  const placeholderInterfaces = [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching];
  return {
    id: model.id,
    label: model.display_name,
    created: createdUnixSeconds,
    description: 'Newest model, description not available yet.',
    contextWindow: 200000,
    maxCompletionTokens: 8192,
    trainingDataCutoff: 'Latest',
    interfaces: placeholderInterfaces,
    // chatPrice: ...
    // benchmark: ...
  };
}
/**
 * Injects the LLM_IF_Tools_WebSearch interface for models that have web search/fetch parameters.
 * This allows the UI to show the web search indicator automatically based on model capabilities.
 */
export function llmsAntInjectWebSearchInterface(model: ModelDescriptionSchema): ModelDescriptionSchema {
  // a model advertises web capability via its Anthropic-specific parameter specs
  const hasWebParams = model.parameterSpecs?.some(spec =>
    spec.paramId === 'llmVndAntWebSearch' || spec.paramId === 'llmVndAntWebFetch',
  );
  if (!hasWebParams || model.interfaces?.includes(LLM_IF_Tools_WebSearch))
    return model;
  // guard the spread: interfaces is treated as optional above (`model.interfaces?.includes`),
  // so don't assume it is set here — the original spread would throw on undefined
  return {
    ...model,
    interfaces: [...(model.interfaces ?? []), LLM_IF_Tools_WebSearch],
  };
}
@@ -5,21 +5,15 @@ import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
import { env } from '~/server/env';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
import { Release } from '~/common/app.release';
import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types';
import { hardcodedAnthropicModels, hardcodedAnthropicVariants } from './anthropic.models';
import { fixupHost } from '~/modules/llms/server/openai/openai.router';
import { ListModelsResponse_schema } from '../llm.server.types';
import { fixupHost } from '../openai/openai.router';
import { listModelsRunDispatch } from '../listModels.dispatch';
// configuration and defaults
const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com';
const DEFAULT_HELICONE_ANTHROPIC_HOST = 'anthropic.hconeai.com';
const DEV_DEBUG_ANTHROPIC_MODELS = Release.IsNodeDevBuild;
const DEFAULT_ANTHROPIC_HEADERS = {
// Latest version hasn't changed (as of Feb 2025)
'anthropic-version': '2023-06-01',
@@ -165,10 +159,6 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string,
};
}
/** Converts an ISO-8601 date string to a unix timestamp, rounded to whole seconds. */
function roundTime(date: string) {
  const epochMillis = new Date(date).getTime();
  return Math.round(epochMillis / 1000);
}
// Input Schemas
@@ -185,23 +175,6 @@ const listModelsInputSchema = z.object({
});
// Helpers
/**
 * Injects the LLM_IF_Tools_WebSearch interface for models that have web search/fetch parameters.
 * This allows the UI to show the web search indicator automatically based on model capabilities.
 */
function _injectWebSearchInterface(model: ModelDescriptionSchema): ModelDescriptionSchema {
  const webParamIds = ['llmVndAntWebSearch', 'llmVndAntWebFetch'];
  const advertisesWeb = model.parameterSpecs?.some(({ paramId }) => webParamIds.includes(paramId));
  // leave untouched when there's nothing to inject, or it's already present
  if (!advertisesWeb || model.interfaces?.includes(LLM_IF_Tools_WebSearch))
    return model;
  return { ...model, interfaces: [...model.interfaces, LLM_IF_Tools_WebSearch] };
}
// Router
export const llmAnthropicRouter = createTRPCRouter({
@@ -210,81 +183,9 @@ export const llmAnthropicRouter = createTRPCRouter({
listModels: publicProcedure
.input(listModelsInputSchema)
.output(ListModelsResponse_schema)
.query(async ({ input: { access } }) => {
.query(async ({ input: { access }, signal }) => {
// get the models
const wireModels = await anthropicGETOrThrow(access, '/v1/models?limit=1000');
const { data: availableModels } = AnthropicWire_API_Models_List.Response_schema.parse(wireModels);
// sort by: family (desc) > class (desc) > date (desc) -- Future NOTE: -5- will match -4-5- and -3-5-.. figure something else out
const familyPrecedence = ['-4-7-', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f));
const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c));
// cast the models to the common schema
const models = availableModels
.sort((a, b) => {
const familyA = getFamilyIdx(a.id);
const familyB = getFamilyIdx(b.id);
const classA = getClassIdx(a.id);
const classB = getClassIdx(b.id);
// family desc (lower index = better, -1 = unknown goes last)
if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB);
// class desc
if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
// date desc (newer first) - string comparison works since format is YYYYMMDD
return b.id.localeCompare(a.id);
})
.reduce((acc, model) => {
// find the model description
const hardcodedModel = hardcodedAnthropicModels.find(m => m.id === model.id);
if (hardcodedModel) {
// update creation date
if (!hardcodedModel.created && model.created_at)
hardcodedModel.created = roundTime(model.created_at);
// add FIRST a thinking variant, if defined
if (hardcodedAnthropicVariants[model.id])
acc.push({
...hardcodedModel,
...hardcodedAnthropicVariants[model.id],
});
// add the base model
acc.push(hardcodedModel);
} else {
// for day-0 support of new models, create a placeholder model using sensible defaults
const novelModel = _createPlaceholderModel(model);
// if (DEV_DEBUG_ANTHROPIC_MODELS) // kind of important...
console.log('[DEV] anthropic.router: new model found, please configure it:', novelModel.id);
acc.push(novelModel);
}
return acc;
}, [] as ModelDescriptionSchema[])
.map(_injectWebSearchInterface);
// developers warning for obsoleted models (we have them, but they are not in the API response anymore)
if (DEV_DEBUG_ANTHROPIC_MODELS) {
const apiModelIds = new Set(availableModels.map(m => m.id));
const additionalModels = hardcodedAnthropicModels.filter(m => !apiModelIds.has(m.id));
if (additionalModels.length > 0)
console.log('[DEV] anthropic.router: obsoleted models:', additionalModels.map(m => m.id).join(', '));
}
// additionalModels.forEach(m => {
// m.label += ' (Removed)';
// m.isLegacy = true;
// });
// models.push(...additionalModels);
const models = await listModelsRunDispatch(access, signal);
return { models };
}),
@@ -328,47 +229,3 @@ export const llmAnthropicRouter = createTRPCRouter({
}),
});
/**
 * Create a placeholder ModelDescriptionSchema for models not in the hardcoded list,
 * using sensible defaults with the newest available interfaces.
 */
function _createPlaceholderModel(model: AnthropicWire_API_Models_List.ModelObject): ModelDescriptionSchema {
  // creation time in unix seconds, matching the other descriptions
  const created = Math.round(new Date(model.created_at).getTime() / 1000);
  return {
    id: model.id,
    label: model.display_name,
    created,
    description: 'Newest model, description not available yet.',
    contextWindow: 200000,
    maxCompletionTokens: 8192,
    trainingDataCutoff: 'Latest',
    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
    // chatPrice: ...
    // benchmark: ...
  };
}
/**
 * Namespace for the Anthropic API Models List response schema.
 * NOTE: not merged into AIX because of possible circular dependency issues - future work.
 */
namespace AnthropicWire_API_Models_List {

  // one entry of the paginated /v1/models response
  export type ModelObject = z.infer<typeof ModelObject_schema>;
  const ModelObject_schema = z.object({
    type: z.literal('model'),
    id: z.string(),
    display_name: z.string(),
    // ISO date string — converted to unix seconds by callers (roundTime)
    created_at: z.string(),
  });

  // the full paginated response: items plus cursor fields
  export type Response = z.infer<typeof Response_schema>;
  export const Response_schema = z.object({
    data: z.array(ModelObject_schema),
    has_more: z.boolean(),
    first_id: z.string().nullable(),
    last_id: z.string().nullable(),
  });

}
@@ -7,11 +7,11 @@ import packageJson from '../../../../../package.json';
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { GeminiWire_API_Models_List, GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import { GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import { ListModelsResponse_schema } from '../llm.server.types';
import { geminiDevCheckForParserMisses_DEV, geminiDevCheckForSuperfluousModels_DEV, geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels } from './gemini.models';
import { fixupHost } from '~/modules/llms/server/openai/openai.router';
import { fixupHost } from '../openai/openai.router';
import { listModelsRunDispatch } from '../listModels.dispatch';
// Default hosts
@@ -93,29 +93,11 @@ export const llmGeminiRouter = createTRPCRouter({
listModels: publicProcedure
.input(accessOnlySchema)
.output(ListModelsResponse_schema)
.query(async ({ input }) => {
.query(async ({ input, signal }) => {
// get the models
const wireModels = await geminiGET(input.access, null, GeminiWire_API_Models_List.getPath, false);
const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models;
geminiDevCheckForParserMisses_DEV(wireModels, detailedModels);
geminiDevCheckForSuperfluousModels_DEV(detailedModels.map(model => model.name));
const models = await listModelsRunDispatch(input.access, signal);
// NOTE: no need to retrieve info for each of the models (e.g. /v1beta/model/gemini-pro).,
// as the List API already all the info on all the models
// first filter from the original list
const filteredModels = detailedModels.filter(geminiFilterModels);
// map to our output schema
const models = filteredModels
.map(geminiModelToModelDescription)
.filter(model => !!model)
.sort(geminiSortModels);
return {
models: geminiModelsAddVariants(models),
};
return { models };
}),
});
@@ -0,0 +1,425 @@
import { TRPCError } from '@trpc/server';
import type { AixAPI_Access } from '~/modules/aix/server/api/aix.wiretypes';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { ModelDescriptionSchema } from './llm.server.types';
// protocol: Anthropic
import { AnthropicWire_API_Models_List, hardcodedAnthropicModels, hardcodedAnthropicVariants, llmsAntCreatePlaceholderModel, llmsAntDevCheckForObsoletedModels_DEV, llmsAntInjectWebSearchInterface } from './anthropic/anthropic.models';
import { anthropicAccess } from './anthropic/anthropic.router';
// protocol: Gemini
import { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import { geminiAccess } from './gemini/gemini.router';
import { geminiDevCheckForParserMisses_DEV, geminiDevCheckForSuperfluousModels_DEV, geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels } from './gemini/gemini.models';
// protocol: Ollama
import { OLLAMA_BASE_MODELS } from './ollama/ollama.models';
import { ollamaAccess } from './ollama/ollama.router';
import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama/ollama.wiretypes';
// protocol: OpenAI-compatible
import { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './openai/models/alibaba.models';
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './openai/models/azure.models';
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './openai/models/chutesai.models';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './openai/models/deepseek.models';
import { fastAPIHeuristic, fastAPIModels } from './openai/models/fastapi.models';
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './openai/models/fireworksai.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './openai/models/groq.models';
import { lmStudioModelToModelDescription } from './openai/models/lmstudio.models';
import { localAIModelSortFn, localAIModelToModelDescription } from './openai/models/localai.models';
import { mistralModels } from './openai/models/mistral.models';
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './openai/models/moonshot.models';
import { openAIAccess } from './openai/openai.router';
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './openai/models/openpipe.models';
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './openai/models/openrouter.models';
import { openaiDevCheckForModelsOverlap_DEV, openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './openai/models/openai.models';
import { perplexityHardcodedModelDescriptions, perplexityInjectVariants } from './openai/models/perplexity.models';
import { togetherAIModelsToModelDescriptions } from './openai/models/together.models';
import { xaiFetchModelDescriptions, xaiModelSort } from './openai/models/xai.models';
// -- Dispatch types --

/**
 * Per-vendor pair of operations for listing models: fetch the raw wire payload,
 * then map it to our common ModelDescriptionSchema list.
 * TWireModels is the vendor-specific wire shape returned by fetchModels and
 * consumed by convertToDescriptions (defaults to `any` for untyped dispatches).
 */
export type ListModelsDispatch<TWireModels = any> = {
  // performs the network call(s) and returns the vendor's raw models payload
  fetchModels: () => Promise<TWireModels>;
  // maps the raw payload to the common model descriptions
  convertToDescriptions: (wireModels: TWireModels) => ModelDescriptionSchema[];
};
/**
 * Identity helper that pins type inference: TypeScript infers T from the
 * fetchModels return type and then enforces it on convertToDescriptions.
 */
function createDispatch<T>(typedDispatch: ListModelsDispatch<T>): ListModelsDispatch<T> {
  return typedDispatch;
}
// -- Specialized Implementations -- Core of Server-side LLM Model Listing abstraction --

/**
 * Lists the models for the given access by routing to the vendor-specific dispatch:
 * fetches the raw wire payload, then converts it to ModelDescriptionSchema[].
 */
export async function listModelsRunDispatch(access: AixAPI_Access, signal?: AbortSignal) {
  const { fetchModels, convertToDescriptions } = _listModelsCreateDispatch(access, signal);
  const wirePayload = await fetchModels();
  return convertToDescriptions(wirePayload);
}
/**
* Specializes to the correct vendor a request for listing models.
* This follows the same pattern as AIX's chatGenerate dispatcher for consistency.
*/
function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal): ListModelsDispatch {
// dialect is the only common property
const { dialect } = access;
switch (dialect) {
case 'anthropic': {
return createDispatch({
fetchModels: async () => {
const { headers, url } = anthropicAccess(access, '/v1/models?limit=1000', {/* ... no options for list ... */ });
const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Anthropic', signal });
return AnthropicWire_API_Models_List.Response_schema.parse(wireModels);
},
convertToDescriptions: (wireModelsResponse) => {
const { data: availableModels } = wireModelsResponse;
// sort by: family (desc) > class (desc) > date (desc) -- Future NOTE: -5- will match -4-5- and -3-5-.. figure something else out
const familyPrecedence = ['-4-7-', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f));
const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c));
// cast the models to the common schema
const models = availableModels
.sort((a, b) => {
const familyA = getFamilyIdx(a.id);
const familyB = getFamilyIdx(b.id);
const classA = getClassIdx(a.id);
const classB = getClassIdx(b.id);
// family desc (lower index = better, -1 = unknown goes last)
if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB);
// class desc
if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
// date desc (newer first) - string comparison works since format is YYYYMMDD
return b.id.localeCompare(a.id);
})
.reduce((acc: ModelDescriptionSchema[], model) => {
// find the model description
const hardcodedModel = hardcodedAnthropicModels.find(m => m.id === model.id);
if (hardcodedModel) {
// update creation date
function roundTime(date: string) {
return Math.round(new Date(date).getTime() / 1000);
}
if (!hardcodedModel.created && model.created_at)
hardcodedModel.created = roundTime(model.created_at);
// add FIRST a thinking variant, if defined
if (hardcodedAnthropicVariants[model.id])
acc.push({
...hardcodedModel,
...hardcodedAnthropicVariants[model.id],
});
// add the base model
acc.push(hardcodedModel);
} else {
// for day-0 support of new models, create a placeholder model using sensible defaults
const novelModel = llmsAntCreatePlaceholderModel(model);
// if (DEV_DEBUG_ANTHROPIC_MODELS) // kind of important...
console.log('[DEV] anthropic.router: new model found, please configure it:', novelModel.id);
acc.push(novelModel);
}
return acc;
}, [] as ModelDescriptionSchema[])
.map(llmsAntInjectWebSearchInterface);
// [DEV] check for obsoleted models (defined but no longer in API response)
llmsAntDevCheckForObsoletedModels_DEV(availableModels);
return models;
},
});
}
case 'gemini': {
return createDispatch({
fetchModels: async () => {
const { headers, url } = geminiAccess(access, null, GeminiWire_API_Models_List.getPath, false);
const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Gemini', signal });
const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models;
// [DEV] check for missing or superfluous models
geminiDevCheckForParserMisses_DEV(wireModels, detailedModels);
geminiDevCheckForSuperfluousModels_DEV(detailedModels.map((model: any) => model.name));
return detailedModels;
},
convertToDescriptions: (detailedModels) => {
// NOTE: no need to retrieve info for each of the models (e.g. /v1beta/model/gemini-pro),
// as the List API already has all the info on all the models
// first filter from the original list
const filteredModels = detailedModels.filter(geminiFilterModels);
// map to our output schema
const models = filteredModels
.map(geminiModelToModelDescription)
.filter(model => !!model)
.sort(geminiSortModels);
return geminiModelsAddVariants(models);
},
});
}
case 'ollama': {
return createDispatch({
fetchModels: async () => {
const { headers, url } = ollamaAccess(access, '/api/tags');
const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama', signal });
const models = wireOllamaListModelsSchema.parse(wireModels).models;
// retrieve info for each of the models
return await Promise.all(models.map(async (model) => {
// perform /api/show on each model to get detailed info
const { headers, url } = ollamaAccess(access, '/api/show');
const wireModelInfo = await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body: { 'name': model.name }, name: 'Ollama', signal });
const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo);
return { ...model, ...modelInfo };
}));
},
convertToDescriptions: (detailedModels) => {
return detailedModels.map((model) => {
// the model name is in the format "name:tag" (default tag = 'latest')
const [modelName, modelTag] = model.name.split(':');
// pretty label and description
const label = serverCapitalizeFirstLetter(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : '');
const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {};
let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and not used by nobody
// prepend the parameters count and quantization level
if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) {
let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : '';
if (model.details.quantization_level)
firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')');
if (model.size)
firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`;
if (baseModel.hasTools)
firstLine += ' [tools]';
if (baseModel.hasVision)
firstLine += ' [vision]';
description = firstLine + '\n\n' + description;
}
/* Find the context window from the 'num_ctx' line in the parameters string, if present
* - https://github.com/enricoros/big-AGI/issues/309
* - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096
* - We are tracking the Upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future
*/
let contextWindow = baseModel.contextWindow || 8192;
if (model.parameters) {
// split the parameters into lines, and find one called "num_ctx ...spaces... number"
const paramsNumCtx = model.parameters.split('\n').find((line) => line.startsWith('num_ctx '));
if (paramsNumCtx) {
const numCtxValue: string = paramsNumCtx.split(/\s+/)[1];
if (numCtxValue) {
const numCtxNumber: number = parseInt(numCtxValue);
if (!isNaN(numCtxNumber))
contextWindow = numCtxNumber;
}
}
}
// auto-detect interfaces from the hardcoded description (in turn parsed from the html page)
const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : [];
if (baseModel.hasTools)
interfaces.push(LLM_IF_OAI_Fn);
if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic
interfaces.push(LLM_IF_OAI_Vision);
// console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n');
return {
id: model.name,
label,
created: Date.parse(model.modified_at) ?? undefined,
updated: Date.parse(model.modified_at) ?? undefined,
description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown',
contextWindow,
...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}),
interfaces,
};
});
},
});
}
case 'perplexity':
// [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/getting-started/pricing#sonar-models-chat-completions)
return createDispatch({
  // no remote call: Perplexity does not expose a models-listing API (see link above)
  fetchModels: async () => null,
  // serve the hardcoded catalog, expanding each base model into its variants
  convertToDescriptions: () => perplexityHardcodedModelDescriptions().reduce(perplexityInjectVariants, []),
});
case 'xai':
// [xAI]: custom models listing
return createDispatch({
  // xAI has its own listing endpoint; the fetcher already returns full model descriptions
  fetchModels: async () => xaiFetchModelDescriptions(access),
  // nothing to map - just order the descriptions (NOTE: sorts the fetched array in place)
  convertToDescriptions: models => models.sort(xaiModelSort),
});
case 'alibaba':
case 'azure':
case 'deepseek':
case 'groq':
case 'lmstudio':
case 'localai':
case 'mistral':
case 'moonshot':
case 'openai':
case 'openpipe':
case 'openrouter':
case 'togetherai':
return createDispatch({
// [OpenAI-compatible dialects]: fetch openAI-style /v1/models API
fetchModels: async () => {
  // `access`, `dialect` and `signal` are captured from the enclosing listModels scope
  // openAIAccess builds the request URL and auth headers for this access configuration
  const { headers, url } = openAIAccess(access, null, '/v1/models');
  // throws a TRPCError (via fetchJsonOrTRPCThrow) on network/HTTP failure
  return fetchJsonOrTRPCThrow<OpenAIWire_API_Models_List.Response>({ url, headers, name: `OpenAI/${serverCapitalizeFirstLetter(dialect)}`, signal });
},
// OpenAI models conversions: dependent on the dialect
// Normalizes the wire /v1/models response (dedupe, sort by id), then maps it to
// ModelDescriptionSchema[] with the dialect-specific filter/map/sort pipeline.
convertToDescriptions: (openAIWireModelsResponse) => {

  // [Together] missing the .data property - so we have to do this early
  if (dialect === 'togetherai')
    return togetherAIModelsToModelDescriptions(openAIWireModelsResponse);

  // NOTE: we don't zod here as it would strip unknown properties needed for some dialects - so we proceed optimistically
  // let maybeModels = OpenAIWire_API_Models_List.Response_schema.parse(openAIWireModelsResponse).data || [];
  let maybeModels = openAIWireModelsResponse?.data || [];

  // de-duplicate by ids, keeping the first occurrence (can happen for local servers.. upstream bugs)
  // O(n) with a Set, rather than a findIndex-inside-filter O(n^2) scan
  const preCount = maybeModels.length;
  const seenModelIds = new Set<string>();
  maybeModels = maybeModels.filter(model => {
    if (seenModelIds.has(model.id)) return false;
    seenModelIds.add(model.id);
    return true;
  });
  if (preCount !== maybeModels.length)
    console.warn(`openai.router.listModels: removed ${preCount - maybeModels.length} duplicate models for dialect ${dialect}`);

  // sort by id
  maybeModels.sort((a, b) => a.id.localeCompare(b.id));

  // every dialect has a different way to enumerate models - we execute the mapping on the server side
  // NOTE: cases with local `const` declarations are braced to keep them block-scoped (lint: no-case-declarations)
  switch (dialect) {

    case 'alibaba':
      return maybeModels
        .filter(({ id }) => alibabaModelFilter(id))
        .map(({ id, created }) => alibabaModelToModelDescription(id, created))
        .sort(alibabaModelSort);

    case 'azure': {
      // Azure enumerates deployments, not models - parse those first
      const azureOpenAIDeployments = azureParseFromDeploymentsAPI(maybeModels);
      return azureOpenAIDeployments
        .filter(azureDeploymentFilter)
        .map(azureDeploymentToModelDescription)
        .sort(openAISortModels);
    }

    case 'deepseek':
      return maybeModels
        .filter(({ id }) => deepseekModelFilter(id))
        .map(({ id }) => deepseekModelToModelDescription(id))
        .sort(deepseekModelSort);

    case 'groq':
      return maybeModels
        .filter(groqModelFilter)
        .map(groqModelToModelDescription)
        .sort(groqModelSortFn);

    case 'lmstudio':
      return maybeModels
        .map(({ id }) => lmStudioModelToModelDescription(id));

    case 'localai':
      return maybeModels
        .map(({ id }) => localAIModelToModelDescription(id))
        .sort(localAIModelSortFn);

    case 'mistral':
      return mistralModels(maybeModels);

    case 'moonshot':
      return maybeModels
        .filter(moonshotModelFilter)
        .map(moonshotModelToModelDescription)
        .sort(moonshotModelSortFn);

    case 'openai': {
      // [ChutesAI] special case for model enumeration
      const oaiHost = access.oaiHost;
      if (chutesAIHeuristic(oaiHost))
        return chutesAIModelsToModelDescriptions(maybeModels);
      // [FireworksAI] special case for model enumeration
      if (fireworksAIHeuristic(oaiHost))
        return fireworksAIModelsToModelDescriptions(maybeModels);
      // [FastChat] make the best of the little info
      if (fastAPIHeuristic(maybeModels))
        return fastAPIModels(maybeModels);
      // [OpenAI or OpenAI-compatible]: chat-only models, custom sort, manual mapping
      const models = maybeModels
        // limit to only 'gpt' and 'non instruct' models
        .filter(openAIModelFilter)
        // to model description
        .map((model: any): ModelDescriptionSchema => openAIModelToModelDescription(model.id, model.created))
        // inject variants
        .reduce(openAIInjectVariants, [] as ModelDescriptionSchema[])
        // custom OpenAI sort
        .sort(openAISortModels);
      // [DEV] check for superfluous and missing models
      openaiDevCheckForModelsOverlap_DEV(maybeModels, models);
      return models;
    }

    case 'openpipe':
      return [
        ...maybeModels.map(openPipeModelToModelDescriptions),
        ...openPipeModelDescriptions().sort(openPipeModelSort),
      ];

    case 'openrouter':
      // openRouterStatTokenizers(maybeModels);
      return maybeModels
        .sort(openRouterModelFamilySortFn)
        .map(openRouterModelToModelDescription)
        .filter(desc => !!desc)
        .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);

    default: {
      // compile-time exhaustiveness: a new dialect above without a case here fails to typecheck
      const _exhaustiveCheck: never = dialect;
      throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unhandled dialect: ${dialect}` });
    }
  }
},
});
default:
const _exhaustiveCheck: never = dialect;
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unsupported dialect: ${dialect}` });
}
}
@@ -1,4 +1,4 @@
import type { ModelDescriptionSchema } from '../../llm.server.types';
import type { ModelDescriptionSchema } from './llm.server.types';
// -- Manual model mappings: types and helper --
+17 -94
View File
@@ -3,23 +3,18 @@ import { TRPCError } from '@trpc/server';
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
import { env } from '~/server/env';
import { fetchJsonOrTRPCThrow, fetchTextOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { capitalizeFirstLetter } from '~/common/util/textUtils';
import { fetchTextOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { serverCapitalizeFirstLetter } from '~/server/wire';
import { ListModelsResponse_schema } from '../llm.server.types';
import { fixupHost } from '../openai/openai.router';
import { listModelsRunDispatch } from '../listModels.dispatch';
import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models';
import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama.wiretypes';
import { fixupHost } from '~/modules/llms/server/openai/openai.router';
// Default hosts
// configuration
const DEFAULT_OLLAMA_HOST = 'http://127.0.0.1:11434';
// export const OLLAMA_PATH_CHAT = '/api/chat';
const OLLAMA_PATH_TAGS = '/api/tags';
const OLLAMA_PATH_SHOW = '/api/show';
// Mappers
@@ -84,15 +79,15 @@ export function ollamaCompletionPayload(model: OpenAIModelSchema, history: OpenA
};
}*/
async function ollamaGET<TOut extends object>(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = ollamaAccess(access, apiPath);
return await fetchJsonOrTRPCThrow<TOut>({ url, headers, name: 'Ollama' });
}
// async function ollamaGET<TOut extends object>(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
// const { headers, url } = ollamaAccess(access, apiPath);
// return await fetchJsonOrTRPCThrow<TOut>({ url, headers, name: 'Ollama' });
// }
async function ollamaPOST<TOut extends object, TPostBody extends object>(access: OllamaAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = ollamaAccess(access, apiPath);
return await fetchJsonOrTRPCThrow<TOut, TPostBody>({ url, method: 'POST', headers, body, name: 'Ollama' });
}
// async function ollamaPOST<TOut extends object, TPostBody extends object>(access: OllamaAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
// const { headers, url } = ollamaAccess(access, apiPath);
// return await fetchJsonOrTRPCThrow<TOut, TPostBody>({ url, method: 'POST', headers, body, name: 'Ollama' });
// }
// Input/Output Schemas
@@ -137,7 +132,7 @@ export const llmOllamaRouter = createTRPCRouter({
return {
pullableModels: Object.entries(OLLAMA_BASE_MODELS).map(([model_id, model]) => ({
id: model_id,
label: capitalizeFirstLetter(model_id),
label: serverCapitalizeFirstLetter(model_id),
tag: 'latest',
tags: model.tags?.length ? model.tags : [],
description: '', // model.description, // REMOVED description - bloated and not used by nobody
@@ -185,83 +180,11 @@ export const llmOllamaRouter = createTRPCRouter({
listModels: publicProcedure
.input(accessOnlySchema)
.output(ListModelsResponse_schema)
.query(async ({ input }) => {
.query(async ({ input, signal }) => {
// get the models
const wireModels = await ollamaGET(input.access, OLLAMA_PATH_TAGS);
let models = wireOllamaListModelsSchema.parse(wireModels).models;
const models = await listModelsRunDispatch(input.access, signal);
// retrieve info for each of the models (/api/show, post call, in parallel)
const detailedModels = await Promise.all(models.map(async model => {
const wireModelInfo = await ollamaPOST(input.access, { 'name': model.name }, OLLAMA_PATH_SHOW);
const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo);
return { ...model, ...modelInfo };
}));
return {
models: detailedModels.map(model => {
// the model name is in the format "name:tag" (default tag = 'latest')
const [modelName, modelTag] = model.name.split(':');
// pretty label and description
const label = capitalizeFirstLetter(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : '');
const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {};
let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and not used by nobody
// prepend the parameters count and quantization level
if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) {
let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : '';
if (model.details.quantization_level)
firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')');
if (model.size)
firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`;
if (baseModel.hasTools)
firstLine += ' [tools]';
if (baseModel.hasVision)
firstLine += ' [vision]';
description = firstLine + '\n\n' + description;
}
/* Find the context window from the 'num_ctx' line in the parameters string, if present
* - https://github.com/enricoros/big-AGI/issues/309
* - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096
* - We are tracking the Upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future
*/
let contextWindow = baseModel.contextWindow || 8192;
if (model.parameters) {
// split the parameters into lines, and find one called "num_ctx ...spaces... number"
const paramsNumCtx = model.parameters.split('\n').find(line => line.startsWith('num_ctx '));
if (paramsNumCtx) {
const numCtxValue: string = paramsNumCtx.split(/\s+/)[1];
if (numCtxValue) {
const numCtxNumber: number = parseInt(numCtxValue);
if (!isNaN(numCtxNumber))
contextWindow = numCtxNumber;
}
}
}
// auto-detect interfaces from the hardcoded description (in turn parsed from the html page)
const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : [];
if (baseModel.hasTools)
interfaces.push(LLM_IF_OAI_Fn);
if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic
interfaces.push(LLM_IF_OAI_Vision);
// console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n');
return {
id: model.name,
label,
created: Date.parse(model.modified_at) ?? undefined,
updated: Date.parse(model.modified_at) ?? undefined,
description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown',
contextWindow,
...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}),
interfaces,
};
}),
};
return { models };
}),
});
@@ -2,7 +2,7 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stor
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
// - Models & Pricing: https://www.alibabacloud.com/help/en/model-studio/models
// - Billing Guide: https://www.alibabacloud.com/help/en/model-studio/billing-for-model-studio
@@ -11,7 +11,7 @@ import type { ModelDescriptionSchema, RequestAccessValues } from '../../llm.serv
import type { OpenAIAccessSchema } from '../openai.router';
import { fixupHost } from '../openai.router';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
import { _fallbackOpenAIModel, _knownOpenAIChatModels } from './openai.models';
@@ -6,7 +6,7 @@ import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
export function chutesAIHeuristic(hostname: string) {
@@ -2,7 +2,7 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning }
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
const _knownDeepseekChatModels: ManualMappings = [
@@ -3,7 +3,7 @@ import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/w
import { DModelInterfaceV1, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
const _fastAPIKnownModels: ManualMappings = [
@@ -4,8 +4,8 @@ import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { wireFireworksAIListOutputSchema } from '../fireworksai.wiretypes';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
import { wireFireworksAIListOutputSchema } from '../wiretypes/fireworksai.wiretypes';
export function fireworksAIHeuristic(hostname: string) {
@@ -1,8 +1,8 @@
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { wireGroqModelsListOutputSchema } from '../groq.wiretypes';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
import { wireGroqModelsListOutputSchema } from '../wiretypes/groq.wiretypes';
/**
@@ -1,7 +1,7 @@
import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping } from './models.data';
import { fromManualMapping } from '../../models.mappings';
export function lmStudioModelToModelDescription(modelId: string): ModelDescriptionSchema {
@@ -1,8 +1,9 @@
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { capitalizeFirstLetter } from '~/common/util/textUtils';
import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, type ManualMappings } from './models.data';
import { fromManualMapping, type ManualMappings } from '../../models.mappings';
// [LocalAI]
@@ -37,7 +38,7 @@ export function localAIModelToModelDescription(modelId: string): ModelDescriptio
.replace(' Q4_K_M', ' (Q4_K_M)')
.replace(' F16', ' (F16)')
.split(' ')
.map(capitalizeFirstLetter)
.map(serverCapitalizeFirstLetter)
.join(' ');
const description = `LocalAI model. File: ${modelId}`;
@@ -3,7 +3,7 @@ import * as z from 'zod/v4';
import { LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
const IF_K2 = [
@@ -4,7 +4,7 @@ import { DModelInterfaceV1, LLM_IF_HOTFIX_NoTemperature, LLM_IF_HOTFIX_StripImag
import { Release } from '~/common/app.release';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, KnownModel, ManualMappings } from './models.data';
import { fromManualMapping, KnownModel, ManualMappings } from '../../models.mappings';
// OpenAI Model Variants
@@ -1,8 +1,8 @@
import type { ModelDescriptionSchema } from '~/modules/llms/server/llm.server.types';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { _knownOpenAIChatModels } from '~/modules/llms/server/openai/models/openai.models';
import { wireOpenPipeModelOutputSchema } from '~/modules/llms/server/openai/openpipe.wiretypes';
import { fromManualMapping, KnownModel } from '~/modules/llms/server/openai/models/models.data';
import { wireOpenPipeModelOutputSchema } from '~/modules/llms/server/openai/wiretypes/openpipe.wiretypes';
import { fromManualMapping, KnownModel } from '~/modules/llms/server/models.mappings';
const _knownOpenPipeChatModels: ModelDescriptionSchema[] = [
@@ -3,8 +3,8 @@ import * as z from 'zod/v4';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping } from './models.data';
import { wireOpenrouterModelsListOutputSchema } from '../openrouter.wiretypes';
import { fromManualMapping } from '../../models.mappings';
import { wireOpenrouterModelsListOutputSchema } from '../wiretypes/openrouter.wiretypes';
// configuration
@@ -144,7 +144,7 @@ export function perplexityInjectVariants(models: ModelDescriptionSchema[], model
return models;
}
export function perplexityAIModelDescriptions() {
export function perplexityHardcodedModelDescriptions() {
// Returns the list of known Perplexity models
return _knownPerplexityChatModels;
}
@@ -1,8 +1,8 @@
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
import { wireTogetherAIListOutputSchema } from '../togetherai.wiretypes';
import { fromManualMapping, ManualMappings } from '../../models.mappings';
import { wireTogetherAIListOutputSchema } from '../wiretypes/togetherai.wiretypes';
// Note: 2025-01-28 - we used to have hardcoded models here, but now we have a dynamic
@@ -5,7 +5,7 @@ import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, KnownModel, ManualMappings } from './models.data';
import { fromManualMapping, KnownModel, ManualMappings } from '../../models.mappings';
import { openAIAccess, OpenAIAccessSchema } from '../openai.router';
@@ -179,7 +179,7 @@ const _knownXAIChatModels: ManualMappings = [
// xAI Model Descriptions
export async function xaiModelDescriptions(access: OpenAIAccessSchema): Promise<ModelDescriptionSchema[]> {
export async function xaiFetchModelDescriptions(access: OpenAIAccessSchema): Promise<ModelDescriptionSchema[]> {
// List models
const { headers, url } = openAIAccess(access, null, '/v1/language-models');
+5 -156
View File
@@ -7,31 +7,15 @@ import { fetchJsonOrTRPCThrow, TRPCFetcherError } from '~/server/trpc/trpc.route
import { serverCapitalizeFirstLetter } from '~/server/wire';
import type { T2ICreateImageAsyncStreamOp } from '~/modules/t2i/t2i.server';
import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
import { heartbeatsWhileAwaiting } from '~/modules/aix/server/dispatch/heartbeatsWhileAwaiting';
import { Brand } from '~/common/app.config';
import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
import { ListModelsResponse_schema, ModelDescriptionSchema, RequestAccessValues } from '../llm.server.types';
import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './models/alibaba.models';
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureOpenAIAccess, azureParseFromDeploymentsAPI } from './models/azure.models';
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './models/chutesai.models';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
import { fastAPIHeuristic, fastAPIModels } from './models/fastapi.models';
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './models/fireworksai.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './models/groq.models';
import { lmStudioModelToModelDescription } from './models/lmstudio.models';
import { localAIModelSortFn, localAIModelToModelDescription } from './models/localai.models';
import { mistralModels } from './models/mistral.models';
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './models/moonshot.models';
import { openaiDevCheckForModelsOverlap_DEV, openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './models/openpipe.models';
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models';
import { perplexityAIModelDescriptions, perplexityInjectVariants } from './models/perplexity.models';
import { togetherAIModelsToModelDescriptions } from './models/together.models';
import { wireLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
import { xaiModelDescriptions, xaiModelSort } from './models/xai.models';
import { azureOpenAIAccess } from './models/azure.models';
import { listModelsRunDispatch } from '../listModels.dispatch';
import { wireLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './wiretypes/localai.wiretypes';
const openAIDialects = z.enum([
@@ -181,142 +165,7 @@ export const llmOpenAIRouter = createTRPCRouter({
.query(async ({ input: { access }, signal }): Promise<{ models: ModelDescriptionSchema[] }> => {
let models: ModelDescriptionSchema[];
// [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/guides/model-cards)
if (access.dialect === 'perplexity') {
models = perplexityAIModelDescriptions()
.reduce(perplexityInjectVariants, [] as ModelDescriptionSchema[]);
return { models };
}
// [xAI]: custom models listing
if (access.dialect === 'xai')
return { models: (await xaiModelDescriptions(access)).sort(xaiModelSort) };
// [OpenAI-dialects]: fetch openAI-style for all but Azure (will be then used in each dialect)
const openAIWireModelsResponse = await openaiGETOrThrow<OpenAIWire_API_Models_List.Response>(access, '/v1/models', signal);
// [Together] missing the .data property
if (access.dialect === 'togetherai')
return { models: togetherAIModelsToModelDescriptions(openAIWireModelsResponse) };
let openAIModels = openAIWireModelsResponse?.data || [];
// de-duplicate by ids (can happen for local servers.. upstream bugs)
const preCount = openAIModels.length;
openAIModels = openAIModels.filter((model, index) => openAIModels.findIndex(m => m.id === model.id) === index);
if (preCount !== openAIModels.length)
console.warn(`openai.router.listModels: removed ${preCount - openAIModels.length} duplicate models for dialect ${access.dialect}`);
// sort by id
openAIModels.sort((a, b) => a.id.localeCompare(b.id));
// every dialect has a different way to enumerate models - we execute the mapping on the server side
switch (access.dialect) {
case 'alibaba':
models = openAIModels
.filter(({ id }) => alibabaModelFilter(id))
.map(({ id, created }) => alibabaModelToModelDescription(id, created))
.sort(alibabaModelSort);
break;
case 'azure':
const azureOpenAIDeployments = azureParseFromDeploymentsAPI(openAIModels);
models = azureOpenAIDeployments
.filter(azureDeploymentFilter)
.map(azureDeploymentToModelDescription)
.sort(openAISortModels);
break;
case 'deepseek':
models = openAIModels
.filter(({ id }) => deepseekModelFilter(id))
.map(({ id }) => deepseekModelToModelDescription(id))
.sort(deepseekModelSort);
break;
case 'groq':
models = openAIModels
.filter(groqModelFilter)
.map(groqModelToModelDescription)
.sort(groqModelSortFn);
break;
case 'lmstudio':
models = openAIModels
.map(({ id }) => lmStudioModelToModelDescription(id));
break;
// [LocalAI]: map id to label
case 'localai':
models = openAIModels
.map(({ id }) => localAIModelToModelDescription(id))
.sort(localAIModelSortFn);
break;
case 'mistral':
models = mistralModels(openAIModels);
break;
case 'moonshot':
models = openAIModels
.filter(moonshotModelFilter)
.map(moonshotModelToModelDescription)
.sort(moonshotModelSortFn);
break;
// [OpenAI]: chat-only models, custom sort, manual mapping
case 'openai':
// [ChutesAI] special case for model enumeration
if (chutesAIHeuristic(access.oaiHost))
return { models: chutesAIModelsToModelDescriptions(openAIModels) };
// [FireworksAI] special case for model enumeration
if (fireworksAIHeuristic(access.oaiHost))
return { models: fireworksAIModelsToModelDescriptions(openAIModels) };
// [FastChat] make the best of the little info
if (fastAPIHeuristic(openAIModels))
return { models: fastAPIModels(openAIModels) };
models = openAIModels
// limit to only 'gpt' and 'non instruct' models
.filter(openAIModelFilter)
// to model description
.map((model): ModelDescriptionSchema => openAIModelToModelDescription(model.id, model.created))
// inject variants
.reduce(openAIInjectVariants, [] as ModelDescriptionSchema[])
// custom OpenAI sort
.sort(openAISortModels);
// [DEV] check for superfluous and missing models
openaiDevCheckForModelsOverlap_DEV(openAIWireModelsResponse, models);
break;
case 'openpipe':
models = [
...openAIModels.map(openPipeModelToModelDescriptions),
...openPipeModelDescriptions().sort(openPipeModelSort),
];
break;
case 'openrouter':
// openRouterStatTokenizers(openAIModels);
models = openAIModels
.sort(openRouterModelFamilySortFn)
.map(openRouterModelToModelDescription)
.filter(desc => !!desc)
.reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);
break;
}
const models = await listModelsRunDispatch(access, signal);
return { models };
}),