mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
LLMs: Central Dispatch
This commit is contained in:
@@ -4,7 +4,7 @@ description: Update Alibaba model definitions with latest pricing and capabiliti
|
||||
|
||||
Update `src/modules/llms/server/openai/models/alibaba.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Models & Pricing: https://www.alibabacloud.com/help/en/model-studio/models
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update Anthropic model definitions with latest pricing and capabili
|
||||
|
||||
Update `src/modules/llms/server/anthropic/anthropic.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Models: https://docs.claude.com/en/docs/about-claude/models/overview
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update DeepSeek model definitions with latest pricing and capabilit
|
||||
|
||||
Update `src/modules/llms/server/openai/models/deepseek.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Pricing: https://api-docs.deepseek.com/quick_start/pricing
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update Gemini model definitions with latest pricing and capabilitie
|
||||
|
||||
Update `src/modules/llms/server/gemini/gemini.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.types.ts`, `src/modules/llms/server/llm.server.types.ts`, and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.types.ts`, `src/modules/llms/server/llm.server.types.ts`, and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Models: https://ai.google.dev/gemini-api/docs/models
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update Groq model definitions with latest pricing and capabilities
|
||||
|
||||
Update `src/modules/llms/server/openai/models/groq.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Models: https://console.groq.com/docs/models
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update Kimi model definitions with latest pricing and capabilities
|
||||
|
||||
Update `src/modules/llms/server/openai/models/moonshot.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Pricing: https://platform.moonshot.ai/docs/pricing/chat
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update Mistral model definitions with latest pricing and capabiliti
|
||||
|
||||
Update `src/modules/llms/server/openai/models/mistral.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Models: https://docs.mistral.ai/getting-started/models/models_overview/
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update Ollama model definitions with latest featured models
|
||||
|
||||
Update `src/modules/llms/server/ollama/ollama.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Automated Workflow:**
|
||||
```bash
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update OpenAI model definitions with latest pricing and capabilitie
|
||||
|
||||
Update `src/modules/llms/server/openai/models/openai.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Manual hint:** For pricing page, expand all tables before copying content.
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update OpenPipe model definitions with latest pricing and capabilit
|
||||
|
||||
Update `src/modules/llms/server/openai/models/openpipe.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Base Models: https://docs.openpipe.ai/base-models
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update Perplexity model definitions with latest pricing and capabil
|
||||
|
||||
Update `src/modules/llms/server/openai/models/perplexity.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Models: https://docs.perplexity.ai/getting-started/models
|
||||
|
||||
@@ -4,7 +4,7 @@ description: Update xAI model definitions with latest pricing and capabilities
|
||||
|
||||
Update `src/modules/llms/server/openai/models/xai.models.ts` with latest model definitions.
|
||||
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.data.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
Reference `src/modules/llms/server/llm.server.types.ts` and `src/modules/llms/server/models.mappings.ts` for context only. Focus on the model file, do not descend into other code.
|
||||
|
||||
**Primary Sources:**
|
||||
- Models & Pricing: https://docs.x.ai/docs/models?cluster=us-east-1#detailed-pricing-for-all-grok-models
|
||||
|
||||
@@ -54,7 +54,7 @@ If the running LocalAI instance is configured with a [Model Gallery](https://loc
|
||||
|
||||
At the time of writing, LocalAI does not publish the model `context window size`.
|
||||
Every model is assumed to be capable of chatting, and with a context window of 4096 tokens.
|
||||
Please update the [src/modules/llms/transports/server/openai/models/models.data.ts](../src/modules/llms/server/openai/models/models.data.ts)
|
||||
Please update the [src/modules/llms/server/models.mappings.ts](../src/modules/llms/server/models.mappings.ts)
|
||||
file with the mapping information between LocalAI model IDs and names/descriptions/tokens, etc.
|
||||
|
||||
# 🤝 Support
|
||||
|
||||
@@ -1,8 +1,15 @@
|
||||
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
import * as z from 'zod/v4';
|
||||
|
||||
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
|
||||
import { Release } from '~/common/app.release';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../llm.server.types';
|
||||
|
||||
|
||||
// configuration
|
||||
export const DEV_DEBUG_ANTHROPIC_MODELS = Release.IsNodeDevBuild;
|
||||
|
||||
|
||||
const ANT_PAR_WEB: ModelDescriptionSchema['parameterSpecs'] = [
|
||||
{ paramId: 'llmVndAntWebSearch' },
|
||||
{ paramId: 'llmVndAntWebFetch' },
|
||||
@@ -239,3 +246,78 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
// retired: 'claude-2.1'
|
||||
// retired: 'claude-2.0'
|
||||
];
|
||||
|
||||
|
||||
// -- Wire Types --
|
||||
|
||||
/**
|
||||
* Namespace for the Anthropic API Models List response schema.
|
||||
* NOTE: not merged into AIX because of possible circular dependency issues - future work.
|
||||
*/
|
||||
export namespace AnthropicWire_API_Models_List {
|
||||
|
||||
export type ModelObject = z.infer<typeof ModelObject_schema>;
|
||||
const ModelObject_schema = z.object({
|
||||
type: z.literal('model'),
|
||||
id: z.string(),
|
||||
display_name: z.string(),
|
||||
created_at: z.string(),
|
||||
});
|
||||
|
||||
export const Response_schema = z.object({
|
||||
data: z.array(ModelObject_schema),
|
||||
has_more: z.boolean(),
|
||||
first_id: z.string().nullable(),
|
||||
last_id: z.string().nullable(),
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
// -- Helper Functions --
|
||||
|
||||
/**
|
||||
* DEV: Checks for obsoleted models that are defined in hardcodedAnthropicModels but no longer present in the API.
|
||||
* Similar to Gemini's geminiDevCheckForSuperfluousModels_DEV.
|
||||
*/
|
||||
export function llmsAntDevCheckForObsoletedModels_DEV(availableModels: AnthropicWire_API_Models_List.ModelObject[]): void {
|
||||
if (DEV_DEBUG_ANTHROPIC_MODELS) {
|
||||
const apiModelIds = new Set(availableModels.map(m => m.id));
|
||||
const obsoletedModels = hardcodedAnthropicModels.filter(m => !apiModelIds.has(m.id));
|
||||
if (obsoletedModels.length > 0)
|
||||
console.log(`[DEV] Anthropic: obsoleted model definitions: [ ${obsoletedModels.map(m => m.id).join(', ')} ]`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a placeholder ModelDescriptionSchema for Anthropic models not in the hardcoded list.
|
||||
* Uses sensible defaults with the newest available interfaces for day-0 support.
|
||||
*/
|
||||
export function llmsAntCreatePlaceholderModel(model: AnthropicWire_API_Models_List.ModelObject): ModelDescriptionSchema {
|
||||
return {
|
||||
id: model.id,
|
||||
label: model.display_name,
|
||||
created: Math.round(new Date(model.created_at).getTime() / 1000),
|
||||
description: 'Newest model, description not available yet.',
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 8192,
|
||||
trainingDataCutoff: 'Latest',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
|
||||
// chatPrice: ...
|
||||
// benchmark: ...
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Injects the LLM_IF_Tools_WebSearch interface for models that have web search/fetch parameters.
|
||||
* This allows the UI to show the web search indicator automatically based on model capabilities.
|
||||
*/
|
||||
export function llmsAntInjectWebSearchInterface(model: ModelDescriptionSchema): ModelDescriptionSchema {
|
||||
const hasWebParams = model.parameterSpecs?.some(spec =>
|
||||
spec.paramId === 'llmVndAntWebSearch' || spec.paramId === 'llmVndAntWebFetch',
|
||||
);
|
||||
return (hasWebParams && !model.interfaces?.includes(LLM_IF_Tools_WebSearch)) ? {
|
||||
...model,
|
||||
interfaces: [...model.interfaces, LLM_IF_Tools_WebSearch],
|
||||
} : model;
|
||||
}
|
||||
|
||||
@@ -5,21 +5,15 @@ import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
|
||||
import { env } from '~/server/env';
|
||||
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
|
||||
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
|
||||
import { Release } from '~/common/app.release';
|
||||
|
||||
import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types';
|
||||
|
||||
import { hardcodedAnthropicModels, hardcodedAnthropicVariants } from './anthropic.models';
|
||||
import { fixupHost } from '~/modules/llms/server/openai/openai.router';
|
||||
import { ListModelsResponse_schema } from '../llm.server.types';
|
||||
import { fixupHost } from '../openai/openai.router';
|
||||
import { listModelsRunDispatch } from '../listModels.dispatch';
|
||||
|
||||
|
||||
// configuration and defaults
|
||||
const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com';
|
||||
const DEFAULT_HELICONE_ANTHROPIC_HOST = 'anthropic.hconeai.com';
|
||||
|
||||
const DEV_DEBUG_ANTHROPIC_MODELS = Release.IsNodeDevBuild;
|
||||
|
||||
const DEFAULT_ANTHROPIC_HEADERS = {
|
||||
// Latest version hasn't changed (as of Feb 2025)
|
||||
'anthropic-version': '2023-06-01',
|
||||
@@ -165,10 +159,6 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string,
|
||||
};
|
||||
}
|
||||
|
||||
function roundTime(date: string) {
|
||||
return Math.round(new Date(date).getTime() / 1000);
|
||||
}
|
||||
|
||||
|
||||
// Input Schemas
|
||||
|
||||
@@ -185,23 +175,6 @@ const listModelsInputSchema = z.object({
|
||||
});
|
||||
|
||||
|
||||
// Helpers
|
||||
|
||||
/**
|
||||
* Injects the LLM_IF_Tools_WebSearch interface for models that have web search/fetch parameters.
|
||||
* This allows the UI to show the web search indicator automatically based on model capabilities.
|
||||
*/
|
||||
function _injectWebSearchInterface(model: ModelDescriptionSchema): ModelDescriptionSchema {
|
||||
const hasWebParams = model.parameterSpecs?.some(spec =>
|
||||
spec.paramId === 'llmVndAntWebSearch' || spec.paramId === 'llmVndAntWebFetch'
|
||||
);
|
||||
return (hasWebParams && !model.interfaces?.includes(LLM_IF_Tools_WebSearch)) ? {
|
||||
...model,
|
||||
interfaces: [...model.interfaces, LLM_IF_Tools_WebSearch],
|
||||
} : model;
|
||||
}
|
||||
|
||||
|
||||
// Router
|
||||
|
||||
export const llmAnthropicRouter = createTRPCRouter({
|
||||
@@ -210,81 +183,9 @@ export const llmAnthropicRouter = createTRPCRouter({
|
||||
listModels: publicProcedure
|
||||
.input(listModelsInputSchema)
|
||||
.output(ListModelsResponse_schema)
|
||||
.query(async ({ input: { access } }) => {
|
||||
.query(async ({ input: { access }, signal }) => {
|
||||
|
||||
// get the models
|
||||
const wireModels = await anthropicGETOrThrow(access, '/v1/models?limit=1000');
|
||||
const { data: availableModels } = AnthropicWire_API_Models_List.Response_schema.parse(wireModels);
|
||||
|
||||
// sort by: family (desc) > class (desc) > date (desc) -- Future NOTE: -5- will match -4-5- and -3-5-.. figure something else out
|
||||
const familyPrecedence = ['-4-7-', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
|
||||
const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
|
||||
|
||||
const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f));
|
||||
const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c));
|
||||
|
||||
// cast the models to the common schema
|
||||
const models = availableModels
|
||||
.sort((a, b) => {
|
||||
const familyA = getFamilyIdx(a.id);
|
||||
const familyB = getFamilyIdx(b.id);
|
||||
const classA = getClassIdx(a.id);
|
||||
const classB = getClassIdx(b.id);
|
||||
|
||||
// family desc (lower index = better, -1 = unknown goes last)
|
||||
if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB);
|
||||
// class desc
|
||||
if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
|
||||
// date desc (newer first) - string comparison works since format is YYYYMMDD
|
||||
return b.id.localeCompare(a.id);
|
||||
})
|
||||
.reduce((acc, model) => {
|
||||
|
||||
// find the model description
|
||||
const hardcodedModel = hardcodedAnthropicModels.find(m => m.id === model.id);
|
||||
if (hardcodedModel) {
|
||||
|
||||
// update creation date
|
||||
if (!hardcodedModel.created && model.created_at)
|
||||
hardcodedModel.created = roundTime(model.created_at);
|
||||
|
||||
// add FIRST a thinking variant, if defined
|
||||
if (hardcodedAnthropicVariants[model.id])
|
||||
acc.push({
|
||||
...hardcodedModel,
|
||||
...hardcodedAnthropicVariants[model.id],
|
||||
});
|
||||
|
||||
// add the base model
|
||||
acc.push(hardcodedModel);
|
||||
|
||||
} else {
|
||||
|
||||
// for day-0 support of new models, create a placeholder model using sensible defaults
|
||||
const novelModel = _createPlaceholderModel(model);
|
||||
// if (DEV_DEBUG_ANTHROPIC_MODELS) // kind of important...
|
||||
console.log('[DEV] anthropic.router: new model found, please configure it:', novelModel.id);
|
||||
acc.push(novelModel);
|
||||
|
||||
}
|
||||
|
||||
return acc;
|
||||
}, [] as ModelDescriptionSchema[])
|
||||
.map(_injectWebSearchInterface);
|
||||
|
||||
// developers warning for obsoleted models (we have them, but they are not in the API response anymore)
|
||||
if (DEV_DEBUG_ANTHROPIC_MODELS) {
|
||||
const apiModelIds = new Set(availableModels.map(m => m.id));
|
||||
const additionalModels = hardcodedAnthropicModels.filter(m => !apiModelIds.has(m.id));
|
||||
if (additionalModels.length > 0)
|
||||
console.log('[DEV] anthropic.router: obsoleted models:', additionalModels.map(m => m.id).join(', '));
|
||||
}
|
||||
|
||||
// additionalModels.forEach(m => {
|
||||
// m.label += ' (Removed)';
|
||||
// m.isLegacy = true;
|
||||
// });
|
||||
// models.push(...additionalModels);
|
||||
const models = await listModelsRunDispatch(access, signal);
|
||||
|
||||
return { models };
|
||||
}),
|
||||
@@ -328,47 +229,3 @@ export const llmAnthropicRouter = createTRPCRouter({
|
||||
}),
|
||||
|
||||
});
|
||||
|
||||
|
||||
/**
|
||||
* Create a placeholder ModelDescriptionSchema for models not in the hardcoded list,
|
||||
* using sensible defaults with the newest available interfaces.
|
||||
*/
|
||||
function _createPlaceholderModel(model: AnthropicWire_API_Models_List.ModelObject): ModelDescriptionSchema {
|
||||
return {
|
||||
id: model.id,
|
||||
label: model.display_name,
|
||||
created: Math.round(new Date(model.created_at).getTime() / 1000),
|
||||
description: 'Newest model, description not available yet.',
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 8192,
|
||||
trainingDataCutoff: 'Latest',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
|
||||
// chatPrice: ...
|
||||
// benchmark: ...
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Namespace for the Anthropic API Models List response schema.
|
||||
* NOTE: not merged into AIX because of possible circular dependency issues - future work.
|
||||
*/
|
||||
namespace AnthropicWire_API_Models_List {
|
||||
|
||||
export type ModelObject = z.infer<typeof ModelObject_schema>;
|
||||
const ModelObject_schema = z.object({
|
||||
type: z.literal('model'),
|
||||
id: z.string(),
|
||||
display_name: z.string(),
|
||||
created_at: z.string(),
|
||||
});
|
||||
|
||||
export type Response = z.infer<typeof Response_schema>;
|
||||
export const Response_schema = z.object({
|
||||
data: z.array(ModelObject_schema),
|
||||
has_more: z.boolean(),
|
||||
first_id: z.string().nullable(),
|
||||
last_id: z.string().nullable(),
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@@ -7,11 +7,11 @@ import packageJson from '../../../../../package.json';
|
||||
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
|
||||
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
|
||||
import { GeminiWire_API_Models_List, GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
|
||||
import { GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
|
||||
|
||||
import { ListModelsResponse_schema } from '../llm.server.types';
|
||||
import { geminiDevCheckForParserMisses_DEV, geminiDevCheckForSuperfluousModels_DEV, geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels } from './gemini.models';
|
||||
import { fixupHost } from '~/modules/llms/server/openai/openai.router';
|
||||
import { fixupHost } from '../openai/openai.router';
|
||||
import { listModelsRunDispatch } from '../listModels.dispatch';
|
||||
|
||||
|
||||
// Default hosts
|
||||
@@ -93,29 +93,11 @@ export const llmGeminiRouter = createTRPCRouter({
|
||||
listModels: publicProcedure
|
||||
.input(accessOnlySchema)
|
||||
.output(ListModelsResponse_schema)
|
||||
.query(async ({ input }) => {
|
||||
.query(async ({ input, signal }) => {
|
||||
|
||||
// get the models
|
||||
const wireModels = await geminiGET(input.access, null, GeminiWire_API_Models_List.getPath, false);
|
||||
const detailedModels = GeminiWire_API_Models_List.Response_schema.parse(wireModels).models;
|
||||
geminiDevCheckForParserMisses_DEV(wireModels, detailedModels);
|
||||
geminiDevCheckForSuperfluousModels_DEV(detailedModels.map(model => model.name));
|
||||
const models = await listModelsRunDispatch(input.access, signal);
|
||||
|
||||
// NOTE: no need to retrieve info for each of the models (e.g. /v1beta/model/gemini-pro).,
|
||||
// as the List API already all the info on all the models
|
||||
|
||||
// first filter from the original list
|
||||
const filteredModels = detailedModels.filter(geminiFilterModels);
|
||||
|
||||
// map to our output schema
|
||||
const models = filteredModels
|
||||
.map(geminiModelToModelDescription)
|
||||
.filter(model => !!model)
|
||||
.sort(geminiSortModels);
|
||||
|
||||
return {
|
||||
models: geminiModelsAddVariants(models),
|
||||
};
|
||||
return { models };
|
||||
}),
|
||||
|
||||
});
|
||||
|
||||
@@ -0,0 +1,425 @@
|
||||
import { TRPCError } from '@trpc/server';
|
||||
|
||||
import type { AixAPI_Access } from '~/modules/aix/server/api/aix.wiretypes';
|
||||
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
import { serverCapitalizeFirstLetter } from '~/server/wire';
|
||||
|
||||
import type { ModelDescriptionSchema } from './llm.server.types';
|
||||
|
||||
|
||||
// protocol: Anthropic
|
||||
import { AnthropicWire_API_Models_List, hardcodedAnthropicModels, hardcodedAnthropicVariants, llmsAntCreatePlaceholderModel, llmsAntDevCheckForObsoletedModels_DEV, llmsAntInjectWebSearchInterface } from './anthropic/anthropic.models';
|
||||
import { anthropicAccess } from './anthropic/anthropic.router';
|
||||
|
||||
// protocol: Gemini
|
||||
import { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
|
||||
import { geminiAccess } from './gemini/gemini.router';
|
||||
import { geminiDevCheckForParserMisses_DEV, geminiDevCheckForSuperfluousModels_DEV, geminiFilterModels, geminiModelsAddVariants, geminiModelToModelDescription, geminiSortModels } from './gemini/gemini.models';
|
||||
|
||||
// protocol: Ollama
|
||||
import { OLLAMA_BASE_MODELS } from './ollama/ollama.models';
|
||||
import { ollamaAccess } from './ollama/ollama.router';
|
||||
import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama/ollama.wiretypes';
|
||||
|
||||
// protocol: OpenAI-compatible
|
||||
import { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
|
||||
import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './openai/models/alibaba.models';
|
||||
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './openai/models/azure.models';
|
||||
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './openai/models/chutesai.models';
|
||||
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './openai/models/deepseek.models';
|
||||
import { fastAPIHeuristic, fastAPIModels } from './openai/models/fastapi.models';
|
||||
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './openai/models/fireworksai.models';
|
||||
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './openai/models/groq.models';
|
||||
import { lmStudioModelToModelDescription } from './openai/models/lmstudio.models';
|
||||
import { localAIModelSortFn, localAIModelToModelDescription } from './openai/models/localai.models';
|
||||
import { mistralModels } from './openai/models/mistral.models';
|
||||
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './openai/models/moonshot.models';
|
||||
import { openAIAccess } from './openai/openai.router';
|
||||
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './openai/models/openpipe.models';
|
||||
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './openai/models/openrouter.models';
|
||||
import { openaiDevCheckForModelsOverlap_DEV, openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './openai/models/openai.models';
|
||||
import { perplexityHardcodedModelDescriptions, perplexityInjectVariants } from './openai/models/perplexity.models';
|
||||
import { togetherAIModelsToModelDescriptions } from './openai/models/together.models';
|
||||
import { xaiFetchModelDescriptions, xaiModelSort } from './openai/models/xai.models';
|
||||
|
||||
|
||||
// -- Dispatch types --
|
||||
|
||||
export type ListModelsDispatch<TWireModels = any> = {
|
||||
fetchModels: () => Promise<TWireModels>;
|
||||
convertToDescriptions: (wireModels: TWireModels) => ModelDescriptionSchema[];
|
||||
};
|
||||
|
||||
/**
|
||||
* Helper to create a dispatch with proper type inference.
|
||||
* TypeScript will infer TWireModels from fetchModels return type and enforce it in convertToDescriptions.
|
||||
*/
|
||||
function createDispatch<T>(dispatch: ListModelsDispatch<T>): ListModelsDispatch<T> {
|
||||
return dispatch;
|
||||
}
|
||||
|
||||
|
||||
// -- Specialized Implementations -- Core of Server-side LLM Model Listing abstraction --
|
||||
|
||||
export async function listModelsRunDispatch(access: AixAPI_Access, signal?: AbortSignal) {
|
||||
const dispatch = _listModelsCreateDispatch(access, signal);
|
||||
const wireModels = await dispatch.fetchModels();
|
||||
return dispatch.convertToDescriptions(wireModels);
|
||||
}
|
||||
|
||||
/**
|
||||
* Specializes to the correct vendor a request for listing models.
|
||||
* This follows the same pattern as AIX's chatGenerate dispatcher for consistency.
|
||||
*/
|
||||
function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal): ListModelsDispatch {
|
||||
|
||||
// dialect is the only common property
|
||||
const { dialect } = access;
|
||||
|
||||
switch (dialect) {
|
||||
|
||||
case 'anthropic': {
|
||||
return createDispatch({
|
||||
fetchModels: async () => {
|
||||
const { headers, url } = anthropicAccess(access, '/v1/models?limit=1000', {/* ... no options for list ... */ });
|
||||
const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Anthropic', signal });
|
||||
return AnthropicWire_API_Models_List.Response_schema.parse(wireModels);
|
||||
},
|
||||
convertToDescriptions: (wireModelsResponse) => {
|
||||
const { data: availableModels } = wireModelsResponse;
|
||||
|
||||
// sort by: family (desc) > class (desc) > date (desc) -- Future NOTE: -5- will match -4-5- and -3-5-.. figure something else out
|
||||
const familyPrecedence = ['-4-7-', '-4-5-', '-4-1-', '-4-', '-3-7-', '-3-5-', '-3-'];
|
||||
const classPrecedence = ['-opus-', '-sonnet-', '-haiku-'];
|
||||
|
||||
const getFamilyIdx = (id: string) => familyPrecedence.findIndex(f => id.includes(f));
|
||||
const getClassIdx = (id: string) => classPrecedence.findIndex(c => id.includes(c));
|
||||
|
||||
// cast the models to the common schema
|
||||
const models = availableModels
|
||||
.sort((a, b) => {
|
||||
const familyA = getFamilyIdx(a.id);
|
||||
const familyB = getFamilyIdx(b.id);
|
||||
const classA = getClassIdx(a.id);
|
||||
const classB = getClassIdx(b.id);
|
||||
|
||||
// family desc (lower index = better, -1 = unknown goes last)
|
||||
if (familyA !== familyB) return (familyA === -1 ? 999 : familyA) - (familyB === -1 ? 999 : familyB);
|
||||
// class desc
|
||||
if (classA !== classB) return (classA === -1 ? 999 : classA) - (classB === -1 ? 999 : classB);
|
||||
// date desc (newer first) - string comparison works since format is YYYYMMDD
|
||||
return b.id.localeCompare(a.id);
|
||||
})
|
||||
.reduce((acc: ModelDescriptionSchema[], model) => {
|
||||
// find the model description
|
||||
const hardcodedModel = hardcodedAnthropicModels.find(m => m.id === model.id);
|
||||
if (hardcodedModel) {
|
||||
|
||||
// update creation date
|
||||
function roundTime(date: string) {
|
||||
return Math.round(new Date(date).getTime() / 1000);
|
||||
}
|
||||
|
||||
if (!hardcodedModel.created && model.created_at)
|
||||
hardcodedModel.created = roundTime(model.created_at);
|
||||
|
||||
// add FIRST a thinking variant, if defined
|
||||
if (hardcodedAnthropicVariants[model.id])
|
||||
acc.push({
|
||||
...hardcodedModel,
|
||||
...hardcodedAnthropicVariants[model.id],
|
||||
});
|
||||
|
||||
// add the base model
|
||||
acc.push(hardcodedModel);
|
||||
} else {
|
||||
// for day-0 support of new models, create a placeholder model using sensible defaults
|
||||
const novelModel = llmsAntCreatePlaceholderModel(model);
|
||||
// if (DEV_DEBUG_ANTHROPIC_MODELS) // kind of important...
|
||||
console.log('[DEV] anthropic.router: new model found, please configure it:', novelModel.id);
|
||||
acc.push(novelModel);
|
||||
}
|
||||
return acc;
|
||||
}, [] as ModelDescriptionSchema[])
|
||||
.map(llmsAntInjectWebSearchInterface);
|
||||
|
||||
// [DEV] check for obsoleted models (defined but no longer in API response)
|
||||
llmsAntDevCheckForObsoletedModels_DEV(availableModels);
|
||||
|
||||
return models;
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
case 'gemini': {
  return createDispatch({

    // [Gemini]: a single List call already carries the full metadata for every model
    fetchModels: async () => {
      const { headers, url } = geminiAccess(access, null, GeminiWire_API_Models_List.getPath, false);
      const wireList = await fetchJsonOrTRPCThrow({ url, headers, name: 'Gemini', signal });
      const parsedModels = GeminiWire_API_Models_List.Response_schema.parse(wireList).models;

      // [DEV] check for missing or superfluous models
      geminiDevCheckForParserMisses_DEV(wireList, parsedModels);
      geminiDevCheckForSuperfluousModels_DEV(parsedModels.map((model: any) => model.name));

      return parsedModels;
    },

    convertToDescriptions: (detailedModels) => {
      // NOTE: no need to retrieve info for each of the models (e.g. /v1beta/model/gemini-pro),
      // as the List API already has all the info on all the models

      // filter, map to our output schema, drop empties, sort - then add variants
      const descriptions = detailedModels
        .filter(geminiFilterModels)
        .map(geminiModelToModelDescription)
        .filter(model => !!model)
        .sort(geminiSortModels);
      return geminiModelsAddVariants(descriptions);
    },
  });
}
|
||||
|
||||
case 'ollama': {
  return createDispatch({

    // [Ollama]: GET /api/tags for the model list, then POST /api/show per model for details
    fetchModels: async () => {
      const { headers, url } = ollamaAccess(access, '/api/tags');
      const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama', signal });
      const models = wireOllamaListModelsSchema.parse(wireModels).models;

      // retrieve detailed info for each of the models, in parallel
      return await Promise.all(models.map(async (model) => {

        // perform /api/show on each model to get detailed info
        const { headers, url } = ollamaAccess(access, '/api/show');
        const wireModelInfo = await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body: { 'name': model.name }, name: 'Ollama', signal });

        const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo);
        return { ...model, ...modelInfo };
      }));
    },

    convertToDescriptions: (detailedModels) => {
      return detailedModels.map((model) => {
        // the model name is in the format "name:tag" (default tag = 'latest')
        const [modelName, modelTag] = model.name.split(':');

        // pretty label: capitalized name, tag appended unless it's the default 'latest'
        const label = serverCapitalizeFirstLetter(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : '');
        const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {};
        let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and not used by nobody

        // prepend the parameters count and quantization level
        if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) {
          let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : '';
          if (model.details.quantization_level)
            firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')');
          if (model.size)
            firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`;
          if (baseModel.hasTools)
            firstLine += ' [tools]';
          if (baseModel.hasVision)
            firstLine += ' [vision]';
          description = firstLine + '\n\n' + description;
        }

        /* Find the context window from the 'num_ctx' line in the parameters string, if present
         * - https://github.com/enricoros/big-AGI/issues/309
         * - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096
         * - We are tracking the Upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future
         */
        let contextWindow = baseModel.contextWindow || 8192;
        if (model.parameters) {
          // split the parameters into lines, and find one called "num_ctx ...spaces... number"
          const paramsNumCtx = model.parameters.split('\n').find((line) => line.startsWith('num_ctx '));
          if (paramsNumCtx) {
            const numCtxValue: string = paramsNumCtx.split(/\s+/)[1];
            if (numCtxValue) {
              const numCtxNumber: number = parseInt(numCtxValue, 10); // FIX: explicit radix 10
              if (!isNaN(numCtxNumber))
                contextWindow = numCtxNumber;
            }
          }
        }

        // auto-detect interfaces from the hardcoded description (in turn parsed from the html page)
        const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : [];
        if (baseModel.hasTools)
          interfaces.push(LLM_IF_OAI_Fn);
        if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic
          interfaces.push(LLM_IF_OAI_Vision);

        // FIX: Date.parse returns NaN (never null/undefined) for unparsable dates, so the
        // previous `Date.parse(...) ?? undefined` never triggered and could leak NaN
        // into created/updated; guard explicitly and compute the timestamp once
        const modifiedTs = Date.parse(model.modified_at);
        const timestamp = Number.isNaN(modifiedTs) ? undefined : modifiedTs;

        return {
          id: model.name,
          label,
          created: timestamp,
          updated: timestamp,
          description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown',
          contextWindow,
          ...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}),
          interfaces,
        };
      });
    },
  });
}
|
||||
|
||||
case 'perplexity':
  // [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/getting-started/pricing#sonar-models-chat-completions)
  // so serve the hardcoded catalog, expanded with its variants
  return createDispatch({
    fetchModels: async () => null,
    convertToDescriptions: () =>
      perplexityHardcodedModelDescriptions().reduce(perplexityInjectVariants, [] as ModelDescriptionSchema[]),
  });
|
||||
|
||||
case 'xai':
  // [xAI]: custom models listing endpoint, sorted with the xAI comparator
  return createDispatch({
    fetchModels: async () => await xaiFetchModelDescriptions(access),
    convertToDescriptions: (fetchedModels) => [...fetchedModels].sort(xaiModelSort),
  });
|
||||
|
||||
case 'alibaba':
case 'azure':
case 'deepseek':
case 'groq':
case 'lmstudio':
case 'localai':
case 'mistral':
case 'moonshot':
case 'openai':
case 'openpipe':
case 'openrouter':
case 'togetherai':
  return createDispatch({

    // [OpenAI-compatible dialects]: fetch openAI-style /v1/models API
    fetchModels: async () => {
      const { headers, url } = openAIAccess(access, null, '/v1/models');
      return fetchJsonOrTRPCThrow<OpenAIWire_API_Models_List.Response>({ url, headers, name: `OpenAI/${serverCapitalizeFirstLetter(dialect)}`, signal });
    },

    // OpenAI models conversions: dependent on the dialect
    convertToDescriptions: (openAIWireModelsResponse) => {

      // [Together] missing the .data property - so we have to do this early
      if (dialect === 'togetherai')
        return togetherAIModelsToModelDescriptions(openAIWireModelsResponse);

      // NOTE: we don't zod here as it would strip unknown properties needed for some dialects - so we proceed optimistically
      // let maybeModels = OpenAIWire_API_Models_List.Response_schema.parse(openAIWireModelsResponse).data || [];
      let maybeModels = openAIWireModelsResponse?.data || [];

      // de-duplicate by ids (can happen for local servers.. upstream bugs)
      // PERF: single-pass Set lookup instead of the former O(n^2) findIndex-inside-filter scan
      const preCount = maybeModels.length;
      const seenIds = new Set<string>();
      maybeModels = maybeModels.filter((model) => {
        if (seenIds.has(model.id)) return false;
        seenIds.add(model.id);
        return true;
      });
      if (preCount !== maybeModels.length)
        console.warn(`openai.router.listModels: removed ${preCount - maybeModels.length} duplicate models for dialect ${dialect}`);

      // sort by id
      maybeModels.sort((a, b) => a.id.localeCompare(b.id));

      // every dialect has a different way to enumerate models - we execute the mapping on the server side
      // NOTE: lexical declarations are wrapped in `{ }` per-case to scope them correctly
      switch (dialect) {
        case 'alibaba':
          return maybeModels
            .filter(({ id }) => alibabaModelFilter(id))
            .map(({ id, created }) => alibabaModelToModelDescription(id, created))
            .sort(alibabaModelSort);

        case 'azure': {
          const azureOpenAIDeployments = azureParseFromDeploymentsAPI(maybeModels);
          return azureOpenAIDeployments
            .filter(azureDeploymentFilter)
            .map(azureDeploymentToModelDescription)
            .sort(openAISortModels);
        }

        case 'deepseek':
          return maybeModels
            .filter(({ id }) => deepseekModelFilter(id))
            .map(({ id }) => deepseekModelToModelDescription(id))
            .sort(deepseekModelSort);

        case 'groq':
          return maybeModels
            .filter(groqModelFilter)
            .map(groqModelToModelDescription)
            .sort(groqModelSortFn);

        case 'lmstudio':
          return maybeModels
            .map(({ id }) => lmStudioModelToModelDescription(id));

        case 'localai':
          return maybeModels
            .map(({ id }) => localAIModelToModelDescription(id))
            .sort(localAIModelSortFn);

        case 'mistral':
          return mistralModels(maybeModels);

        case 'moonshot':
          return maybeModels
            .filter(moonshotModelFilter)
            .map(moonshotModelToModelDescription)
            .sort(moonshotModelSortFn);

        case 'openai': {
          // [ChutesAI] special case for model enumeration
          const oaiHost = access.oaiHost;
          if (chutesAIHeuristic(oaiHost))
            return chutesAIModelsToModelDescriptions(maybeModels);

          // [FireworksAI] special case for model enumeration
          if (fireworksAIHeuristic(oaiHost))
            return fireworksAIModelsToModelDescriptions(maybeModels);

          // [FastChat] make the best of the little info
          if (fastAPIHeuristic(maybeModels))
            return fastAPIModels(maybeModels);

          // [OpenAI or OpenAI-compatible]: chat-only models, custom sort, manual mapping
          const models = maybeModels
            // limit to only 'gpt' and 'non instruct' models
            .filter(openAIModelFilter)
            // to model description
            .map((model: any): ModelDescriptionSchema => openAIModelToModelDescription(model.id, model.created))
            // inject variants
            .reduce(openAIInjectVariants, [] as ModelDescriptionSchema[])
            // custom OpenAI sort
            .sort(openAISortModels);

          // [DEV] check for superfluous and missing models
          openaiDevCheckForModelsOverlap_DEV(maybeModels, models);
          return models;
        }

        case 'openpipe':
          return [
            ...maybeModels.map(openPipeModelToModelDescriptions),
            ...openPipeModelDescriptions().sort(openPipeModelSort),
          ];

        case 'openrouter':
          // openRouterStatTokenizers(maybeModels);
          return maybeModels
            .sort(openRouterModelFamilySortFn)
            .map(openRouterModelToModelDescription)
            .filter(desc => !!desc)
            .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);

        default: {
          // exhaustiveness guard: compile-time error if a dialect goes unhandled
          const _exhaustiveCheck: never = dialect;
          throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unhandled dialect: ${dialect}` });
        }
      }
    },
  });
|
||||
|
||||
default: {
  // exhaustiveness guard: compile-time error if a dialect goes unhandled
  const _exhaustiveCheck: never = dialect;
  throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `Unsupported dialect: ${dialect}` });
}
|
||||
}
|
||||
}
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import type { ModelDescriptionSchema } from './llm.server.types';
|
||||
|
||||
|
||||
// -- Manual model mappings: types and helper --
|
||||
@@ -3,23 +3,18 @@ import { TRPCError } from '@trpc/server';
|
||||
|
||||
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
|
||||
import { env } from '~/server/env';
|
||||
import { fetchJsonOrTRPCThrow, fetchTextOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
import { capitalizeFirstLetter } from '~/common/util/textUtils';
|
||||
import { fetchTextOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
import { serverCapitalizeFirstLetter } from '~/server/wire';
|
||||
|
||||
import { ListModelsResponse_schema } from '../llm.server.types';
|
||||
import { fixupHost } from '../openai/openai.router';
|
||||
import { listModelsRunDispatch } from '../listModels.dispatch';
|
||||
|
||||
import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models';
|
||||
import { wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama.wiretypes';
|
||||
import { fixupHost } from '~/modules/llms/server/openai/openai.router';
|
||||
|
||||
|
||||
// Default hosts
|
||||
// configuration
|
||||
const DEFAULT_OLLAMA_HOST = 'http://127.0.0.1:11434';
|
||||
// export const OLLAMA_PATH_CHAT = '/api/chat';
|
||||
const OLLAMA_PATH_TAGS = '/api/tags';
|
||||
const OLLAMA_PATH_SHOW = '/api/show';
|
||||
|
||||
|
||||
// Mappers
|
||||
@@ -84,15 +79,15 @@ export function ollamaCompletionPayload(model: OpenAIModelSchema, history: OpenA
|
||||
};
|
||||
}*/
|
||||
|
||||
async function ollamaGET<TOut extends object>(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
|
||||
const { headers, url } = ollamaAccess(access, apiPath);
|
||||
return await fetchJsonOrTRPCThrow<TOut>({ url, headers, name: 'Ollama' });
|
||||
}
|
||||
// async function ollamaGET<TOut extends object>(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
|
||||
// const { headers, url } = ollamaAccess(access, apiPath);
|
||||
// return await fetchJsonOrTRPCThrow<TOut>({ url, headers, name: 'Ollama' });
|
||||
// }
|
||||
|
||||
async function ollamaPOST<TOut extends object, TPostBody extends object>(access: OllamaAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
|
||||
const { headers, url } = ollamaAccess(access, apiPath);
|
||||
return await fetchJsonOrTRPCThrow<TOut, TPostBody>({ url, method: 'POST', headers, body, name: 'Ollama' });
|
||||
}
|
||||
// async function ollamaPOST<TOut extends object, TPostBody extends object>(access: OllamaAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
|
||||
// const { headers, url } = ollamaAccess(access, apiPath);
|
||||
// return await fetchJsonOrTRPCThrow<TOut, TPostBody>({ url, method: 'POST', headers, body, name: 'Ollama' });
|
||||
// }
|
||||
|
||||
|
||||
// Input/Output Schemas
|
||||
@@ -137,7 +132,7 @@ export const llmOllamaRouter = createTRPCRouter({
|
||||
return {
|
||||
pullableModels: Object.entries(OLLAMA_BASE_MODELS).map(([model_id, model]) => ({
|
||||
id: model_id,
|
||||
label: capitalizeFirstLetter(model_id),
|
||||
label: serverCapitalizeFirstLetter(model_id),
|
||||
tag: 'latest',
|
||||
tags: model.tags?.length ? model.tags : [],
|
||||
description: '', // model.description, // REMOVED description - bloated and not used by nobody
|
||||
@@ -185,83 +180,11 @@ export const llmOllamaRouter = createTRPCRouter({
|
||||
listModels: publicProcedure
|
||||
.input(accessOnlySchema)
|
||||
.output(ListModelsResponse_schema)
|
||||
.query(async ({ input }) => {
|
||||
.query(async ({ input, signal }) => {
|
||||
|
||||
// get the models
|
||||
const wireModels = await ollamaGET(input.access, OLLAMA_PATH_TAGS);
|
||||
let models = wireOllamaListModelsSchema.parse(wireModels).models;
|
||||
const models = await listModelsRunDispatch(input.access, signal);
|
||||
|
||||
// retrieve info for each of the models (/api/show, post call, in parallel)
|
||||
const detailedModels = await Promise.all(models.map(async model => {
|
||||
const wireModelInfo = await ollamaPOST(input.access, { 'name': model.name }, OLLAMA_PATH_SHOW);
|
||||
const modelInfo = wireOllamaModelInfoSchema.parse(wireModelInfo);
|
||||
return { ...model, ...modelInfo };
|
||||
}));
|
||||
|
||||
return {
|
||||
models: detailedModels.map(model => {
|
||||
// the model name is in the format "name:tag" (default tag = 'latest')
|
||||
const [modelName, modelTag] = model.name.split(':');
|
||||
|
||||
// pretty label and description
|
||||
const label = capitalizeFirstLetter(modelName) + ((modelTag && modelTag !== 'latest') ? ` (${modelTag})` : '');
|
||||
const baseModel = OLLAMA_BASE_MODELS[modelName] ?? {};
|
||||
let description = ''; // baseModel.description || 'Model unknown'; // REMOVED description - bloated and not used by nobody
|
||||
|
||||
// prepend the parameters count and quantization level
|
||||
if (model.details?.quantization_level || model.details?.format || model.details?.parameter_size) {
|
||||
let firstLine = model.details.parameter_size ? `${model.details.parameter_size} parameters ` : '';
|
||||
if (model.details.quantization_level)
|
||||
firstLine += `(${model.details.quantization_level}` + ((model.details.format) ? `, ${model.details.format})` : ')');
|
||||
if (model.size)
|
||||
firstLine += `, ${(model.size / 1024 / 1024 / 1024).toFixed(1)} GB`;
|
||||
if (baseModel.hasTools)
|
||||
firstLine += ' [tools]';
|
||||
if (baseModel.hasVision)
|
||||
firstLine += ' [vision]';
|
||||
description = firstLine + '\n\n' + description;
|
||||
}
|
||||
|
||||
/* Find the context window from the 'num_ctx' line in the parameters string, if present
|
||||
* - https://github.com/enricoros/big-AGI/issues/309
|
||||
* - Note: as of 2024-01-26 the num_ctx line is present in 50% of the models, and in most cases set to 4096
|
||||
* - We are tracking the Upstream issue https://github.com/ollama/ollama/issues/1473 for better ways to do this in the future
|
||||
*/
|
||||
let contextWindow = baseModel.contextWindow || 8192;
|
||||
if (model.parameters) {
|
||||
// split the parameters into lines, and find one called "num_ctx ...spaces... number"
|
||||
const paramsNumCtx = model.parameters.split('\n').find(line => line.startsWith('num_ctx '));
|
||||
if (paramsNumCtx) {
|
||||
const numCtxValue: string = paramsNumCtx.split(/\s+/)[1];
|
||||
if (numCtxValue) {
|
||||
const numCtxNumber: number = parseInt(numCtxValue);
|
||||
if (!isNaN(numCtxNumber))
|
||||
contextWindow = numCtxNumber;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// auto-detect interfaces from the hardcoded description (in turn parsed from the html page)
|
||||
const interfaces = !baseModel.isEmbeddings ? [LLM_IF_OAI_Chat] : [];
|
||||
if (baseModel.hasTools)
|
||||
interfaces.push(LLM_IF_OAI_Fn);
|
||||
if (baseModel.hasVision || modelName.includes('-vision')) // Heuristic
|
||||
interfaces.push(LLM_IF_OAI_Vision);
|
||||
|
||||
// console.log('>>> ollama model', model.name, model.template, model.modelfile, '\n');
|
||||
|
||||
return {
|
||||
id: model.name,
|
||||
label,
|
||||
created: Date.parse(model.modified_at) ?? undefined,
|
||||
updated: Date.parse(model.modified_at) ?? undefined,
|
||||
description: description, // description: (model.license ? `License: ${model.license}. Info: ` : '') + model.modelfile || 'Model unknown',
|
||||
contextWindow,
|
||||
...(contextWindow ? { maxCompletionTokens: Math.round(contextWindow / 2) } : {}),
|
||||
interfaces,
|
||||
};
|
||||
}),
|
||||
};
|
||||
return { models };
|
||||
}),
|
||||
|
||||
});
|
||||
|
||||
@@ -2,7 +2,7 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stor
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
|
||||
// - Models & Pricing: https://www.alibabacloud.com/help/en/model-studio/models
|
||||
// - Billing Guide: https://www.alibabacloud.com/help/en/model-studio/billing-for-model-studio
|
||||
|
||||
@@ -11,7 +11,7 @@ import type { ModelDescriptionSchema, RequestAccessValues } from '../../llm.serv
|
||||
import type { OpenAIAccessSchema } from '../openai.router';
|
||||
import { fixupHost } from '../openai.router';
|
||||
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
import { _fallbackOpenAIModel, _knownOpenAIChatModels } from './openai.models';
|
||||
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import { serverCapitalizeFirstLetter } from '~/server/wire';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
|
||||
|
||||
export function chutesAIHeuristic(hostname: string) {
|
||||
|
||||
@@ -2,7 +2,7 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning }
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
|
||||
|
||||
const _knownDeepseekChatModels: ManualMappings = [
|
||||
|
||||
@@ -3,7 +3,7 @@ import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/w
|
||||
import { DModelInterfaceV1, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
|
||||
|
||||
const _fastAPIKnownModels: ManualMappings = [
|
||||
|
||||
@@ -4,8 +4,8 @@ import { serverCapitalizeFirstLetter } from '~/server/wire';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { wireFireworksAIListOutputSchema } from '../fireworksai.wiretypes';
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
import { wireFireworksAIListOutputSchema } from '../wiretypes/fireworksai.wiretypes';
|
||||
|
||||
|
||||
export function fireworksAIHeuristic(hostname: string) {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { wireGroqModelsListOutputSchema } from '../groq.wiretypes';
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
import { wireGroqModelsListOutputSchema } from '../wiretypes/groq.wiretypes';
|
||||
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping } from './models.data';
|
||||
import { fromManualMapping } from '../../models.mappings';
|
||||
|
||||
|
||||
export function lmStudioModelToModelDescription(modelId: string): ModelDescriptionSchema {
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
import { capitalizeFirstLetter } from '~/common/util/textUtils';
|
||||
|
||||
import { serverCapitalizeFirstLetter } from '~/server/wire';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping, type ManualMappings } from './models.data';
|
||||
import { fromManualMapping, type ManualMappings } from '../../models.mappings';
|
||||
|
||||
|
||||
// [LocalAI]
|
||||
@@ -37,7 +38,7 @@ export function localAIModelToModelDescription(modelId: string): ModelDescriptio
|
||||
.replace(' Q4_K_M', ' (Q4_K_M)')
|
||||
.replace(' F16', ' (F16)')
|
||||
.split(' ')
|
||||
.map(capitalizeFirstLetter)
|
||||
.map(serverCapitalizeFirstLetter)
|
||||
.join(' ');
|
||||
|
||||
const description = `LocalAI model. File: ${modelId}`;
|
||||
|
||||
@@ -3,7 +3,7 @@ import * as z from 'zod/v4';
|
||||
import { LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
|
||||
|
||||
const IF_K2 = [
|
||||
|
||||
@@ -4,7 +4,7 @@ import { DModelInterfaceV1, LLM_IF_HOTFIX_NoTemperature, LLM_IF_HOTFIX_StripImag
|
||||
import { Release } from '~/common/app.release';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping, KnownModel, ManualMappings } from './models.data';
|
||||
import { fromManualMapping, KnownModel, ManualMappings } from '../../models.mappings';
|
||||
|
||||
|
||||
// OpenAI Model Variants
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import type { ModelDescriptionSchema } from '~/modules/llms/server/llm.server.types';
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
import { _knownOpenAIChatModels } from '~/modules/llms/server/openai/models/openai.models';
|
||||
import { wireOpenPipeModelOutputSchema } from '~/modules/llms/server/openai/openpipe.wiretypes';
|
||||
import { fromManualMapping, KnownModel } from '~/modules/llms/server/openai/models/models.data';
|
||||
import { wireOpenPipeModelOutputSchema } from '~/modules/llms/server/openai/wiretypes/openpipe.wiretypes';
|
||||
import { fromManualMapping, KnownModel } from '~/modules/llms/server/models.mappings';
|
||||
|
||||
const _knownOpenPipeChatModels: ModelDescriptionSchema[] = [
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@ import * as z from 'zod/v4';
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping } from './models.data';
|
||||
import { wireOpenrouterModelsListOutputSchema } from '../openrouter.wiretypes';
|
||||
import { fromManualMapping } from '../../models.mappings';
|
||||
import { wireOpenrouterModelsListOutputSchema } from '../wiretypes/openrouter.wiretypes';
|
||||
|
||||
|
||||
// configuration
|
||||
|
||||
@@ -144,7 +144,7 @@ export function perplexityInjectVariants(models: ModelDescriptionSchema[], model
|
||||
return models;
|
||||
}
|
||||
|
||||
export function perplexityAIModelDescriptions() {
|
||||
export function perplexityHardcodedModelDescriptions() {
|
||||
// Returns the list of known Perplexity models
|
||||
return _knownPerplexityChatModels;
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { wireTogetherAIListOutputSchema } from '../togetherai.wiretypes';
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
import { wireTogetherAIListOutputSchema } from '../wiretypes/togetherai.wiretypes';
|
||||
|
||||
|
||||
// Note: 2025-01-28 - we used to have harcoded models here, but now we have a dynamic
|
||||
|
||||
@@ -5,7 +5,7 @@ import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import { fromManualMapping, KnownModel, ManualMappings } from './models.data';
|
||||
import { fromManualMapping, KnownModel, ManualMappings } from '../../models.mappings';
|
||||
import { openAIAccess, OpenAIAccessSchema } from '../openai.router';
|
||||
|
||||
|
||||
@@ -179,7 +179,7 @@ const _knownXAIChatModels: ManualMappings = [
|
||||
|
||||
|
||||
// xAI Model Descriptions
|
||||
export async function xaiModelDescriptions(access: OpenAIAccessSchema): Promise<ModelDescriptionSchema[]> {
|
||||
export async function xaiFetchModelDescriptions(access: OpenAIAccessSchema): Promise<ModelDescriptionSchema[]> {
|
||||
|
||||
// List models
|
||||
const { headers, url } = openAIAccess(access, null, '/v1/language-models');
|
||||
|
||||
@@ -7,31 +7,15 @@ import { fetchJsonOrTRPCThrow, TRPCFetcherError } from '~/server/trpc/trpc.route
|
||||
import { serverCapitalizeFirstLetter } from '~/server/wire';
|
||||
|
||||
import type { T2ICreateImageAsyncStreamOp } from '~/modules/t2i/t2i.server';
|
||||
import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
|
||||
import { heartbeatsWhileAwaiting } from '~/modules/aix/server/dispatch/heartbeatsWhileAwaiting';
|
||||
|
||||
import { Brand } from '~/common/app.config';
|
||||
|
||||
import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
|
||||
|
||||
import { ListModelsResponse_schema, ModelDescriptionSchema, RequestAccessValues } from '../llm.server.types';
|
||||
import { alibabaModelFilter, alibabaModelSort, alibabaModelToModelDescription } from './models/alibaba.models';
|
||||
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureOpenAIAccess, azureParseFromDeploymentsAPI } from './models/azure.models';
|
||||
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './models/chutesai.models';
|
||||
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
|
||||
import { fastAPIHeuristic, fastAPIModels } from './models/fastapi.models';
|
||||
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './models/fireworksai.models';
|
||||
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription } from './models/groq.models';
|
||||
import { lmStudioModelToModelDescription } from './models/lmstudio.models';
|
||||
import { localAIModelSortFn, localAIModelToModelDescription } from './models/localai.models';
|
||||
import { mistralModels } from './models/mistral.models';
|
||||
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './models/moonshot.models';
|
||||
import { openaiDevCheckForModelsOverlap_DEV, openAIInjectVariants, openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
|
||||
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './models/openpipe.models';
|
||||
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models';
|
||||
import { perplexityAIModelDescriptions, perplexityInjectVariants } from './models/perplexity.models';
|
||||
import { togetherAIModelsToModelDescriptions } from './models/together.models';
|
||||
import { wireLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
|
||||
import { xaiModelDescriptions, xaiModelSort } from './models/xai.models';
|
||||
import { azureOpenAIAccess } from './models/azure.models';
|
||||
import { listModelsRunDispatch } from '../listModels.dispatch';
|
||||
import { wireLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './wiretypes/localai.wiretypes';
|
||||
|
||||
|
||||
const openAIDialects = z.enum([
|
||||
@@ -181,142 +165,7 @@ export const llmOpenAIRouter = createTRPCRouter({
|
||||
|
||||
/**
 * List the models available for an OpenAI-compatible service.
 *
 * All per-dialect enumeration logic (fetching `/v1/models` or the provider's
 * equivalent, filtering, mapping to ModelDescriptionSchema, variant injection
 * and sorting — formerly a large switch over `access.dialect` in this file)
 * is centralized in `listModelsRunDispatch`.
 *
 * @returns the model descriptions for the given access, already sorted and
 *          with variants injected by the dialect-specific dispatcher.
 */
.query(async ({ input: { access }, signal }): Promise<{ models: ModelDescriptionSchema[] }> => {

  // central dispatch: resolves access.dialect to the right lister/mapper,
  // propagating the abort signal to the upstream fetch
  const models = await listModelsRunDispatch(access, signal);

  return { models };
}),
|
||||
|
||||
Reference in New Issue
Block a user