diff --git a/src/modules/llms/server/openai/models/chutesai.models.ts b/src/modules/llms/server/openai/models/chutesai.models.ts
new file mode 100644
index 000000000..2e3ca19e2
--- /dev/null
+++ b/src/modules/llms/server/openai/models/chutesai.models.ts
@@ -0,0 +1,123 @@
+import { z } from 'zod';
+
+import { DModelInterfaceV1, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
+
+import { serverCapitalizeFirstLetter } from '~/server/wire';
+
+import type { ModelDescriptionSchema } from '../../llm.server.types';
+
+import { fromManualMapping, ManualMappings } from './models.data';
+
+
+export function chutesAIHeuristic(hostname: string) {
+  return hostname.includes('.chutes.ai');
+}
+
+
+const _wireChutesAIListOutputSchema = z.array(z.object({
+
+  id: z.string(),
+  object: z.literal('model'),
+  created: z.number(),
+  owned_by: z.string().optional().nullable(),
+  root: z.string().optional().nullable(),
+
+  // ChutesAI specific field for context length
+  max_model_len: z.number().optional().nullable(),
+
+  // Optional fields that may be present
+  parent: z.string().nullable().optional(),
+  // permission: z.array(z.object({
+  //   id: z.string(),
+  //   object: z.literal('model_permission'),
+  //   created: z.number(),
+  //   allow_create_engine: z.boolean(),
+  //   allow_sampling: z.boolean(),
+  //   allow_logprobs: z.boolean(),
+  //   allow_search_indices: z.boolean(),
+  //   allow_view: z.boolean(),
+  //   allow_fine_tuning: z.boolean(),
+  //   organization: z.string(),
+  //   group: z.string().nullable(),
+  //   is_blocking: z.boolean(),
+  // })).optional(),
+}));
+
+const _chutesKnownModels: ManualMappings = [
+  // NOTE: we don't need manual patching as we have enough info for now
+];
+
+const _chutesDenyListContains: string[] = [
+  // nothing to deny for now
+];
+
+
+function _prettyModelId(id: string): string {
+  // example: "chutesai/Llama-4-Scout-17B-16E-Instruct" => "ChutesAI · Llama 4 Scout 17B 16E Instruct"
+  // example: "deepseek-ai/DeepSeek-R1" => "Deepseek AI · R1"
+  // example: "unsloth/Llama-3.2-1B-Instruct" => "Unsloth · Llama 3.2 1B Instruct"
+
+  return id
+    .replaceAll(/[_-]/g, ' ') // replace underscores or dashes with spaces
+    .replace('/', ' · ') // turn the first "/" into " · "
+    .split(' ')
+    .map(piece => {
+      // Handle special cases like version numbers
+      if (piece.match(/^\d+(\.\d+)*$/)) return piece; // keep version numbers as-is
+      if (piece.toLowerCase() === 'ai') return 'AI';
+      if (piece.toLowerCase() === 'v1' || piece.toLowerCase() === 'v2' || piece.toLowerCase() === 'v3') return piece.toUpperCase();
+      return serverCapitalizeFirstLetter(piece);
+    })
+    .join(' ')
+    .replace('Deepseek AI · DeepSeek ', 'Deepseek AI · ') // special case: dedupe the repeated 'DeepSeek' vendor prefix (trailing space avoids a double space)
+    .trim();
+}
+
+
+export function chutesAIModelsToModelDescriptions(wireModels: unknown): ModelDescriptionSchema[] {
+  return _wireChutesAIListOutputSchema.parse(wireModels)
+
+    .filter((model) => {
+      return !_chutesDenyListContains.some(contains => model.id.includes(contains));
+    })
+
+    .map((model): ModelDescriptionSchema => {
+
+      // heuristics
+      const label = _prettyModelId(model.id);
+      const description = model.owned_by ? `${serverCapitalizeFirstLetter(model.owned_by)} model via ChutesAI.` : 'Model via ChutesAI.';
+
+      // Use max_model_len if available, otherwise fallback to 8192
+      const contextWindow = model.max_model_len || 8192;
+
+      const interfaces: DModelInterfaceV1[] = [
+        LLM_IF_OAI_Chat, // Assume all are chat models
+        LLM_IF_OAI_Vision, // Assume we can send them
+        LLM_IF_OAI_Fn, // Most models support function calling
+      ];
+
+      // Check for vision capabilities based on model name patterns
+      // if (model.id.toLowerCase().includes('vision') || model.id.toLowerCase().includes('vl')) {
+      //   interfaces.push(LLM_IF_OAI_Vision);
+      // }
+
+      // Most modern models support function calling
+      // interfaces.push(LLM_IF_OAI_Fn);
+
+      return fromManualMapping(_chutesKnownModels, model.id, model.created, undefined, {
+        idPrefix: model.id,
+        label,
+        description,
+        contextWindow,
+        interfaces,
+        hidden: false,
+      });
+    })
+
+    .sort((a: ModelDescriptionSchema, b: ModelDescriptionSchema): number => {
+      // Sort by creation date (newer first), then by id
+      if (a.created !== b.created)
+        return (b.created || 0) - (a.created || 0);
+      return a.id.localeCompare(b.id);
+    });
+}
diff --git a/src/modules/llms/server/openai/openai.router.ts b/src/modules/llms/server/openai/openai.router.ts
index ddd77a48d..8b2c2baec 100644
--- a/src/modules/llms/server/openai/openai.router.ts
+++ b/src/modules/llms/server/openai/openai.router.ts
@@ -16,6 +16,7 @@ import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWi
 import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types';
 import { alibabaModelSort, alibabaModelToModelDescription } from './models/alibaba.models';
 import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './models/azure.models';
+import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './models/chutesai.models';
 import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
 import { fastAPIHeuristic, fastAPIModels } from './models/fastapi.models';
 import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './models/fireworksai.models';
@@ -241,6 +242,10 @@ export const llmOpenAIRouter = createTRPCRouter({
       // [OpenAI]: chat-only models, custom sort, manual mapping
       case 'openai':
 
+        // [ChutesAI] special case for model enumeration
+        if (chutesAIHeuristic(access.oaiHost))
+          return { models: chutesAIModelsToModelDescriptions(openAIModels) };
+
         // [FireworksAI] special case for model enumeration
         if (fireworksAIHeuristic(access.oaiHost))
           return { models: fireworksAIModelsToModelDescriptions(openAIModels) };