LLMs: LMStudio: use native API for detailed model information

This commit is contained in:
Enrico Ros
2026-02-02 12:03:46 -08:00
parent 0c8460419b
commit d77274058d
2 changed files with 142 additions and 24 deletions
@@ -35,7 +35,7 @@ import { fastAPIHeuristic, fastAPIModels } from './openai/models/fastapi.models'
import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './openai/models/fireworksai.models';
import { groqModelFilter, groqModelSortFn, groqModelToModelDescription, groqValidateModelDefs_DEV } from './openai/models/groq.models';
import { novitaHeuristic, novitaModelsToModelDescriptions } from './openai/models/novita.models';
import { lmStudioModelToModelDescription } from './openai/models/lmstudio.models';
import { lmStudioFetchModels, lmStudioModelsToModelDescriptions } from './openai/models/lmstudio.models';
import { localAIModelSortFn, localAIModelToModelDescription } from './openai/models/localai.models';
import { mistralModels } from './openai/models/mistral.models';
import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './openai/models/moonshot.models';
@@ -285,11 +285,17 @@ function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal):
convertToDescriptions: models => models.sort(xaiModelSort),
});
case 'lmstudio':
// [LM Studio]: custom models listing with native API
return createDispatch({
fetchModels: async () => lmStudioFetchModels(access),
convertToDescriptions: (response) => lmStudioModelsToModelDescriptions(response.models),
});
case 'alibaba':
case 'azure':
case 'deepseek':
case 'groq':
case 'lmstudio':
case 'localai':
case 'mistral':
case 'moonshot':
@@ -365,10 +371,6 @@ function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal):
.map(groqModelToModelDescription)
.sort(groqModelSortFn);
case 'lmstudio':
return maybeModels
.map(({ id }) => lmStudioModelToModelDescription(id));
case 'localai':
return maybeModels
.map(({ id }) => localAIModelToModelDescription(id))
@@ -1,26 +1,142 @@
import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
import * as z from 'zod/v4';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping } from '../../models.mappings';
import type { OpenAIAccessSchema } from '../openai.access';
import { openAIAccess } from '../openai.access';
export function lmStudioModelToModelDescription(modelId: string): ModelDescriptionSchema {
/**
 * LM Studio Native API path for listing models.
 * Distinct from the OpenAI-compatible `/v1/models` endpoint: this native
 * endpoint is the one `lmStudioFetchModels` hits, and its payload carries the
 * extra metadata (quantization, capabilities, loaded instances) parsed below.
 */
export const LMSTUDIO_API_PATHS = {
  models: '/api/v1/models',
} as const;
// LM Studio model ID's are the file names of the model files
function getFileName(filePath: string): string {
const normalizedPath = filePath.replace(/\\/g, '/');
return normalizedPath.split('/').pop() || '';
}
return fromManualMapping([], modelId, undefined, undefined, {
idPrefix: modelId,
label: getFileName(modelId)
.replace('.gguf', '')
.replace('.bin', ''),
// .replaceAll('-', ' '),
description: `Unknown LM Studio model. File: ${modelId}`,
contextWindow: null, // 'not provided'
interfaces: [LLM_IF_OAI_Chat], // assume..
chatPrice: { input: 'free', output: 'free' },
// Wire Types for LM Studio Native API
export namespace LMStudioWire_API_Models_List {

  // quantization metadata of the model file (name and optional bit width)
  const Quantization_schema = z.object({
    name: z.string().nullish(),
    bits_per_weight: z.number().nullish(),
  }).nullish();

  // runtime configuration of an instance that is currently loaded in LM Studio
  const LoadedInstance_schema = z.object({
    id: z.string().optional(),
    config: z.object({
      context_length: z.number().optional(),
      eval_batch_size: z.number().optional(),
      flash_attention: z.boolean().optional(),
      num_experts: z.number().optional(),
      offload_kv_cache_to_gpu: z.boolean().optional(),
    }).optional(),
  });

  // feature flags reported by LM Studio for the model
  const Capabilities_schema = z.object({
    vision: z.boolean().optional(),
    trained_for_tool_use: z.boolean().optional(),
  }).nullish();

  const Model_schema = z.object({
    // `.or(z.string())` keeps parsing resilient if LM Studio adds new values
    type: z.enum(['llm', 'embedding']).or(z.string()),
    publisher: z.string().optional(),
    key: z.string(),
    display_name: z.string().optional(),
    architecture: z.string().nullish(),
    quantization: Quantization_schema,
    size_bytes: z.number().optional(),
    params_string: z.string().nullish(),
    loaded_instances: z.array(LoadedInstance_schema).optional(),
    max_context_length: z.number().optional(),
    format: z.enum(['gguf', 'mlx']).or(z.string()).nullish(),
    capabilities: Capabilities_schema,
    description: z.string().nullish(),
  });
  export type Model = z.infer<typeof Model_schema>;

  export const Response_schema = z.object({
    models: z.array(Model_schema),
  });
  export type Response = z.infer<typeof Response_schema>;

}
/**
 * Fetches the model list from LM Studio's native API (`/api/v1/models`) and
 * validates the raw JSON payload against the wire schema before returning it.
 */
export async function lmStudioFetchModels(access: OpenAIAccessSchema): Promise<LMStudioWire_API_Models_List.Response> {
  const endpoint = openAIAccess(access, null, LMSTUDIO_API_PATHS.models);
  const rawJson = await fetchJsonOrTRPCThrow({ url: endpoint.url, headers: endpoint.headers, name: 'LM Studio' });
  return LMStudioWire_API_Models_List.Response_schema.parse(rawJson);
}
/**
 * Converts LM Studio native API models into ModelDescriptionSchema objects.
 * Only chat models (`type === 'llm'`) are kept — embedding models are dropped —
 * and the result is sorted alphabetically by label.
 */
export function lmStudioModelsToModelDescriptions(wireModels: LMStudioWire_API_Models_List.Model[]): ModelDescriptionSchema[] {
  const descriptions = wireModels
    .filter((wireModel) => wireModel.type === 'llm')
    .map(_lmStudioWireModelToDescription);
  descriptions.sort((lhs, rhs) => lhs.label.localeCompare(rhs.label));
  return descriptions;
}

// maps a single validated wire model to our description shape
function _lmStudioWireModelToDescription(model: LMStudioWire_API_Models_List.Model): ModelDescriptionSchema {
  const { quantization, capabilities } = model;

  // assemble the human-readable description from the available metadata
  const parts = [
    model.params_string && `${model.params_string} parameters`,
    quantization?.name && (quantization.bits_per_weight
      ? `${quantization.name} (${quantization.bits_per_weight}-bit)`
      : quantization.name),
    model.format && model.format.toUpperCase(),
    model.size_bytes && `${(model.size_bytes / 1024 / 1024 / 1024).toFixed(1)} GB`,
    model.architecture && `arch: ${model.architecture}`,
    model.publisher && `by ${model.publisher}`,
    capabilities?.vision && '[vision]',
    capabilities?.trained_for_tool_use && '[tool use]',
    model.description,
  ];
  const description = parts.filter((part): part is string => !!part).join(' · ') || 'LM Studio model';

  // advertised maximum context of the model file, if reported
  const maxContext = model.max_context_length || null;

  // context length of the first loaded instance, if any is running
  const loadedContext = model.loaded_instances?.length
    ? model.loaded_instances[0]?.config?.context_length ?? null
    : null;

  const interfaces: ModelDescriptionSchema['interfaces'] = [LLM_IF_OAI_Chat];
  if (capabilities?.vision)
    interfaces.push(LLM_IF_OAI_Vision);
  if (capabilities?.trained_for_tool_use)
    interfaces.push(LLM_IF_OAI_Fn);

  return {
    id: model.key,
    label: model.display_name || model.key,
    description,
    // NOTE(review): when both values are known this reports the LARGER of the
    // advertised max and the loaded instance's configured context — confirm
    // that is intended over preferring the loaded instance's actual value
    contextWindow: (maxContext && loadedContext) ? Math.max(maxContext, loadedContext) : maxContext || loadedContext,
    interfaces,
    // heuristic: allow up to half the advertised context for the completion
    maxCompletionTokens: maxContext ? Math.round(maxContext / 2) : undefined,
    chatPrice: { input: 'free', output: 'free' },
  };
}