Mirror of https://github.com/enricoros/big-AGI.git (synced 2026-05-10 21:50:14 -07:00)
LLMs: LMStudio: use native API for detailed model information
@@ -35,7 +35,7 @@ import { fastAPIHeuristic, fastAPIModels } from './openai/models/fastapi.models'
 import { fireworksAIHeuristic, fireworksAIModelsToModelDescriptions } from './openai/models/fireworksai.models';
 import { groqModelFilter, groqModelSortFn, groqModelToModelDescription, groqValidateModelDefs_DEV } from './openai/models/groq.models';
 import { novitaHeuristic, novitaModelsToModelDescriptions } from './openai/models/novita.models';
-import { lmStudioModelToModelDescription } from './openai/models/lmstudio.models';
+import { lmStudioFetchModels, lmStudioModelsToModelDescriptions } from './openai/models/lmstudio.models';
 import { localAIModelSortFn, localAIModelToModelDescription } from './openai/models/localai.models';
 import { mistralModels } from './openai/models/mistral.models';
 import { moonshotModelFilter, moonshotModelSortFn, moonshotModelToModelDescription } from './openai/models/moonshot.models';
@@ -285,11 +285,17 @@ function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal):
         convertToDescriptions: models => models.sort(xaiModelSort),
       });
 
+    case 'lmstudio':
+      // [LM Studio]: custom models listing with native API
+      return createDispatch({
+        fetchModels: async () => lmStudioFetchModels(access),
+        convertToDescriptions: (response) => lmStudioModelsToModelDescriptions(response.models),
+      });
+
     case 'alibaba':
     case 'azure':
     case 'deepseek':
     case 'groq':
-    case 'lmstudio':
     case 'localai':
     case 'mistral':
     case 'moonshot':
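For readers unfamiliar with the dispatch helper, here is a minimal sketch of the shape this new case relies on. The interface name and generic are assumptions for illustration; the real createDispatch in big-AGI carries more options:

    // Hypothetical shape, for illustration only — not the actual big-AGI signature.
    import type { ModelDescriptionSchema } from './llm.server.types'; // path assumed

    interface ListModelsDispatch<TResponse> {
      // fetch the provider's raw (wire) models payload
      fetchModels: () => Promise<TResponse>;
      // convert the wire payload into UI-facing model descriptions
      convertToDescriptions: (response: TResponse) => ModelDescriptionSchema[];
    }

With this case in place, LM Studio bypasses the generic OpenAI `/v1/models` handling: the payload is fetched and validated by lmStudioFetchModels, and its `response.models` array is converted by lmStudioModelsToModelDescriptions.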
@@ -365,10 +371,6 @@ function _listModelsCreateDispatch(access: AixAPI_Access, signal?: AbortSignal):
         .map(groqModelToModelDescription)
         .sort(groqModelSortFn);
 
-    case 'lmstudio':
-      return maybeModels
-        .map(({ id }) => lmStudioModelToModelDescription(id));
-
     case 'localai':
       return maybeModels
        .map(({ id }) => localAIModelToModelDescription(id))
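For contrast, the legacy path removed here derived everything from the model file name, so the catalog carried no context window, quantization, or capability data. A standalone sketch of that old behavior (the input path below is invented):

    // Re-statement of the removed filename-based labeling, for illustration only.
    function getFileName(filePath: string): string {
      const normalizedPath = filePath.replace(/\\/g, '/');
      return normalizedPath.split('/').pop() || '';
    }

    // hypothetical local model path:
    const label = getFileName('C:\\models\\llama-3.1-8b-instruct.Q4_K_M.gguf').replace('.gguf', '');
    // → 'llama-3.1-8b-instruct.Q4_K_M' — a bare label, with contextWindow left null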
@@ -1,26 +1,142 @@
-import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
+import * as z from 'zod/v4';
+
+import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
+
+import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
 
 import type { ModelDescriptionSchema } from '../../llm.server.types';
-import { fromManualMapping } from '../../models.mappings';
+import type { OpenAIAccessSchema } from '../openai.access';
+import { openAIAccess } from '../openai.access';
 
 
-export function lmStudioModelToModelDescription(modelId: string): ModelDescriptionSchema {
+/**
+ * LM Studio Native API Path for listing models.
+ * Different from the OpenAI-compatible `/v1/models` endpoint.
+ */
+export const LMSTUDIO_API_PATHS = {
+  models: '/api/v1/models',
+} as const;
 
-  // LM Studio model IDs are the file names of the model files
-  function getFileName(filePath: string): string {
-    const normalizedPath = filePath.replace(/\\/g, '/');
-    return normalizedPath.split('/').pop() || '';
-  }
-
-  return fromManualMapping([], modelId, undefined, undefined, {
-    idPrefix: modelId,
-    label: getFileName(modelId)
-      .replace('.gguf', '')
-      .replace('.bin', ''),
-    // .replaceAll('-', ' '),
-    description: `Unknown LM Studio model. File: ${modelId}`,
-    contextWindow: null, // 'not provided'
-    interfaces: [LLM_IF_OAI_Chat], // assume..
-    chatPrice: { input: 'free', output: 'free' },
-  });
-}
+
+// Wire Types for LM Studio Native API
+
+export namespace LMStudioWire_API_Models_List {
+
+  export type Model = z.infer<typeof Model_schema>;
+  const Model_schema = z.object({
+    type: z.enum(['llm', 'embedding']).or(z.string()),
+    publisher: z.string().optional(),
+    key: z.string(),
+    display_name: z.string().optional(),
+    architecture: z.string().nullish(),
+    quantization: z.object({
+      name: z.string().nullish(),
+      bits_per_weight: z.number().nullish(),
+    }).nullish(),
+    size_bytes: z.number().optional(),
+    params_string: z.string().nullish(),
+    loaded_instances: z.array(z.object({
+      id: z.string().optional(),
+      config: z.object({
+        context_length: z.number().optional(),
+        eval_batch_size: z.number().optional(),
+        flash_attention: z.boolean().optional(),
+        num_experts: z.number().optional(),
+        offload_kv_cache_to_gpu: z.boolean().optional(),
+      }).optional(),
+    })).optional(),
+    max_context_length: z.number().optional(),
+    format: z.enum(['gguf', 'mlx']).or(z.string()).nullish(),
+    capabilities: z.object({
+      vision: z.boolean().optional(),
+      trained_for_tool_use: z.boolean().optional(),
+    }).nullish(),
+    description: z.string().nullish(),
+  });
+
+  export type Response = z.infer<typeof Response_schema>;
+  export const Response_schema = z.object({
+    models: z.array(Model_schema),
+  });
+
+}
+
+
+export async function lmStudioFetchModels(access: OpenAIAccessSchema): Promise<LMStudioWire_API_Models_List.Response> {
+  const { headers, url } = openAIAccess(access, null, LMSTUDIO_API_PATHS.models);
+  const wireModels = await fetchJsonOrTRPCThrow({ url, headers, name: 'LM Studio' });
+  return LMStudioWire_API_Models_List.Response_schema.parse(wireModels);
+}
+
+
+export function lmStudioModelsToModelDescriptions(wireModels: LMStudioWire_API_Models_List.Model[]): ModelDescriptionSchema[] {
+  return wireModels
+    .filter((model) => model.type === 'llm')
+    .map((model): ModelDescriptionSchema => {
+
+      const modelId = model.key;
+
+      const label = model.display_name || model.key;
+
+      // build description
+      const descs: string[] = [];
+      if (model.params_string)
+        descs.push(`${model.params_string} parameters`);
+      if (model.quantization?.name) {
+        const quantInfo = model.quantization.bits_per_weight
+          ? `${model.quantization.name} (${model.quantization.bits_per_weight}-bit)`
+          : model.quantization.name;
+        descs.push(quantInfo);
+      }
+      if (model.format)
+        descs.push(model.format.toUpperCase());
+      if (model.size_bytes) {
+        const sizeGB = (model.size_bytes / 1024 / 1024 / 1024).toFixed(1);
+        descs.push(`${sizeGB} GB`);
+      }
+      if (model.architecture)
+        descs.push(`arch: ${model.architecture}`);
+      if (model.publisher)
+        descs.push(`by ${model.publisher}`);
+      if (model.capabilities?.vision)
+        descs.push('[vision]');
+      if (model.capabilities?.trained_for_tool_use)
+        descs.push('[tool use]');
+      if (model.description)
+        descs.push(model.description);
+
+      const description = descs.join(' · ') || 'LM Studio model';
+
+      const contextWindow = model.max_context_length || null;
+
+      const interfaces: ModelDescriptionSchema['interfaces'] = [
+        LLM_IF_OAI_Chat,
+        ...(model.capabilities?.vision ? [LLM_IF_OAI_Vision] : []),
+        ...(model.capabilities?.trained_for_tool_use ? [LLM_IF_OAI_Fn] : []),
+        // LLM_IF_HOTFIX_NoWebP, // because they are not supported
+      ];
+
+      // If loaded, use the actual context length from the instance config
+      const loadedContextLength = model.loaded_instances?.length
+        ? model.loaded_instances[0]?.config?.context_length ?? null
+        : null;
+
+      return {
+        id: modelId,
+        // idVariant
+        label,
+        // created
+        // updated
+        description,
+        contextWindow: (contextWindow && loadedContextLength) ? Math.max(contextWindow, loadedContextLength) : contextWindow || loadedContextLength,
+        interfaces,
+        // parameterSpecs
+        maxCompletionTokens: contextWindow ? Math.round(contextWindow / 2) : undefined,
+        // benchmark
+        chatPrice: { input: 'free', output: 'free' },
+        // hidden
+        // initialTemperature
+      };
+    })
+    .sort((a, b) => a.label.localeCompare(b.label));
+}
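To illustrate what the native endpoint adds over `/v1/models`, here is a hypothetical wire entry (all field values invented, shaped like Model_schema above) run through the converter:

    // Hypothetical LM Studio native API entry; values are invented for illustration.
    import { LMStudioWire_API_Models_List, lmStudioModelsToModelDescriptions } from './lmstudio.models'; // path assumed

    const sample: LMStudioWire_API_Models_List.Model = {
      type: 'llm',
      key: 'qwen2.5-7b-instruct',
      display_name: 'Qwen2.5 7B Instruct',
      publisher: 'qwen',
      architecture: 'qwen2',
      quantization: { name: 'Q4_K_M', bits_per_weight: 4.5 },
      size_bytes: 4683071232,
      params_string: '7B',
      max_context_length: 32768,
      format: 'gguf',
      capabilities: { vision: false, trained_for_tool_use: true },
    };

    const [desc] = lmStudioModelsToModelDescriptions([sample]);
    // desc.description → '7B parameters · Q4_K_M (4.5-bit) · GGUF · 4.4 GB · arch: qwen2 · by qwen · [tool use]'
    // desc.contextWindow → 32768; desc.maxCompletionTokens → 16384; interfaces include LLM_IF_OAI_Fn

The removed filename-derived path could surface none of this metadata; only the `chatPrice: 'free'` assumption carries over from the old code unchanged.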