Mirror of https://github.com/enricoros/big-AGI.git
DLLMs: client-side OAI reasoning effort
@@ -63,7 +63,7 @@ export const DModelParameterRegistry = {
     incompatibleWith: ['temperature'] as const,
   } as const,

-  'vnd.oai.reasoning_effort': {
+  llmVndOaiReasoningEffort: {
     label: 'Reasoning Effort',
     type: 'enum' as const,
     description: 'Constrains effort on reasoning for OpenAI reasoning models',
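Note: for orientation, a minimal sketch of the renamed registry entry, not verbatim from the source; the values array is an assumption inferred from the _EnumValues helper below and the z.enum(['low', 'medium', 'high']) in the AIX wire schema further down.

  // Sketch only (assumed shape; the actual entry may carry more fields):
  llmVndOaiReasoningEffort: {
    label: 'Reasoning Effort',
    type: 'enum' as const,
    description: 'Constrains effort on reasoning for OpenAI reasoning models',
    values: ['low', 'medium', 'high'] as const, // assumed to match the wire schema enum
  } as const,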
@@ -87,7 +87,7 @@ export type DModelParameterValues = {
   [K in DModelParameterId]?: DModelParameterValue<K>;
 }

-export type DModelParameterId = keyof typeof DModelParameterRegistry; // max_tokens, temperature, top_p, vnd.oai.reasoning_effort, ...
+export type DModelParameterId = keyof typeof DModelParameterRegistry;
 // type _ExtendedParameterId = keyof typeof _ExtendedParameterRegistry;

 type _EnumValues<T> = T extends { type: 'enum', values: readonly (infer U)[] } ? U : never;
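Note: a type-level illustration (not part of the commit) of how the renamed key flows through the _EnumValues helper; it assumes the registry entry declares the three enum values.

// Illustration only:
type _ReasoningEffortValue = _EnumValues<(typeof DModelParameterRegistry)['llmVndOaiReasoningEffort']>;
// -> 'low' | 'medium' | 'high' (assuming values: ['low', 'medium', 'high'] as const)

// ...which is what makes this partial assignment type-check:
const exampleValues: DModelParameterValues = { llmVndOaiReasoningEffort: 'high' };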
@@ -8,8 +8,9 @@ import { persist } from 'zustand/middleware';
 import type { DOpenRouterServiceSettings } from '~/modules/llms/vendors/openrouter/openrouter.vendor';
 import type { ModelVendorId } from '~/modules/llms/vendors/vendors.registry';

-import { DLLM, DLLMId, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from './llms.types';
+import type { DModelParameterId } from './llms.parameters';
 import type { DModelsService, DModelsServiceId } from './modelsservice.types';
+import { DLLM, DLLMId, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from './llms.types';
 import { getLlmCostForTokens, portModelPricingV2toV3 } from './llms.pricing';

@@ -33,6 +34,7 @@ interface LlmsActions {
   rerankLLMsByServices: (serviceIdOrder: DModelsServiceId[]) => void;
   updateLLM: (id: DLLMId, partial: Partial<DLLM>) => void;
   updateLLMUserParameters: (id: DLLMId, partial: Partial<DLLM['userParameters']>) => void;
+  deleteLLMUserParameter: (id: DLLMId, parameterId: DModelParameterId) => void;

   addService: (service: DModelsService) => void;
   removeService: (id: DModelsServiceId) => void;
@@ -133,14 +135,22 @@ export const useModelsStore = create<LlmsState & LlmsActions>()(persist(
       })),

     updateLLMUserParameters: (id: DLLMId, partialUserParameters: Partial<DLLM['userParameters']>) =>
-      set(state => ({
-        llms: state.llms.map((llm: DLLM): DLLM =>
+      set(({ llms }) => ({
+        llms: llms.map((llm: DLLM): DLLM =>
           llm.id === id
             ? { ...llm, userParameters: { ...llm.userParameters, ...partialUserParameters } }
             : llm,
         ),
       })),

+    deleteLLMUserParameter: (id: DLLMId, parameterId: DModelParameterId) =>
+      set(({ llms }) => ({
+        llms: llms.map((llm: DLLM): DLLM =>
+          llm.id === id && llm.userParameters
+            ? { ...llm, userParameters: Object.fromEntries(Object.entries(llm.userParameters).filter(([key]) => key !== parameterId)) }
+            : llm,
+        ),
+      })),
+
     addService: (service: DModelsService) =>
       set(state => {
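Note: a hedged usage sketch of the two store actions, mirroring the LLMOptions handler further down; the model id is a placeholder, not a real DLLMId.

// Usage sketch only:
const { updateLLMUserParameters, deleteLLMUserParameter } = llmsStoreActions();

// set a per-model user override...
updateLLMUserParameters('some-llm-id' /* placeholder */, { llmVndOaiReasoningEffort: 'low' });

// ...and later clear it, so the model falls back to its initialParameters
deleteLLMUserParameter('some-llm-id' /* placeholder */, 'llmVndOaiReasoningEffort');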
@@ -29,23 +29,31 @@ export function aixCreateChatGenerateContext(name: AixAPI_Context_ChatGenerate['
 }

 export function aixCreateModelFromLLMOptions(
-  llmOptions: DModelParameterValues | undefined,
-  llmOptionsOverride: DModelParameterValues | undefined,
+  llmOptions: DModelParameterValues,
+  llmOptionsOverride: Omit<DModelParameterValues, 'llmRef'> | undefined,
   debugLlmId: string,
 ): AixAPI_Model {
-  // model params (llm)
-  let { llmRef, llmTemperature, llmResponseTokens } = llmOptions || {};
-  if (!llmRef || llmTemperature === undefined)
+
+  // destructure input with the overrides
+  const { llmRef, llmTemperature, llmResponseTokens, llmTopP, llmVndOaiReasoningEffort } = {
+    ...llmOptions,
+    ...llmOptionsOverride,
+  };
+
+  // llmRef is absolutely required
+  if (!llmRef)
     throw new Error(`AIX: Error in configuration for model ${debugLlmId} (missing ref, temperature): ${JSON.stringify(llmOptions)}`);

-  // model params overrides
-  if (llmOptionsOverride?.llmTemperature !== undefined) llmTemperature = llmOptionsOverride.llmTemperature;
-  if (llmOptionsOverride?.llmResponseTokens !== undefined) llmResponseTokens = llmOptionsOverride.llmResponseTokens;
+  // llmTemperature is highly recommended, so we display a note if it's missing
+  if (llmTemperature === undefined)
+    console.warn(`[DEV] AIX: Missing temperature for model ${debugLlmId}, using default.`);

   return {
     id: llmRef,
-    temperature: llmTemperature,
-    ...(llmResponseTokens ? { maxTokens: llmResponseTokens } : {}),
+    ...(llmTemperature !== undefined ? { temperature: llmTemperature } : {}),
+    ...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
+    ...(llmTopP !== undefined ? { topP: llmTopP } : {}),
+    ...(llmVndOaiReasoningEffort ? { vndOaiReasoningEffort: llmVndOaiReasoningEffort } : {}),
   };
 }
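Note: an illustrative call (all values are placeholders) showing why the override is spread last: its fields win over llmOptions, while llmRef can only come from llmOptions.

// Illustration only:
const aixModel = aixCreateModelFromLLMOptions(
  { llmRef: 'example-model-ref', llmTemperature: 1, llmResponseTokens: 4096, llmVndOaiReasoningEffort: 'medium' },
  { llmVndOaiReasoningEffort: 'high' }, // spread after llmOptions, so it takes precedence
  'example-debug-id',
);
// aixModel ~= { id: 'example-model-ref', temperature: 1, maxTokens: 4096, vndOaiReasoningEffort: 'high' }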
@@ -70,7 +78,7 @@ type StreamMessageStatus = {
 interface AixClientOptions {
   abortSignal: AbortSignal | 'NON_ABORTABLE'; // 'NON_ABORTABLE' is a special case for non-abortable operations
   throttleParallelThreads?: number; // 0: disable, 1: default throttle (12Hz), 2+ reduce frequency with the square root
-  llmOptionsOverride?: DModelParameterValues;
+  llmOptionsOverride?: Omit<DModelParameterValues, 'llmRef'>; // overrides for the LLM options
 }

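Note: a sketch of a per-call options object under the narrowed type; the field names and the 'NON_ABORTABLE' literal come from the interface above, the override value is an arbitrary example.

// Sketch only:
const exampleClientOptions: AixClientOptions = {
  abortSignal: 'NON_ABORTABLE',
  llmOptionsOverride: { llmVndOaiReasoningEffort: 'high' }, // llmRef cannot be overridden here
};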
@@ -376,6 +376,8 @@ export namespace AixWire_API {
     id: z.string(),
     temperature: z.number().min(0).max(2).optional(),
     maxTokens: z.number().min(1).optional(),
+    topP: z.number().min(0).max(1).optional(),
+    vndOaiReasoningEffort: z.enum(['low', 'medium', 'high']).optional(),
   });

   /// Context
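Note: a request-model fragment that the extended schema above would now accept; the values are placeholders and the surrounding request envelope is not shown in this diff.

// Sketch only:
const wireModel = {
  id: 'example-model-ref',            // placeholder model ref
  temperature: 1,
  maxTokens: 4096,
  topP: 0.95,
  vndOaiReasoningEffort: 'high',      // 'low' | 'medium' | 'high', optional
};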
@@ -15,6 +15,7 @@ const reasoningEffortOptions = [
   { value: 'high', label: 'High', description: 'Deep, thorough analysis' },
   { value: 'medium', label: 'Medium', description: 'Balanced reasoning depth' },
   { value: 'low', label: 'Low', description: 'Quick, concise responses' },
+  { value: 'unspecified', label: 'Default', description: 'Default value (unset)' },
 ] as const;

@@ -24,13 +25,13 @@ export function LLMOptions(props: { llm: DLLM }) {
   const { id: llmId, maxOutputTokens, initialParameters, userParameters, parameterSpecs } = props.llm;

   // external state
-  const { updateLLMUserParameters } = llmsStoreActions();
+  const { updateLLMUserParameters, deleteLLMUserParameter } = llmsStoreActions();
   const allParameters = getAllModelParameterValues(initialParameters, userParameters);

   // derived state
   const llmTemperature = allParameters?.llmTemperature ?? FALLBACK_LLM_PARAM_TEMPERATURE;
   const llmResponseTokens = allParameters?.llmResponseTokens ?? FALLBACK_LLM_PARAM_RESPONSE_TOKENS;
-  const llmVndOaiReasoningEffort = allParameters?.['vnd.oai.reasoning_effort'];
+  const llmVndOaiReasoningEffort = allParameters?.llmVndOaiReasoningEffort;
   const tempAboveOne = llmTemperature > 1;

   // more state (here because the initial state depends on props)
@@ -47,7 +48,7 @@ export function LLMOptions(props: { llm: DLLM }) {


   // find the reasoning effort parameter spec
-  const paramVndOaiReasoningEffort = parameterSpecs?.find(p => p.paramId === 'vnd.oai.reasoning_effort') as DModelParameterSpec<'vnd.oai.reasoning_effort'> | undefined;
+  const paramReasoningEffort = parameterSpecs?.find(p => p.paramId === 'llmVndOaiReasoningEffort') as DModelParameterSpec<'llmVndOaiReasoningEffort'> | undefined;

   const showOverheatButton = overheat || tempAboveOne;

@@ -85,13 +86,17 @@ export function LLMOptions(props: { llm: DLLM }) {
        <InlineError error='Max Output Tokens: Token computations are disabled because this model does not declare the context window size.' />
      )}

-     {paramVndOaiReasoningEffort && (
+     {paramReasoningEffort && (
        <FormSelectControl
-         disabled
          title='Reasoning Effort'
          tooltip='Controls how much effort the model spends on reasoning'
-         value={llmVndOaiReasoningEffort ?? 'medium'}
-         onChange={(value) => updateLLMUserParameters(llmId, { 'vnd.oai.reasoning_effort': value })}
+         value={llmVndOaiReasoningEffort ?? 'unspecified'}
+         onChange={(value) => {
+           if (value === 'unspecified' || !value)
+             deleteLLMUserParameter(llmId, 'llmVndOaiReasoningEffort');
+           else
+             updateLLMUserParameters(llmId, { 'llmVndOaiReasoningEffort': value });
+         }}
          options={reasoningEffortOptions}
        />
      )}
@@ -70,7 +70,7 @@ const ModelParameterSpec_schema = z.object({
   */
  paramId: z.enum([
    'llmTopP',
-   'vnd.oai.reasoning_effort', // vendor-specific
+   'llmVndOaiReasoningEffort', // vendor-specific
  ]),
  required: z.boolean().optional(),
  hidden: z.boolean().optional(),
@@ -166,7 +166,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 100000,
     trainingDataCutoff: 'Oct 2023',
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_OAI_PromptCaching, LLM_IF_HOTFIX_NoStream],
-    parameterSpecs: [{ paramId: 'vnd.oai.reasoning_effort' }],
+    parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }],
     chatPrice: { input: 15, cache: { cType: 'oai-ac', read: 7.5 }, output: 60 },
     benchmark: { cbaElo: 1335 + 1 },
   },
@@ -178,7 +178,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 100000,
     trainingDataCutoff: 'Oct 2023',
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_OAI_PromptCaching, LLM_IF_HOTFIX_NoStream],
-    parameterSpecs: [{ paramId: 'vnd.oai.reasoning_effort' }],
+    parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }],
     chatPrice: { input: 15, cache: { cType: 'oai-ac', read: 7.5 }, output: 60 },
     benchmark: { cbaElo: 1335 + 1 },
   },