DLLMs: client-side OAI reasoning effort

Enrico Ros
2024-12-24 18:12:04 -08:00
parent 897d7fb7e0
commit b2d66af440
7 changed files with 51 additions and 26 deletions
+2 -2
@@ -63,7 +63,7 @@ export const DModelParameterRegistry = {
     incompatibleWith: ['temperature'] as const,
   } as const,
-  'vnd.oai.reasoning_effort': {
+  llmVndOaiReasoningEffort: {
     label: 'Reasoning Effort',
     type: 'enum' as const,
     description: 'Constrains effort on reasoning for OpenAI reasoning models',
@@ -87,7 +87,7 @@ export type DModelParameterValues = {
   [K in DModelParameterId]?: DModelParameterValue<K>;
 }
-export type DModelParameterId = keyof typeof DModelParameterRegistry; // max_tokens, temperature, top_p, vnd.oai.reasoning_effort, ...
+export type DModelParameterId = keyof typeof DModelParameterRegistry;
 // type _ExtendedParameterId = keyof typeof _ExtendedParameterRegistry;
 type _EnumValues<T> = T extends { type: 'enum', values: readonly (infer U)[] } ? U : never;
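The rename from 'vnd.oai.reasoning_effort' to llmVndOaiReasoningEffort keeps the parameter id a plain identifier, so it works as an object key and with dot access. A minimal standalone sketch of the pattern (not the project file; the registry entries are trimmed to the fields needed to show the derived types):

```typescript
// Standalone sketch: mirrors the registry pattern above to show how the renamed
// 'llmVndOaiReasoningEffort' key flows into the derived types.
const DModelParameterRegistry = {
  llmTemperature: { label: 'Temperature', type: 'float' as const },
  llmVndOaiReasoningEffort: {
    label: 'Reasoning Effort',
    type: 'enum' as const,
    values: ['low', 'medium', 'high'] as const,
  },
} as const;

type DModelParameterId = keyof typeof DModelParameterRegistry;
type _EnumValues<T> = T extends { type: 'enum', values: readonly (infer U)[] } ? U : never;

// resolves to 'low' | 'medium' | 'high'
type ReasoningEffort = _EnumValues<typeof DModelParameterRegistry['llmVndOaiReasoningEffort']>;

const paramId: DModelParameterId = 'llmVndOaiReasoningEffort'; // type-checked against the registry keys
const effort: ReasoningEffort = 'medium';                      // type-checked against the enum values
console.log(paramId, effort);
```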
+13 -3
@@ -8,8 +8,9 @@ import { persist } from 'zustand/middleware';
 import type { DOpenRouterServiceSettings } from '~/modules/llms/vendors/openrouter/openrouter.vendor';
 import type { ModelVendorId } from '~/modules/llms/vendors/vendors.registry';
-import { DLLM, DLLMId, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from './llms.types';
+import type { DModelParameterId } from './llms.parameters';
 import type { DModelsService, DModelsServiceId } from './modelsservice.types';
+import { DLLM, DLLMId, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision } from './llms.types';
 import { getLlmCostForTokens, portModelPricingV2toV3 } from './llms.pricing';
@@ -33,6 +34,7 @@ interface LlmsActions {
   rerankLLMsByServices: (serviceIdOrder: DModelsServiceId[]) => void;
   updateLLM: (id: DLLMId, partial: Partial<DLLM>) => void;
   updateLLMUserParameters: (id: DLLMId, partial: Partial<DLLM['userParameters']>) => void;
+  deleteLLMUserParameter: (id: DLLMId, parameterId: DModelParameterId) => void;
   addService: (service: DModelsService) => void;
   removeService: (id: DModelsServiceId) => void;
@@ -133,14 +135,22 @@ export const useModelsStore = create<LlmsState & LlmsActions>()(persist(
     })),
   updateLLMUserParameters: (id: DLLMId, partialUserParameters: Partial<DLLM['userParameters']>) =>
-    set(state => ({
-      llms: state.llms.map((llm: DLLM): DLLM =>
+    set(({ llms }) => ({
+      llms: llms.map((llm: DLLM): DLLM =>
         llm.id === id
           ? { ...llm, userParameters: { ...llm.userParameters, ...partialUserParameters } }
           : llm,
       ),
     })),
+  deleteLLMUserParameter: (id: DLLMId, parameterId: DModelParameterId) =>
+    set(({ llms }) => ({
+      llms: llms.map((llm: DLLM): DLLM =>
+        llm.id === id && llm.userParameters
+          ? { ...llm, userParameters: Object.fromEntries(Object.entries(llm.userParameters).filter(([key]) => key !== parameterId)) }
+          : llm,
+      ),
+    })),
   addService: (service: DModelsService) =>
     set(state => {
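For reference, a standalone sketch of the new deleteLLMUserParameter reducer with minimal local types (the real DLLM and DModelParameterId come from the llms store modules); it shows the immutable removal of a single user parameter:

```typescript
// Sketch only: local stand-ins for the store's types.
type DModelParameterId = 'llmTemperature' | 'llmVndOaiReasoningEffort';

interface DLLM {
  id: string;
  userParameters?: { [parameterId: string]: unknown };
}

function deleteLLMUserParameter(llms: DLLM[], id: string, parameterId: DModelParameterId): DLLM[] {
  return llms.map(llm =>
    llm.id === id && llm.userParameters
      // rebuild userParameters without the removed key, keeping the update immutable
      ? { ...llm, userParameters: Object.fromEntries(Object.entries(llm.userParameters).filter(([key]) => key !== parameterId)) }
      : llm,
  );
}

// e.g. clearing a previously set reasoning effort override:
const next = deleteLLMUserParameter(
  [{ id: 'llm-1', userParameters: { llmVndOaiReasoningEffort: 'high' } }],
  'llm-1',
  'llmVndOaiReasoningEffort',
);
console.log(next[0].userParameters); // {}
```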
+19 -11
@@ -29,23 +29,31 @@ export function aixCreateChatGenerateContext(name: AixAPI_Context_ChatGenerate['
 }
 export function aixCreateModelFromLLMOptions(
-  llmOptions: DModelParameterValues | undefined,
-  llmOptionsOverride: DModelParameterValues | undefined,
+  llmOptions: DModelParameterValues,
+  llmOptionsOverride: Omit<DModelParameterValues, 'llmRef'> | undefined,
   debugLlmId: string,
 ): AixAPI_Model {
-  // model params (llm)
-  let { llmRef, llmTemperature, llmResponseTokens } = llmOptions || {};
-  if (!llmRef || llmTemperature === undefined)
+  // destructure input with the overrides
+  const { llmRef, llmTemperature, llmResponseTokens, llmTopP, llmVndOaiReasoningEffort } = {
+    ...llmOptions,
+    ...llmOptionsOverride,
+  };
+  // llmRef is absolutely required
+  if (!llmRef)
     throw new Error(`AIX: Error in configuration for model ${debugLlmId} (missing ref, temperature): ${JSON.stringify(llmOptions)}`);
-  // model params overrides
-  if (llmOptionsOverride?.llmTemperature !== undefined) llmTemperature = llmOptionsOverride.llmTemperature;
-  if (llmOptionsOverride?.llmResponseTokens !== undefined) llmResponseTokens = llmOptionsOverride.llmResponseTokens;
+  // llmTemperature is highly recommended, so we display a note if it's missing
+  if (llmTemperature === undefined)
+    console.warn(`[DEV] AIX: Missing temperature for model ${debugLlmId}, using default.`);
   return {
     id: llmRef,
-    temperature: llmTemperature,
-    ...(llmResponseTokens ? { maxTokens: llmResponseTokens } : {}),
+    ...(llmTemperature !== undefined ? { temperature: llmTemperature } : {}),
+    ...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
+    ...(llmTopP !== undefined ? { topP: llmTopP } : {}),
+    ...(llmVndOaiReasoningEffort ? { vndOaiReasoningEffort: llmVndOaiReasoningEffort } : {}),
   };
 }
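A hedged sketch of what the new merge does at a call site: the per-call override wins over the persisted options, temperature is omitted rather than defaulted when unset, and llmRef cannot be overridden. The local LlmParamValues type and the example model id are illustrative, not taken from this diff:

```typescript
// Illustrative stand-in for DModelParameterValues.
interface LlmParamValues {
  llmRef?: string;
  llmTemperature?: number;
  llmResponseTokens?: number | null;
  llmTopP?: number;
  llmVndOaiReasoningEffort?: 'low' | 'medium' | 'high';
}

const llmOptions: LlmParamValues = { llmRef: 'o1-example', llmResponseTokens: 4096, llmVndOaiReasoningEffort: 'medium' };
const llmOptionsOverride: Omit<LlmParamValues, 'llmRef'> = { llmVndOaiReasoningEffort: 'high' };

// same destructure-with-override pattern as the new code above
const { llmRef, llmTemperature, llmResponseTokens, llmTopP, llmVndOaiReasoningEffort } = { ...llmOptions, ...llmOptionsOverride };

const model = {
  id: llmRef,
  ...(llmTemperature !== undefined ? { temperature: llmTemperature } : {}),
  ...(llmResponseTokens ? { maxTokens: llmResponseTokens } : {}),
  ...(llmTopP !== undefined ? { topP: llmTopP } : {}),
  ...(llmVndOaiReasoningEffort ? { vndOaiReasoningEffort: llmVndOaiReasoningEffort } : {}),
};

console.log(model); // { id: 'o1-example', maxTokens: 4096, vndOaiReasoningEffort: 'high' }
```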
@@ -70,7 +78,7 @@ type StreamMessageStatus = {
 interface AixClientOptions {
   abortSignal: AbortSignal | 'NON_ABORTABLE'; // 'NON_ABORTABLE' is a special case for non-abortable operations
   throttleParallelThreads?: number; // 0: disable, 1: default throttle (12Hz), 2+ reduce frequency with the square root
-  llmOptionsOverride?: DModelParameterValues;
+  llmOptionsOverride?: Omit<DModelParameterValues, 'llmRef'>; // overrides for the LLM options
 }
@@ -376,6 +376,8 @@ export namespace AixWire_API {
     id: z.string(),
     temperature: z.number().min(0).max(2).optional(),
     maxTokens: z.number().min(1).optional(),
+    topP: z.number().min(0).max(1).optional(),
+    vndOaiReasoningEffort: z.enum(['low', 'medium', 'high']).optional(),
   });
 /// Context
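A minimal reconstruction of the extended Model_schema (zod), showing what the wire format now accepts; only the fields visible in this hunk are included:

```typescript
import { z } from 'zod';

// Sketch of the extended wire schema; field names follow the diff.
const Model_schema = z.object({
  id: z.string(),
  temperature: z.number().min(0).max(2).optional(),
  maxTokens: z.number().min(1).optional(),
  topP: z.number().min(0).max(1).optional(),
  vndOaiReasoningEffort: z.enum(['low', 'medium', 'high']).optional(),
});

// A request model carrying the client-selected reasoning effort validates:
Model_schema.parse({ id: 'o1-example', maxTokens: 4096, vndOaiReasoningEffort: 'high' });

// ...while an out-of-range value is rejected at the API boundary:
console.log(Model_schema.safeParse({ id: 'o1-example', vndOaiReasoningEffort: 'max' }).success); // false
```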
+12 -7
@@ -15,6 +15,7 @@ const reasoningEffortOptions = [
   { value: 'high', label: 'High', description: 'Deep, thorough analysis' },
   { value: 'medium', label: 'Medium', description: 'Balanced reasoning depth' },
   { value: 'low', label: 'Low', description: 'Quick, concise responses' },
+  { value: 'unspecified', label: 'Default', description: 'Default value (unset)' },
 ] as const;
@@ -24,13 +25,13 @@ export function LLMOptions(props: { llm: DLLM }) {
   const { id: llmId, maxOutputTokens, initialParameters, userParameters, parameterSpecs } = props.llm;
   // external state
-  const { updateLLMUserParameters } = llmsStoreActions();
+  const { updateLLMUserParameters, deleteLLMUserParameter } = llmsStoreActions();
   const allParameters = getAllModelParameterValues(initialParameters, userParameters);
   // derived state
   const llmTemperature = allParameters?.llmTemperature ?? FALLBACK_LLM_PARAM_TEMPERATURE;
   const llmResponseTokens = allParameters?.llmResponseTokens ?? FALLBACK_LLM_PARAM_RESPONSE_TOKENS;
-  const llmVndOaiReasoningEffort = allParameters?.['vnd.oai.reasoning_effort'];
+  const llmVndOaiReasoningEffort = allParameters?.llmVndOaiReasoningEffort;
   const tempAboveOne = llmTemperature > 1;
   // more state (here because the initial state depends on props)
@@ -47,7 +48,7 @@ export function LLMOptions(props: { llm: DLLM }) {
   // find the reasoning effort parameter spec
-  const paramVndOaiReasoningEffort = parameterSpecs?.find(p => p.paramId === 'vnd.oai.reasoning_effort') as DModelParameterSpec<'vnd.oai.reasoning_effort'> | undefined;
+  const paramReasoningEffort = parameterSpecs?.find(p => p.paramId === 'llmVndOaiReasoningEffort') as DModelParameterSpec<'llmVndOaiReasoningEffort'> | undefined;
   const showOverheatButton = overheat || tempAboveOne;
@@ -85,13 +86,17 @@ export function LLMOptions(props: { llm: DLLM }) {
         <InlineError error='Max Output Tokens: Token computations are disabled because this model does not declare the context window size.' />
       )}
-      {paramVndOaiReasoningEffort && (
+      {paramReasoningEffort && (
         <FormSelectControl
-          disabled
           title='Reasoning Effort'
           tooltip='Controls how much effort the model spends on reasoning'
-          value={llmVndOaiReasoningEffort ?? 'medium'}
-          onChange={(value) => updateLLMUserParameters(llmId, { 'vnd.oai.reasoning_effort': value })}
+          value={llmVndOaiReasoningEffort ?? 'unspecified'}
+          onChange={(value) => {
+            if (value === 'unspecified' || !value)
+              deleteLLMUserParameter(llmId, 'llmVndOaiReasoningEffort');
+            else
+              updateLLMUserParameters(llmId, { 'llmVndOaiReasoningEffort': value });
+          }}
           options={reasoningEffortOptions}
         />
       )}
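Why 'unspecified' deletes the user parameter instead of storing it: the effective values are the model's initialParameters overlaid with userParameters, so removing the key restores the model default. A small sketch under that assumption (the real merge is getAllModelParameterValues; this local version is simplified):

```typescript
// Sketch only: names mirror the diff, the real helper lives in the llms parameters module.
type ReasoningEffort = 'low' | 'medium' | 'high';

interface ParamValues {
  llmTemperature?: number;
  llmVndOaiReasoningEffort?: ReasoningEffort;
}

function getAllModelParameterValues(initial: ParamValues, user?: ParamValues): ParamValues {
  return { ...initial, ...user };
}

const initialParameters: ParamValues = { llmTemperature: 1 };

// user picked 'high': the select shows 'high'
console.log(getAllModelParameterValues(initialParameters, { llmVndOaiReasoningEffort: 'high' }));
// -> { llmTemperature: 1, llmVndOaiReasoningEffort: 'high' }

// user picked 'Default': the key was deleted, so the UI falls back to 'unspecified'
console.log(getAllModelParameterValues(initialParameters, {}));
// -> { llmTemperature: 1 }
```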
+1 -1
@@ -70,7 +70,7 @@ const ModelParameterSpec_schema = z.object({
   */
   paramId: z.enum([
     'llmTopP',
-    'vnd.oai.reasoning_effort', // vendor-specific
+    'llmVndOaiReasoningEffort', // vendor-specific
   ]),
   required: z.boolean().optional(),
   hidden: z.boolean().optional(),
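A small zod sketch of the effect of this rename on validation; only the two paramIds visible in this hunk are included (the real enum may list more):

```typescript
import { z } from 'zod';

// Sketch of the spec schema above, trimmed to the ids shown in the diff.
const ModelParameterSpec_schema = z.object({
  paramId: z.enum(['llmTopP', 'llmVndOaiReasoningEffort']),
  required: z.boolean().optional(),
  hidden: z.boolean().optional(),
});

ModelParameterSpec_schema.parse({ paramId: 'llmVndOaiReasoningEffort' }); // ok
console.log(ModelParameterSpec_schema.safeParse({ paramId: 'vnd.oai.reasoning_effort' }).success); // false: the old id is no longer accepted
```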
@@ -166,7 +166,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 100000,
     trainingDataCutoff: 'Oct 2023',
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_OAI_PromptCaching, LLM_IF_HOTFIX_NoStream],
-    parameterSpecs: [{ paramId: 'vnd.oai.reasoning_effort' }],
+    parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }],
     chatPrice: { input: 15, cache: { cType: 'oai-ac', read: 7.5 }, output: 60 },
     benchmark: { cbaElo: 1335 + 1 },
   },
@@ -178,7 +178,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 100000,
     trainingDataCutoff: 'Oct 2023',
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_OAI_PromptCaching, LLM_IF_HOTFIX_NoStream],
-    parameterSpecs: [{ paramId: 'vnd.oai.reasoning_effort' }],
+    parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }],
     chatPrice: { input: 15, cache: { cType: 'oai-ac', read: 7.5 }, output: 60 },
     benchmark: { cbaElo: 1335 + 1 },
   },
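Not part of the hunks shown: a hedged sketch of how the wire value would typically be translated into OpenAI's Chat Completions reasoning_effort field by a server-side adapter; OpenAI accepts 'low' | 'medium' | 'high' for its reasoning models. The function name and slice type below are hypothetical:

```typescript
// Hypothetical server-side fragment builder, for illustration only.
interface AixModelSlice {
  vndOaiReasoningEffort?: 'low' | 'medium' | 'high';
}

function toOpenAIBodyFragment(model: AixModelSlice): Record<string, unknown> {
  // forward the client-selected effort, omitting the field entirely when unset
  return {
    ...(model.vndOaiReasoningEffort ? { reasoning_effort: model.vndOaiReasoningEffort } : {}),
  };
}

console.log(toOpenAIBodyFragment({ vndOaiReasoningEffort: 'high' })); // { reasoning_effort: 'high' }
console.log(toOpenAIBodyFragment({})); // {}
```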