From 3aa9a71a4b2d6d39085c0e73500ac2be701918f4 Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Wed, 18 Feb 2026 14:38:14 -0800 Subject: [PATCH] LLM Effort: split definition for UI namings with unified backend. #940 --- .claude/commands/llms/verify-parameters.md | 22 +- src/common/stores/llms/llms.parameters.ts | 74 +++-- src/modules/aix/client/aix.client.ts | 24 +- src/modules/aix/server/api/aix.wiretypes.ts | 109 +++---- .../adapters/anthropic.messageCreate.ts | 2 +- .../adapters/gemini.generateContent.ts | 6 +- .../adapters/openai.chatCompletions.ts | 15 +- .../adapters/openai.responsesCreate.ts | 2 +- .../adapters/xai.responsesCreate.ts | 2 +- .../dispatch/wiretypes/openai.wiretypes.ts | 2 +- .../llms/models-modal/LLMParametersEditor.tsx | 271 ++++++++++++------ .../llms/server/anthropic/anthropic.models.ts | 16 +- .../llms/server/gemini/gemini.models.ts | 6 +- src/modules/llms/server/llm.server.types.ts | 6 +- .../server/openai/models/moonshot.models.ts | 2 +- .../server/openai/models/openai.models.ts | 46 +-- .../server/openai/models/openrouter.models.ts | 18 +- .../server/openai/models/perplexity.models.ts | 11 +- .../llms/server/openai/models/xai.models.ts | 5 +- .../llms/server/openai/models/zai.models.ts | 2 +- tools/develop/llm-parameter-sweep/sweep.ts | 16 +- 21 files changed, 393 insertions(+), 264 deletions(-) diff --git a/.claude/commands/llms/verify-parameters.md b/.claude/commands/llms/verify-parameters.md index f142f2441..274eded18 100644 --- a/.claude/commands/llms/verify-parameters.md +++ b/.claude/commands/llms/verify-parameters.md @@ -38,17 +38,17 @@ Example parameter mapping. Note that new parameters may have been added to both The objective of the sweep is to hint at model definition values, but the model definitions are what matters for Big-AGI, and need to be carefully updated, otherwise thousands of clients may break. 
-| Dialect | Sweep Key | Model paramId | -|---------|-----------|---------------| -| OpenAI | `oai-reasoning-effort` | `llmEffort` | -| OpenAI | `oai-verbosity` | `llmVndOaiVerbosity` | -| OpenAI | `oai-image-generation` | `llmVndOaiImageGeneration` | -| OpenAI | `oai-web-search` | `llmVndOaiWebSearchContext` | -| Anthropic | `ant-effort` | `llmEffort` | -| Anthropic | `ant-thinking-budget` | `llmVndAntThinkingBudget` | -| Gemini | `gemini-thinking-level` | `llmEffort` | -| Gemini | `gemini-thinking-budget` | `llmVndGeminiThinkingBudget` | -| xAI | `xai-web-search` | `llmVndXaiWebSearch` | +| Dialect | Sweep Key | Model paramId | +|-----------|--------------------------|------------------------------| +| OpenAI | `oai-reasoning-effort` | `llmVndOaiEffort` | +| OpenAI | `oai-verbosity` | `llmVndOaiVerbosity` | +| OpenAI | `oai-image-generation` | `llmVndOaiImageGeneration` | +| OpenAI | `oai-web-search` | `llmVndOaiWebSearchContext` | +| Anthropic | `ant-effort` | `llmVndAntEffort` | +| Anthropic | `ant-thinking-budget` | `llmVndAntThinkingBudget` | +| Gemini | `gemini-thinking-level` | `llmVndGemEffort` | +| Gemini | `gemini-thinking-budget` | `llmVndGeminiThinkingBudget` | +| xAI | `xai-web-search` | `llmVndXaiWebSearch` | ## Output diff --git a/src/common/stores/llms/llms.parameters.ts b/src/common/stores/llms/llms.parameters.ts index df3335870..71eb48fdd 100644 --- a/src/common/stores/llms/llms.parameters.ts +++ b/src/common/stores/llms/llms.parameters.ts @@ -83,8 +83,7 @@ function _enumDef(def: _EnumParamDef): _EnumParamDef< export const DModelParameterRegistry = { - /// Common 'implicit' parameters, available to all models /// - // Note: we still use pre-v2 names for compatibility and ease of migration + // -- Common 'implicit' parameters, available to all models -- llmRef: { label: 'Model ID', @@ -114,7 +113,7 @@ export const DModelParameterRegistry = { // due to implicit, when undefined we apply the runtime fallback }, - /// Extended parameters, specific to 
certain models/vendors + // -- Extended parameters, specific to certain models/vendors -- llmTopP: { label: 'Top P', @@ -124,30 +123,6 @@ export const DModelParameterRegistry = { // when undefined is omitted from the requests (default) }, - /** - * Unified 'reasoning' effort parameter for all vendors. The full superset of all possible effort levels. - * Each model declares its own subset via `enumValues` in its parameterSpec. - * - * Mapping to vendor-native values is done in adapters (the only place with vendor knowledge): - * - Anthropic: output_config.effort - * - OpenAI: reasoning_effort (ChatCompletions) / reasoning.effort (Responses) - * - Gemini: thinkingConfig.thinkingLevel (depending on model: low/high, minimal/low/medium/high, ...) - * - Moonshot/ZAI: thinking.type (none->disabled, high->enabled) - * - Perplexity: reasoning_effort - * - etc. - */ - llmEffort: _enumDef({ - label: 'Reasoning Effort', - type: 'enum', - description: 'Controls reasoning depth and effort level.', - values: [ - // all values (max includes) sorted in ascending order of effort - 'none', 'minimal', 'low', 'medium', 'high', 'xhigh', // OpenAI/common - 'max', // Anthropic only, for now - ], - // undefined means vendor default (usually high or medium, could be different such as none) - }), - /** * First introduced as a user-configurable parameter for the 'Verification' required by o3. * [2025-04-16] Adding parameter to disable streaming for o3, and possibly more models. @@ -165,6 +140,46 @@ export const DModelParameterRegistry = { }, + // -- 'Effort' unified semantic specialization -- + + /** + * Vendor-specific effort parameters. Each vendor has its own effort param with vendor-contextual + * labels and descriptions. Models declare their subset via `enumValues` in parameterSpec. + * All converge to the unified `reasoningEffort` wire field in aix.client.ts. + */ + llmVndAntEffort: _enumDef({ + label: 'Effort', + type: 'enum', + description: 'Controls reasoning depth. 
Works alongside thinking budget.', + values: ['low', 'medium', 'high', 'max'], + // undefined means high effort (default) + }), + + llmVndGemEffort: _enumDef({ + label: 'Thinking Level', + type: 'enum', + description: 'Controls internal reasoning depth. When unset, the model decides dynamically.', + values: ['minimal', 'low', 'medium', 'high'], + // undefined means dynamic (model decides) + }), + + llmVndOaiEffort: _enumDef({ + label: 'Reasoning Effort', + type: 'enum', + description: 'Controls how much effort the model spends on reasoning.', + values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'], + // undefined means vendor default + }), + + llmVndMiscEffort: _enumDef({ + label: 'Thinking', + type: 'enum', + description: 'Enable or disable extended thinking mode.', + values: ['none', 'high'], + // undefined means vendor default (usually 'high', i.e. thinking enabled) + }), + + // Anthropic-specific llmVndAnt1MContext: { @@ -192,7 +207,8 @@ export const DModelParameterRegistry = { }, /** - * NOTE: this is being phased out with Opus 4.6 in favor of llmEffort ('low', 'medium', 'high', 'max') + * NOTE: this is being phased out with Opus 4.6 in favor of llmVndAntEffort, while this is implicitly + * adaptive if missing (as-if we had our custom sentinel value of -1). * * Important: when this is set to anything other than nullish, it enables Adaptive(-1)/Extended(int > 1024) thinking, * and as a side effect **disables the temperature** in the requests (even when tunneled through OpenRouter). So this @@ -504,7 +520,7 @@ interface DModelParameterSpec { /** * (optional) For enum params: restrict which values from the registry are allowed for this model. * The UI will only show these values. Analogous to rangeOverride for numeric params. - * Example: llmEffort registry has 7 values, but a specific model may only support ['low', 'medium', 'high']. + * Example: llmVndOaiEffort registry has 6 values, but a specific model may only support ['low', 'medium', 'high']. 
*/ enumValues?: readonly string[]; } diff --git a/src/modules/aix/client/aix.client.ts b/src/modules/aix/client/aix.client.ts index 78fd579ca..9c5cd689c 100644 --- a/src/modules/aix/client/aix.client.ts +++ b/src/modules/aix/client/aix.client.ts @@ -50,14 +50,13 @@ export function aixCreateModelFromLLMOptions( // destructure input with the overrides const { llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream, - llmEffort, + llmVndAntEffort, llmVndGemEffort, llmVndOaiEffort, llmVndMiscEffort, llmVndAnt1MContext, llmVndAntInfSpeed, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiThinkingBudget, // llmVndMoonshotWebSearch, llmVndOaiRestoreMarkdown, llmVndOaiVerbosity, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, llmVndOaiImageGeneration, llmVndOaiCodeInterpreter, llmVndOrtWebSearch, llmVndPerplexityDateFilter, llmVndPerplexitySearchMode, - // xAI llmVndXaiCodeExecution, llmVndXaiSearchInterval, llmVndXaiWebSearch, llmVndXaiXSearch, llmVndXaiXSearchHandles, } = { ...llmOptions, @@ -102,11 +101,15 @@ export function aixCreateModelFromLLMOptions( return stripUndefined({ id: llmRef, acceptsOutputs: acceptsOutputs, - ...(hotfixOmitTemperature ? { temperature: null } : llmTemperature !== undefined ? { temperature: llmTemperature } : {}), - ...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}), - ...(llmTopP !== undefined ? { topP: llmTopP } : {}), - ...(llmEffort ? { effort: llmEffort } : {}), - ...(llmForceNoStream ? { forceNoStream: true } : {}), + temperature: (hotfixOmitTemperature || llmTemperature === null) ? null : llmTemperature, // strippable + maxTokens: llmResponseTokens ?? 
undefined, // strippable - null: like undefined -> strip -> omit the value + topP: llmTopP, // strippable (likely) + forceNoStream: llmForceNoStream ? true : undefined, // strippable + userGeolocation: userGeolocation, // strippable (likely) + + // Cross-provider unified options + reasoningEffort: llmVndAntEffort ?? llmVndGemEffort ?? llmVndOaiEffort ?? llmVndMiscEffort, // strippable + // Anthropic ...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget === -1 ? 'adaptive' as const : llmVndAntThinkingBudget } : {}), ...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}), @@ -114,6 +117,7 @@ export function aixCreateModelFromLLMOptions( ...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}), ...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}), ...(llmVndAntWebSearch === 'auto' ? { vndAntWebSearch: llmVndAntWebSearch } : {}), + // Gemini ...(llmVndGeminiAspectRatio ? { vndGeminiAspectRatio: llmVndGeminiAspectRatio } : {}), ...(llmVndGeminiCodeExecution === 'auto' ? { vndGeminiCodeExecution: llmVndGeminiCodeExecution } : {}), @@ -126,8 +130,10 @@ export function aixCreateModelFromLLMOptions( ...(llmVndGeminiMediaResolution ? { vndGeminiMediaResolution: llmVndGeminiMediaResolution } : {}), ...(llmVndGeminiThinkingBudget !== undefined ? { vndGeminiThinkingBudget: llmVndGeminiThinkingBudget } : {}), // ...(llmVndGeminiUrlContext === 'auto' ? { vndGeminiUrlContext: llmVndGeminiUrlContext } : {}), + // Moonshot // ...(llmVndMoonshotWebSearch === 'auto' ? { vndMoonshotWebSearch: 'auto' } : {}), + // OpenAI ...(llmVndOaiResponsesAPI ? { vndOaiResponsesAPI: true } : {}), ...(llmVndOaiRestoreMarkdown ? { vndOaiRestoreMarkdown: llmVndOaiRestoreMarkdown } : {}), @@ -135,12 +141,14 @@ export function aixCreateModelFromLLMOptions( ...(llmVndOaiWebSearchContext ? { vndOaiWebSearchContext: llmVndOaiWebSearchContext } : {}), ...(llmVndOaiImageGeneration ? 
{ vndOaiImageGeneration: (llmVndOaiImageGeneration as any /* backward comp */) === true ? 'mq' : llmVndOaiImageGeneration } : {}), ...(llmVndOaiCodeInterpreter === 'auto' ? { vndOaiCodeInterpreter: llmVndOaiCodeInterpreter } : {}), + // OpenRouter ...(llmVndOrtWebSearch === 'auto' ? { vndOrtWebSearch: 'auto' } : {}), + // Perplexity ...(llmVndPerplexityDateFilter ? { vndPerplexityDateFilter: llmVndPerplexityDateFilter } : {}), ...(llmVndPerplexitySearchMode ? { vndPerplexitySearchMode: llmVndPerplexitySearchMode } : {}), - ...(userGeolocation ? { userGeolocation } : {}), + // xAI ...(llmVndXaiCodeExecution === 'auto' ? { vndXaiCodeExecution: llmVndXaiCodeExecution } : {}), ...(llmVndXaiSearchInterval ? { vndXaiSearchInterval: llmVndXaiSearchInterval } : {}), diff --git a/src/modules/aix/server/api/aix.wiretypes.ts b/src/modules/aix/server/api/aix.wiretypes.ts index d1f1498de..b4e9aaec0 100644 --- a/src/modules/aix/server/api/aix.wiretypes.ts +++ b/src/modules/aix/server/api/aix.wiretypes.ts @@ -431,8 +431,6 @@ export namespace AixWire_API { topP: z.number().min(0).max(1).optional(), forceNoStream: z.boolean().optional(), - // Cross-vendor Structured Outputs - /** * Constrain model response to a JSON schema for data extraction. Response will be valid JSON. Schema limitations vary by vendor. 
* Supported: Anthropic (output_format), OpenAI (response_format), Gemini (responseSchema) @@ -449,53 +447,6 @@ export namespace AixWire_API { */ strictToolInvocations: z.boolean().optional(), - // Unified effort parameter (replaces vendor-specific effort params) - effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(), - - // NOTE: kept for backward compatibility during the migration; and they flow into effort - REMOVE for 2.0.5 - vndAntEffort: z.enum(['low', 'medium', 'high', 'max']).optional(), - vndGeminiThinkingLevel: z.enum(['high', 'medium', 'low', 'minimal']).optional(), // new param - vndOaiReasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(), - vndOaiReasoningSummary: z.enum(['none', 'detailed']).optional(), - vndGeminiShowThoughts: z.boolean().optional(), - - // Anthropic - vndAnt1MContext: z.boolean().optional(), - vndAntInfSpeed: z.enum(['fast']).optional(), - vndAntSkills: z.string().optional(), - vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(), - vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant - vndAntWebFetch: z.enum(['auto']).optional(), - vndAntWebSearch: z.enum(['auto']).optional(), - // Gemini - vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(), - vndGeminiCodeExecution: z.enum(['auto']).optional(), - vndGeminiComputerUse: z.enum(['browser']).optional(), - vndGeminiGoogleSearch: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(), - vndGeminiImageSize: z.enum(['1K', '2K', '4K']).optional(), - vndGeminiMediaResolution: z.enum(['mr_high', 'mr_medium', 'mr_low']).optional(), - vndGeminiThinkingBudget: z.number().optional(), // -1 for 'adaptive' - vndGeminiUrlContext: z.enum(['auto']).optional(), - // Moonshot - vndMoonshotWebSearch: z.enum(['auto']).optional(), - // OpenAI - vndOaiCodeInterpreter: z.enum(['off', 'auto']).optional(), - 
vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(), - vndOaiResponsesAPI: z.boolean().optional(), - vndOaiRestoreMarkdown: z.boolean().optional(), - vndOaiVerbosity: z.enum(['low', 'medium', 'high']).optional(), - vndOaiWebSearchContext: z.enum(['low', 'medium', 'high']).optional(), - // OpenRouter - vndOrtWebSearch: z.enum(['auto']).optional(), - // Perplexity - vndPerplexityDateFilter: z.enum(['unfiltered', '1m', '3m', '6m', '1y']).optional(), - vndPerplexitySearchMode: z.enum(['default', 'academic']).optional(), - // xAI - vndXaiCodeExecution: z.enum(['off', 'auto']).optional(), - vndXaiSearchInterval: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(), - vndXaiWebSearch: z.enum(['off', 'auto']).optional(), - vndXaiXSearch: z.enum(['off', 'auto']).optional(), - vndXaiXSearchHandles: z.string().optional(), /** * [OpenAI, 2025-03-11] This is the generic version of the `web_search_options.user_location` field * This AIX field mimics on purpose: https://platform.openai.com/docs/api-reference/chat/create @@ -506,6 +457,66 @@ export namespace AixWire_API { country: z.string().optional(), // two-letter ISO country code of the user, e.g. US timezone: z.string().optional(), // IANA timezone of the user, e.g. America/Los_Angeles }).optional(), + + + // Cross-provider unified (but with semantic specialization) options + + /** + * Union of all the possible reasoning effort values. Different dispatches will validate the + * domain (subset) of values they support, but the client can send any of them and let the server handle it. 
+ */ + reasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(), + // REMOVE for 2.0.5: we used to have the parameters below - here for doc purposes only - parsing doesn't break if they are set (backward comp) + // vndAntEffort: z.enum(['low', 'medium', 'high', 'max']).optional(), + // vndGeminiThinkingLevel: z.enum(['high', 'medium', 'low', 'minimal']).optional(), // new param + // vndOaiReasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(), + // vndOaiReasoningSummary: z.enum(['none', 'detailed']).optional(), + // vndGeminiShowThoughts: z.boolean().optional(), + + // Anthropic + vndAnt1MContext: z.boolean().optional(), + vndAntInfSpeed: z.enum(['fast']).optional(), + vndAntSkills: z.string().optional(), + vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(), + vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant + vndAntWebFetch: z.enum(['auto']).optional(), + vndAntWebSearch: z.enum(['auto']).optional(), + + // Gemini + vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(), + vndGeminiCodeExecution: z.enum(['auto']).optional(), + vndGeminiComputerUse: z.enum(['browser']).optional(), + vndGeminiGoogleSearch: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(), + vndGeminiImageSize: z.enum(['1K', '2K', '4K']).optional(), + vndGeminiMediaResolution: z.enum(['mr_high', 'mr_medium', 'mr_low']).optional(), + vndGeminiThinkingBudget: z.number().optional(), // -1 for 'adaptive' + vndGeminiUrlContext: z.enum(['auto']).optional(), + + // Moonshot + vndMoonshotWebSearch: z.enum(['auto']).optional(), + + // OpenAI + vndOaiCodeInterpreter: z.enum(['off', 'auto']).optional(), + vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(), + vndOaiResponsesAPI: z.boolean().optional(), + vndOaiRestoreMarkdown: z.boolean().optional(), + vndOaiVerbosity: z.enum(['low', 
'medium', 'high']).optional(), + vndOaiWebSearchContext: z.enum(['low', 'medium', 'high']).optional(), + + // OpenRouter + vndOrtWebSearch: z.enum(['auto']).optional(), + + // Perplexity + vndPerplexityDateFilter: z.enum(['unfiltered', '1m', '3m', '6m', '1y']).optional(), + vndPerplexitySearchMode: z.enum(['default', 'academic']).optional(), + + // xAI + vndXaiCodeExecution: z.enum(['off', 'auto']).optional(), + vndXaiSearchInterval: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(), + vndXaiWebSearch: z.enum(['off', 'auto']).optional(), + vndXaiXSearch: z.enum(['off', 'auto']).optional(), + vndXaiXSearchHandles: z.string().optional(), + }); /// Resume Handle diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts index 513a844ac..1d2fd1ca0 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts @@ -171,7 +171,7 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate: } // [Anthropic] Effort parameter [Anthropic, effort-2025-11-24] - const reasoningEffort = model.effort ?? model.vndAntEffort; + const reasoningEffort = model.reasoningEffort; // ?? 
model.vndAntEffort; if (reasoningEffort) { if (reasoningEffort === 'none' || reasoningEffort === 'minimal' || reasoningEffort === 'xhigh') throw new Error(`Anthropic API does not support '${reasoningEffort}' effort level`); payload.output_config = { diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/gemini.generateContent.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/gemini.generateContent.ts index feafc7cc5..e7ebf1da4 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/gemini.generateContent.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/gemini.generateContent.ts @@ -96,15 +96,15 @@ export function aixToGeminiGenerateContent(model: AixAPI_Model, _chatGenerate: A } // Thinking models: thinking budget and show thoughts - const thinkingLevel = model.effort ?? model.vndGeminiThinkingLevel; + const thinkingLevel = model.reasoningEffort; // ?? model.vndGeminiThinkingLevel; if (thinkingLevel === 'none' || thinkingLevel === 'xhigh' || thinkingLevel === 'max') // domain validation throw new Error(`Gemini API does not support '${thinkingLevel}' thinking level`); - if (thinkingLevel || model.vndGeminiThinkingBudget !== undefined || model.vndGeminiShowThoughts === true) { + if (thinkingLevel || model.vndGeminiThinkingBudget !== undefined /*|| model.vndGeminiShowThoughts === true*/) { const thinkingConfig: Exclude['thinkingConfig'] = {}; // This shows mainly 'summaries' of thoughts, and we enable it for most cases where thinking is requested - if (thinkingLevel || (model.vndGeminiThinkingBudget ?? 0) > 1 || model.vndGeminiShowThoughts === true) + if (thinkingLevel || (model.vndGeminiThinkingBudget ?? 
0) > 1 /*|| model.vndGeminiShowThoughts === true*/) thinkingConfig.includeThoughts = true; // [Gemini 3, 2025-11-18] Thinking Level (replaces thinkingBudget for Gemini 3) diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts index 4133b7cf8..2b432cf46 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts @@ -139,11 +139,10 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: } // [OpenAI] Vendor-specific reasoning effort - const reasoningEffort = model.effort ?? model.vndOaiReasoningEffort; + const reasoningEffort = model.reasoningEffort; // ?? model.vndOaiReasoningEffort; if (reasoningEffort && openAIDialect !== 'openrouter' // OpenRouter has its own channeling of this - && openAIDialect !== 'moonshot' // MoonShot maps to none->disabled / high->enabled - && openAIDialect !== 'zai' // Z.ai maps like MoonShot + && openAIDialect !== 'deepseek' && openAIDialect !== 'moonshot' && openAIDialect !== 'zai' // DeepSeek/Moonshot/Z.ai: binary 'none'/'high' handling in the block below && openAIDialect !== 'perplexity' // Perplexity has its own block below with stricter validation ) { if (reasoningEffort === 'max') // domain validation @@ -153,7 +152,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: // [Moonshot] Kimi K2.5 reasoning effort -> thinking mode (only 'none' and 'high' supported for now) // [Z.ai] GLM thinking mode: binary enabled/disabled (supports GLM-4.5 series and higher) - https://docs.z.ai/guides/capabilities/thinking-mode - if (reasoningEffort && (openAIDialect === 'moonshot' || openAIDialect === 'zai')) { + if (reasoningEffort && (openAIDialect === 'deepseek' || openAIDialect === 'moonshot' || openAIDialect === 'zai')) { if (reasoningEffort !== 'none' && reasoningEffort !== 
'high') // domain validation throw new Error(`${openAIDialect} only supports reasoning effort 'none' or 'high', got '${reasoningEffort}'`); @@ -246,7 +245,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: const isTunneledGemini = model.id.startsWith('google/'); if (isTunneledAnt) { // Effort -> OpenRouter verbosity -> Anthropic upstream output_config.effort - const antEffort = model.effort ?? model.vndAntEffort; + const antEffort = model.reasoningEffort; // ?? model.vndAntEffort; if (antEffort) { if (antEffort === 'none' || antEffort === 'minimal' || antEffort === 'xhigh') // domain validation throw new Error(`OpenRouter->Anthropic API does not support '${antEffort}' reasoning effort`); @@ -261,10 +260,10 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: if (model.vndAntThinkingBudget === 'adaptive') { payload.reasoning = { enabled: true }; delete payload.temperature; - } else if (model.vndAntThinkingBudget) { + } else if (typeof model.vndAntThinkingBudget === 'number') { payload.reasoning = { enabled: true, max_tokens: model.vndAntThinkingBudget }; delete payload.temperature; - } else { + } else /* null or undefined */ { // NOTE: with thinking disabled (null), we can still use temperature, so we don't delete it // see the note on llms.parameters.ts: 'llmVndAntThinkingBudget' } @@ -274,7 +273,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: if (model.vndGeminiThinkingBudget !== undefined) { payload.reasoning = { enabled: true, max_tokens: model.vndGeminiThinkingBudget }; } else { - const gemEffort = model.effort ?? model.vndGeminiThinkingLevel; + const gemEffort = model.reasoningEffort; // ?? 
model.vndGeminiThinkingLevel; if (gemEffort) { if (gemEffort === 'none' || gemEffort === 'xhigh' || gemEffort === 'max') // domain validation throw new Error(`OpenRouter->Gemini API does not support '${gemEffort}' reasoning effort`); diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts index 900e9a101..e8a563f1c 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts @@ -116,7 +116,7 @@ export function aixToOpenAIResponses( // Reasoning - const reasoningEffort = model.effort ?? model.vndOaiReasoningEffort; + const reasoningEffort = model.reasoningEffort; // ?? model.vndOaiReasoningEffort; if (reasoningEffort === 'max') // domain validation throw new Error(`OpenAI Responses API does not support '${reasoningEffort}' reasoning effort`); diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/xai.responsesCreate.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/xai.responsesCreate.ts index 657d390aa..044208741 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/xai.responsesCreate.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/xai.responsesCreate.ts @@ -95,7 +95,7 @@ export function aixToXAIResponses( } // Reasoning - const reasoningEffort = model.effort ?? model.vndOaiReasoningEffort; + const reasoningEffort = model.reasoningEffort; // ?? 
model.vndOaiReasoningEffort; if (reasoningEffort === 'none' || reasoningEffort === 'minimal' || reasoningEffort === 'xhigh' || reasoningEffort === 'max') // domain validation throw new Error(`XAI Responses API does not support reasoning effort '${reasoningEffort}'`); diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts index 38c87b24e..b5f647268 100644 --- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts @@ -336,7 +336,7 @@ export namespace OpenAIWire_API_Chat_Completions { // https://openrouter.ai/docs/api/reference/parameters#verbosity verbosity: z.enum([ 'low', 'medium', 'high', - 'max', // [OpenRouter, 2026-02-06] Anthropic-through-openrouter has its llmEffort mapped to 'verbosity' + 'max', // [OpenRouter, 2026-02-06] Anthropic-through-openrouter has its effort mapped to 'verbosity' ]).optional(), // 'max' is Opus 4.6 only // [OpenRouter, 2025-11-11] Unified reasoning parameter for all models reasoning: z.object({ diff --git a/src/modules/llms/models-modal/LLMParametersEditor.tsx b/src/modules/llms/models-modal/LLMParametersEditor.tsx index 240d830d4..659d6ef60 100644 --- a/src/modules/llms/models-modal/LLMParametersEditor.tsx +++ b/src/modules/llms/models-modal/LLMParametersEditor.tsx @@ -21,9 +21,26 @@ import { AnthropicSkillsConfig } from './AnthropicSkillsConfig'; const _UNSPECIFIED = '_UNSPECIFIED' as const; -// unified effort options - descending order (strongest first), filtered per-model by enumValues -const _effortOptions = [ - { value: 'max', label: 'Max', description: 'Deepest reasoning, no constraints' } as const, + +// Vendor-specific effort options - descending order, filtered per-model by enumValues + +const _antEffortOptions = [ + { value: 'max', label: 'Max', description: 'Deepest reasoning' } as const, + { value: 'high', label: 'High', description: 'Maximum capability' } as 
const, + { value: 'medium', label: 'Medium', description: 'Balanced' } as const, + { value: 'low', label: 'Low', description: 'Most efficient' } as const, + { value: _UNSPECIFIED, label: 'Default', description: 'Default (High)' } as const, +] as const; + +const _gemEffortOptions = [ + { value: 'high', label: 'High', description: 'Maximum reasoning depth' } as const, + { value: 'medium', label: 'Medium', description: 'Balanced reasoning' } as const, + { value: 'low', label: 'Low', description: 'Quick responses' } as const, + { value: 'minimal', label: 'Minimal', description: 'Fastest, least reasoning' } as const, + { value: _UNSPECIFIED, label: 'Default', description: 'Model decides' } as const, +] as const; + +const _oaiEffortOptions = [ { value: 'xhigh', label: 'X-High', description: 'Hardest thinking, best quality' } as const, { value: 'high', label: 'High', description: 'Deep, thorough analysis' } as const, { value: 'medium', label: 'Medium', description: 'Balanced reasoning depth' } as const, @@ -33,6 +50,20 @@ const _effortOptions = [ { value: _UNSPECIFIED, label: 'Default', description: 'Default value (unset)' } as const, ] as const; +const _miscEffortOptions = [ + { value: 'high', label: 'On', description: 'Multi-step reasoning' } as const, + { value: 'none', label: 'Off', description: 'Disable thinking mode' } as const, + { value: _UNSPECIFIED, label: 'Default', description: 'Model Default' } as const, +] as const; + +export function llmParametersFilterEffortOptions(options: readonly T[], spec: DModelParameterSpecAny | undefined, registryKey: keyof typeof DModelParameterRegistry): T[] | null { + if (!spec) return null; + const registry = DModelParameterRegistry[registryKey]; + const allowedSet = new Set((spec.enumValues as readonly string[] | undefined) ?? ('values' in registry ? 
registry.values : [])); + return options.filter(o => o.value === _UNSPECIFIED || allowedSet.has(o.value)); +} + + const _verbosityOptions = [ { value: 'high', label: 'Detailed', description: 'Thorough responses, great for audits' } as const, { value: 'medium', label: 'Balanced', description: 'Standard detail level (default)' } as const, @@ -191,13 +222,13 @@ export function LLMParametersEditor(props: { , [props.parameterSpecs]); - // effort options: filtered to model's allowed values, preserving descending order from _effortOptions - const llmEffortSpec = modelParamSpec['llmEffort']; - const effortOptions = React.useMemo(() => { - if (!llmEffortSpec) return null; - const allowedSet = new Set((llmEffortSpec.enumValues as readonly string[] | undefined) ?? DModelParameterRegistry['llmEffort'].values); - return _effortOptions.filter(o => o.value === _UNSPECIFIED || allowedSet.has(o.value)); - }, [llmEffortSpec]); + // effort options: one memo for all vendors, filtered to model's allowed values + const { antEffortOptions, gemEffortOptions, oaiEffortOptions, miscEffortOptions } = React.useMemo(() => ({ + antEffortOptions: llmParametersFilterEffortOptions(_antEffortOptions, modelParamSpec['llmVndAntEffort'], 'llmVndAntEffort'), + gemEffortOptions: llmParametersFilterEffortOptions(_gemEffortOptions, modelParamSpec['llmVndGemEffort'], 'llmVndGemEffort'), + oaiEffortOptions: llmParametersFilterEffortOptions(_oaiEffortOptions, modelParamSpec['llmVndOaiEffort'], 'llmVndOaiEffort'), + miscEffortOptions: llmParametersFilterEffortOptions(_miscEffortOptions, modelParamSpec['llmVndMiscEffort'], 'llmVndMiscEffort'), + }), [modelParamSpec]); // current values: { ...fallback, ...baseline, ...user } @@ -205,21 +236,24 @@ export function LLMParametersEditor(props: { const { llmResponseTokens = LLMImplicitParamersRuntimeFallback.llmResponseTokens, // fallback for undefined, result is number | null llmTemperature, // null: no temperature, number: temperature value, undefined: shall not 
happen, we treat is similarly to null - llmEffort, llmForceNoStream, llmVndAnt1MContext, + llmVndAntEffort, llmVndAntInfSpeed, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, + llmVndGemEffort, llmVndGeminiAspectRatio, llmVndGeminiCodeExecution, llmVndGeminiGoogleSearch, llmVndGeminiImageSize, llmVndGeminiMediaResolution, llmVndGeminiThinkingBudget, + llmVndMiscEffort, // llmVndMoonshotWebSearch, + llmVndOaiEffort, llmVndOaiRestoreMarkdown, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, @@ -229,7 +263,6 @@ export function LLMParametersEditor(props: { llmVndOrtWebSearch, llmVndPerplexityDateFilter, llmVndPerplexitySearchMode, - llmVndXaiCodeExecution, llmVndXaiSearchInterval, llmVndXaiWebSearch, @@ -276,10 +309,10 @@ export function LLMParametersEditor(props: { const gemTBSpec = modelParamSpec['llmVndGeminiThinkingBudget']; const gemTBMinMax = gemTBSpec?.rangeOverride || defGemTB.range; - // check if web search should be disabled - // 2026-02-17: NOTE: formerly we checked for `llmEffort === 'minimal' || llmEffort === 'none'`, but seems to be working now - // Now this seems to be still the case for llmEffort === 'minimal' (gpt 5.0 and before), 5.1/5.2 work even with 'none' - const oaiSkipSearchOnMinimalEffort = llmEffort === 'minimal'; + // check if web search should be disabled (OpenAI-only) + // 2026-02-17: NOTE: formerly we checked for `llmVndOaiEffort === 'minimal' || llmVndOaiEffort === 'none'`, but seems to be working now + // Now this seems to be still the case for llmVndOaiEffort === 'minimal' (gpt 5.0 and before), 5.1/5.2 work even with 'none' + const oaiSkipSearchOnMinimalEffort = llmVndOaiEffort === 'minimal'; return <> @@ -329,6 +362,8 @@ export function LLMParametersEditor(props: { )} + + {/* pre-Effort: Anthropic [thinking budget, effort, ...] 
*/} {antThinkingShown && ( )} - {/* Unified Effort - dynamic options from model spec's enumValues, descending order */} - {showParam('llmEffort') && effortOptions && ( + + {/* Anthropic Effort */} + {showParam('llmVndAntEffort') && antEffortOptions && ( { - if (value === _UNSPECIFIED || !value) onRemoveParameter('llmEffort'); - else onChangeParameter({ llmEffort: value }); + if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndAntEffort'); + else onChangeParameter({ llmVndAntEffort: value }); }} - options={effortOptions} + options={antEffortOptions} /> )} + {/* Gemini Thinking Level */} + {showParam('llmVndGemEffort') && gemEffortOptions && ( + { + if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndGemEffort'); + else onChangeParameter({ llmVndGemEffort: value }); + }} + options={gemEffortOptions} + /> + )} + {/* OpenAI Reasoning Effort */} + {showParam('llmVndOaiEffort') && oaiEffortOptions && ( + { + if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndOaiEffort'); + else onChangeParameter({ llmVndOaiEffort: value }); + }} + options={oaiEffortOptions} + /> + )} + {/* Moonshot/Z.ai Thinking */} + {showParam('llmVndMiscEffort') && miscEffortOptions && ( + { + if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndMiscEffort'); + else onChangeParameter({ llmVndMiscEffort: value }); + }} + options={miscEffortOptions} + /> + )} + {showParam('llmVndAntWebSearch') && ( onChangeParameter({ llmVndGeminiThinkingBudget: Array.isArray(value) ? (value[0] || value[1]) : value })} + startAdornment={gemTBMinMax[0] === 0 && ( + + onChangeParameter({ llmVndGeminiThinkingBudget: 0 })} + sx={{ mr: 2 }} + > + {gemThinkingOff ? 
: } + + + )} + endAdornment={ + + onRemoveParameter('llmVndGeminiThinkingBudget')} + sx={{ ml: 2 }} + > + + + + } + /> + )} + + {/*{showParam('llmVndGeminiShowThoughts') && (*/} + {/* onChangeParameter({ llmVndGeminiShowThoughts: checked })}*/} + {/* />*/} + {/*)}*/} + {showParam('llmVndGeminiImageSize') && ( )} - - {showParam('llmVndGeminiThinkingBudget') && ( - onChangeParameter({ llmVndGeminiThinkingBudget: Array.isArray(value) ? (value[0] || value[1]) : value })} - startAdornment={gemTBMinMax[0] === 0 && ( - - onChangeParameter({ llmVndGeminiThinkingBudget: 0 })} - sx={{ mr: 2 }} - > - {gemThinkingOff ? : } - - - )} - endAdornment={ - - onRemoveParameter('llmVndGeminiThinkingBudget')} - sx={{ ml: 2 }} - > - - - - } - /> - )} - {showParam('llmVndGeminiCodeExecution') && ( )} - - {showParam('llmVndPerplexitySearchMode') && ( - { - if (value === _UNSPECIFIED || !value) - onRemoveParameter('llmVndPerplexitySearchMode'); - else - onChangeParameter({ llmVndPerplexitySearchMode: value }); - }} - options={_perplexitySearchModeOptions} - /> - )} - - {showParam('llmVndPerplexityDateFilter') && ( - { - if (value === _UNSPECIFIED || !value) - onRemoveParameter('llmVndPerplexityDateFilter'); - else - onChangeParameter({ llmVndPerplexityDateFilter: value }); - }} - options={_perplexityDateFilterOptions} - /> - )} - {showParam('llmVndOaiVerbosity') && ( )} - {showParam('llmForceNoStream') && ( { + if (value === _UNSPECIFIED || !value) + onRemoveParameter('llmVndPerplexitySearchMode'); + else + onChangeParameter({ llmVndPerplexitySearchMode: value }); + }} + options={_perplexitySearchModeOptions} + /> + )} + + {showParam('llmVndPerplexityDateFilter') && ( + { + if (value === _UNSPECIFIED || !value) + onRemoveParameter('llmVndPerplexityDateFilter'); + else + onChangeParameter({ llmVndPerplexityDateFilter: value }); + }} + options={_perplexityDateFilterOptions} + /> + )} + + {showParam('llmVndOrtWebSearch') && ( = new Set([ ] as const); // NOTE: llmVndAntInfSpeed intentionally 
NOT included - fast mode not available through OpenRouter const _ORT_ANT_PARAM_ALLOWLIST: ReadonlySet = new Set([ - 'llmEffort', // unified effort + 'llmVndAntEffort', // Anthropic effort 'llmVndAntThinkingBudget', ] as const satisfies DModelParameterId[]); diff --git a/src/modules/llms/server/gemini/gemini.models.ts b/src/modules/llms/server/gemini/gemini.models.ts index bdf86fc97..21bbbde6b 100644 --- a/src/modules/llms/server/gemini/gemini.models.ts +++ b/src/modules/llms/server/gemini/gemini.models.ts @@ -167,7 +167,7 @@ const _knownGeminiModels: ({ chatPrice: gemini30ProPricing, interfaces: IF_30, parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['low', 'high']}, + { paramId: 'llmVndGemEffort', enumValues: ['low', 'high']}, { paramId: 'llmVndGeminiMediaResolution' }, { paramId: 'llmVndGeminiCodeExecution' }, { paramId: 'llmVndGeminiGoogleSearch' }, @@ -216,7 +216,7 @@ const _knownGeminiModels: ({ chatPrice: gemini30FlashPricing, interfaces: IF_30, parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high']}, + { paramId: 'llmVndGemEffort', enumValues: ['minimal', 'low', 'medium', 'high']}, { paramId: 'llmVndGeminiMediaResolution' }, { paramId: 'llmVndGeminiCodeExecution' }, { paramId: 'llmVndGeminiGoogleSearch' }, @@ -860,7 +860,7 @@ const _ORT_GEM_IF_ALLOWLIST: ReadonlySet = new Set([ ] as const); const _ORT_GEM_PARAM_ALLOWLIST: ReadonlySet = new Set([ - 'llmVndGeminiThinkingBudget', 'llmEffort', // OR supports Gemini thinking (unified effort) + 'llmVndGeminiThinkingBudget', 'llmVndGemEffort', // OR supports Gemini thinking 'llmVndGeminiAspectRatio', 'llmVndGeminiImageSize', // OR supports Gemini image generation ] as const satisfies DModelParameterId[]); diff --git a/src/modules/llms/server/llm.server.types.ts b/src/modules/llms/server/llm.server.types.ts index 67f8a7f50..16db477bf 100644 --- a/src/modules/llms/server/llm.server.types.ts +++ b/src/modules/llms/server/llm.server.types.ts @@ -76,7 +76,11 @@ const 
ModelParameterSpec_schema = z.object({ paramId: z.enum([ 'llmTopP', 'llmForceNoStream', - 'llmEffort', // unified effort + // Vendor-specific effort params (converge to unified `effort` wire field) + 'llmVndAntEffort', + 'llmVndGemEffort', + 'llmVndOaiEffort', + 'llmVndMiscEffort', // Anthropic 'llmVndAnt1MContext', 'llmVndAntInfSpeed', diff --git a/src/modules/llms/server/openai/models/moonshot.models.ts b/src/modules/llms/server/openai/models/moonshot.models.ts index 1932bf699..c8594c85b 100644 --- a/src/modules/llms/server/openai/models/moonshot.models.ts +++ b/src/modules/llms/server/openai/models/moonshot.models.ts @@ -20,7 +20,7 @@ const IF_K2_5 = [ ]; const _PS_Reasoning: ModelDescriptionSchema['parameterSpecs'] = [ - { paramId: 'llmEffort', enumValues: ['none', 'high'] }, + { paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] }, ] as const; diff --git a/src/modules/llms/server/openai/models/openai.models.ts b/src/modules/llms/server/openai/models/openai.models.ts index ae1396b55..af7f0b6ff 100644 --- a/src/modules/llms/server/openai/models/openai.models.ts +++ b/src/modules/llms/server/openai/models/openai.models.ts @@ -22,7 +22,7 @@ export const hardcodedOpenAIVariants: ModelVariantMap = { description: 'Supports temperature control for creative applications. 
GPT-5.2 with reasoning disabled (reasoning_effort=none).', interfaces: [LLM_IF_OAI_Responses, LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching], // NO LLM_IF_OAI_Reasoning, NO LLM_IF_HOTFIX_NoTemperature parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'none', hidden: true }, // factory 'none', not changeable + { paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'none', hidden: true }, // factory 'none', not changeable { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, @@ -40,7 +40,7 @@ export const hardcodedOpenAIVariants: ModelVariantMap = { // // customize this param // { paramId: 'llmVndOaiWebSearchContext', initialValue: 'medium', hidden: true }, // Search enabled by default // // copy other params - // { paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, + // { paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, // { paramId: 'llmVndOaiRestoreMarkdown' }, // { paramId: 'llmVndOaiVerbosity' }, // { paramId: 'llmVndOaiImageGeneration' }, @@ -84,7 +84,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'medium' /* our decision: set to medium to have thinking - clones can set to 'none' to have temperature */ }, + { paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'medium' /* our decision: set to medium to have thinking - clones can set to 'none' to have temperature */ }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, @@ -110,7 +110,7 @@ 
export const _knownOpenAIChatModels: ManualMappings = [ interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ { paramId: 'llmForceNoStream' }, - { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, + { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiWebSearchContext' }, ], chatPrice: { input: 1.75, cache: { cType: 'oai-ac', read: 0.175 }, output: 14 }, @@ -144,7 +144,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 272000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['medium', 'high', 'xhigh'] }, + { paramId: 'llmVndOaiEffort', enumValues: ['medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, @@ -171,7 +171,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, + { paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmVndOaiCodeInterpreter' }, @@ -214,7 +214,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, + { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmForceNoStream' }, ], @@ 
-230,7 +230,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal) + { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal) { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmForceNoStream' }, ], @@ -246,7 +246,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal) + { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal) { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmForceNoStream' }, ], @@ -266,7 +266,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, + { paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, // gpt-5-class nets have verbosity control { paramId: 'llmVndOaiImageGeneration' }, // image generation capability { paramId: 'llmVndOaiCodeInterpreter' }, // code execution in sandboxed container @@ -321,7 +321,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, // works + { paramId: 
'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, // works { paramId: 'llmVndOaiWebSearchContext' }, // works, although is not triggered often // { paramId: 'llmVndOaiRestoreMarkdown', initialValue: false }, // since this is for code, let the prompt dictate markdown usage rather than us injecting { paramId: 'llmForceNoStream' }, @@ -356,7 +356,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ contextWindow: 400000, maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], - parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }], + parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }], chatPrice: { input: 0.25, cache: { cType: 'oai-ac', read: 0.025 }, output: 2 }, benchmark: { cbaElo: 1390 }, // gpt-5-mini-high }, @@ -374,7 +374,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ contextWindow: 400000, maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], - parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }], + parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }], chatPrice: { input: 0.05, cache: { cType: 'oai-ac', read: 0.005 }, output: 0.4 }, benchmark: { cbaElo: 1338 }, // gpt-5-nano-high }, @@ -394,7 +394,7 @@ 
export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 128000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'] }, + { paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmForceNoStream' }, ], @@ -412,7 +412,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 32768, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, + { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmForceNoStream' }, ], // chatPrice: TBD - unknown pricing @@ -447,7 +447,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ contextWindow: 200000, maxCompletionTokens: 100000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature], - parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }], + parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }], chatPrice: { input: 1.5, cache: { cType: 'oai-ac', read: 0.375 }, output: 6 }, isLegacy: true, // Deprecated January 16, 2026. 
}, @@ -480,7 +480,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ contextWindow: 200000, maxCompletionTokens: 100000, interfaces: IFS_CHAT_CACHE_REASON, - parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }], + parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }], chatPrice: { input: 1.1, cache: { cType: 'oai-ac', read: 0.275 }, output: 4.4 }, benchmark: { cbaElo: 1391 }, // o4-mini-2025-04-16 }, @@ -515,7 +515,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ contextWindow: 200000, maxCompletionTokens: 100000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature], - parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }], + parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }], chatPrice: { input: 20, output: 80 }, // benchmark: has not been measured yet }, @@ -533,7 +533,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ contextWindow: 200000, maxCompletionTokens: 100000, interfaces: IFS_CHAT_CACHE_REASON, - parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmForceNoStream' }], + parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmForceNoStream' }], chatPrice: { input: 2, cache: { cType: 'oai-ac', read: 0.5 }, output: 8 }, benchmark: { cbaElo: 1433 }, // o3-2025-04-16 }, @@ -551,7 +551,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ contextWindow: 200000, maxCompletionTokens: 100000, interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, 
LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_StripImages], - parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }], + parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }], chatPrice: { input: 1.1, cache: { cType: 'oai-ac', read: 0.55 }, output: 4.4 }, benchmark: { cbaElo: 1348 }, // o3-mini }, @@ -570,7 +570,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ contextWindow: 200000, maxCompletionTokens: 100000, interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature], - parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmForceNoStream' }], + parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmForceNoStream' }], chatPrice: { input: 150, output: 600 }, // benchmark: has not been measured yet by third parties }, @@ -588,7 +588,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ contextWindow: 200000, maxCompletionTokens: 100000, interfaces: IFS_CHAT_CACHE_REASON, - parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiRestoreMarkdown' }], + parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiRestoreMarkdown' }], chatPrice: { input: 15, cache: { cType: 'oai-ac', read: 7.5 }, output: 60 }, benchmark: { cbaElo: 1402 }, // o1-2024-12-17 }, @@ -1232,7 +1232,7 @@ const _ORT_OAI_IF_ALLOWLIST: ReadonlySet = new Set([ LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning, ] as const); const _ORT_OAI_PARAM_ALLOWLIST: ReadonlySet = new Set([ - 'llmEffort', // unified reasoning effort + 'llmVndOaiEffort', // OpenAI reasoning effort 'llmVndOaiVerbosity', // verbosity // 'llmVndOaiImageGeneration', // OR does NOT support image gen with OAI yet (2026-02-06) ] as const satisfies DModelParameterId[]); 
diff --git a/src/modules/llms/server/openai/models/openrouter.models.ts b/src/modules/llms/server/openai/models/openrouter.models.ts index b155780f9..904e613fd 100644 --- a/src/modules/llms/server/openai/models/openrouter.models.ts +++ b/src/modules/llms/server/openai/models/openrouter.models.ts @@ -191,8 +191,8 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndAntThinkingBudget')) { DEV_DEBUG_OPENROUTER_MODELS && console.log(`[DEV] openRouterModelToModelDescription: unexpected ${antLookup ? 'KNOWN' : 'unknown'} Anthropic reasoning model:`, model.id); parameterSpecs.push({ paramId: 'llmVndAntThinkingBudget' }); // configurable thinking budget - if (!parameterSpecs.some(p => p.paramId === 'llmEffort')) - parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'max'] }); // use the latest known Anthropic effort levels superset + if (!parameterSpecs.some(p => p.paramId === 'llmVndAntEffort')) + parameterSpecs.push({ paramId: 'llmVndAntEffort' }); // use the latest known Anthropic effort levels superset } break; @@ -204,9 +204,9 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr console.log('[DEV] openRouterModelToModelDescription: unknown Gemini model:', model.id); // 0-day: reasoning models get default thinking budget if not inherited - if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndGeminiThinkingBudget' || p.paramId === 'llmEffort')) { + if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndGeminiThinkingBudget' || p.paramId === 'llmVndGemEffort')) { // DEV_DEBUG_OPENROUTER_MODELS && console.log(`[DEV] openRouterModelToModelDescription: tagging ${gemLookup ? 
'KNOWN' : 'unknown'} Gemini reasoning model:`, model.id); - parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }); // use the latest known Gemini effort (thinking) levels superset + parameterSpecs.push({ paramId: 'llmVndGemEffort' }); // use the latest known Gemini effort (thinking) levels superset // parameterSpecs.push({ paramId: 'llmVndGeminiThinkingBudget' }); // fallback with default range } @@ -229,17 +229,17 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr console.log('[DEV] openRouterModelToModelDescription: unknown OpenAI model:', model.id); // 0-day: reasoning models get default 3-level effort if not inherited - if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmEffort')) { + if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndOaiEffort')) { // console.log('[DEV] openRouterModelToModelDescription: unexpected OpenAI reasoning model:', model.id); - parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'] }); // latest known OpenAI effort levels superset + parameterSpecs.push({ paramId: 'llmVndOaiEffort' }); // latest known OpenAI effort levels superset } break; case model.id.startsWith('x-ai/') || model.id.startsWith('moonshotai/') || model.id.startsWith('z-ai/') || model.id.startsWith('deepseek/'): - // 0-day: xAI/Grok models get default reasoning effort if not inherited - if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmEffort')) { + // 0-day: xAI/Grok/Moonshot/Z.ai/DeepSeek models get default reasoning effort if not inherited + if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndMiscEffort')) { // console.log('[DEV] openRouterModelToModelDescription: unexpected xAI/Grok/DeepSeek reasoning model:', model.id); - parameterSpecs.push({ paramId: 'llmEffort', 
enumValues: ['low', 'medium', 'high'] }); // latest known xAI effort levels superset + parameterSpecs.push({ paramId: 'llmVndMiscEffort' }); // binary thinking for these vendors } break; diff --git a/src/modules/llms/server/openai/models/perplexity.models.ts b/src/modules/llms/server/openai/models/perplexity.models.ts index 2ef363ba5..9b5a613b3 100644 --- a/src/modules/llms/server/openai/models/perplexity.models.ts +++ b/src/modules/llms/server/openai/models/perplexity.models.ts @@ -1,7 +1,7 @@ import type { ModelDescriptionSchema } from '../../llm.server.types'; import { createVariantInjector, ModelVariantMap } from '../../llm.server.variants'; -import { LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types'; +import { LLM_IF_HOTFIX_NoStream, LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types'; // configuration @@ -16,13 +16,16 @@ const _hardcodedPerplexityVariants: ModelVariantMap = !PERPLEXITY_ENABLE_VARIANT idVariant: 'academic', label: 'Sonar Deep Research (Academic)', description: 'Expert-level research model with academic sources only. Searches scholarly databases, peer-reviewed papers, and academic publications. 
128k context.', + interfaces: [ + LLM_IF_HOTFIX_NoStream, // seems to be required for medium/academic + LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, + ], parameterSpecs: [ // Fixed parameters for academic search { paramId: 'llmVndOaiWebSearchContext', initialValue: 'medium', hidden: true }, { paramId: 'llmVndPerplexitySearchMode', initialValue: 'academic', hidden: true }, - { paramId: 'llmForceNoStream', initialValue: true, hidden: true }, // Free parameters - // { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'], initialValue: 'medium' }, + // { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'], initialValue: 'medium' }, { paramId: 'llmVndPerplexityDateFilter' }, ], }, @@ -40,7 +43,7 @@ const _knownPerplexityChatModels: ModelDescriptionSchema[] = [ contextWindow: 128000, interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning], parameterSpecs: [ - { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, + { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext', initialValue: 'low' }, // REUSE! 
{ paramId: 'llmVndPerplexitySearchMode' }, { paramId: 'llmVndPerplexityDateFilter' }, diff --git a/src/modules/llms/server/openai/models/xai.models.ts b/src/modules/llms/server/openai/models/xai.models.ts index 3a47c8846..ef7058cfb 100644 --- a/src/modules/llms/server/openai/models/xai.models.ts +++ b/src/modules/llms/server/openai/models/xai.models.ts @@ -152,7 +152,10 @@ const _knownXAIChatModels: ManualMappings = [ contextWindow: 131072, maxCompletionTokens: undefined, interfaces: [...XAI_IF_Pre4, LLM_IF_OAI_Reasoning], - parameterSpecs: XAI_PAR_Pre4, + parameterSpecs: [ + { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, + ...XAI_PAR_Pre4, + ], chatPrice: { input: 0.3, output: 0.5, cache: { cType: 'oai-ac', read: 0.075 } }, benchmark: { cbaElo: 1357 }, // grok-3-mini-beta }, diff --git a/src/modules/llms/server/openai/models/zai.models.ts b/src/modules/llms/server/openai/models/zai.models.ts index 29980e116..592853564 100644 --- a/src/modules/llms/server/openai/models/zai.models.ts +++ b/src/modules/llms/server/openai/models/zai.models.ts @@ -17,7 +17,7 @@ const _IF_Vision_Reasoning = [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, L // - Z.ai thinking maps from effort: 'none' → disabled, anything else → enabled // - Z.ai only supports binary enabled/disabled, so we expose 'none' and 'high' const _PS_Reasoning: ModelDescriptionSchema['parameterSpecs'] = [ - { paramId: 'llmEffort', enumValues: ['none', 'high'] }, + { paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] }, ] as const; diff --git a/tools/develop/llm-parameter-sweep/sweep.ts b/tools/develop/llm-parameter-sweep/sweep.ts index c306b6db6..3f6513139 100644 --- a/tools/develop/llm-parameter-sweep/sweep.ts +++ b/tools/develop/llm-parameter-sweep/sweep.ts @@ -42,8 +42,8 @@ const SWEEP_DEFINITIONS = [ name: 'oai-reasoning-effort', description: 'OpenAI reasoning_effort values', applicability: { type: 'dialects', dialects: ['openai', 'azure', 'openrouter'] }, - applyToModel: 
(value) => ({ effort: value }), - values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh' /*, 'max'*/ /* OpenRouter-only? */] satisfies AixAPI_Model['effort'][], + applyToModel: (value) => ({ reasoningEffort: value }), + values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh' /*, 'max'*/ /* OpenRouter-only? */] satisfies AixAPI_Model['reasoningEffort'][], neuteredValues: ['medium'], // medium is the default, so only-medium means no real support mode: 'enumerate', }), @@ -85,8 +85,8 @@ const SWEEP_DEFINITIONS = [ name: 'ant-effort', description: 'Anthropic output_config.effort values', applicability: { type: 'dialects', dialects: ['anthropic'] }, - applyToModel: (value) => ({ effort: value }), - values: ['low', 'medium', 'high', 'max'] satisfies AixAPI_Model['effort'][], + applyToModel: (value) => ({ reasoningEffort: value }), + values: ['low', 'medium', 'high', 'max'] satisfies AixAPI_Model['reasoningEffort'][], mode: 'enumerate', }), @@ -112,8 +112,8 @@ const SWEEP_DEFINITIONS = [ name: 'gemini-thinking-level', description: 'Gemini thinkingConfig.thinkingLevel values', applicability: { type: 'dialects', dialects: ['gemini'] }, - applyToModel: (value) => value ? { effort: value } : {}, // null = dynamic mode, don't set level - values: ['minimal', 'low', 'medium', 'high'] satisfies (AixAPI_Model['effort'] | null)[], + applyToModel: (value) => value ? 
{ reasoningEffort: value } : {}, // null = dynamic mode, don't set level + values: ['minimal', 'low', 'medium', 'high'] satisfies (AixAPI_Model['reasoningEffort'] | null)[], mode: 'enumerate', }), @@ -138,8 +138,8 @@ const SWEEP_DEFINITIONS = [ name: 'xai-reasoning-effort', description: 'xAI reasoning.effort values', applicability: { type: 'dialects', dialects: ['xai'] }, - applyToModel: (value) => ({ effort: value }), - values: ['low', 'medium', 'high'] satisfies AixAPI_Model['effort'][], + applyToModel: (value) => ({ reasoningEffort: value }), + values: ['low', 'medium', 'high'] satisfies AixAPI_Model['reasoningEffort'][], mode: 'enumerate', }),