LLM Effort: split definition for UI namings with unified backend. #940

This commit is contained in:
Enrico Ros
2026-02-18 14:38:14 -08:00
parent 3758612ed6
commit 3aa9a71a4b
21 changed files with 393 additions and 264 deletions
+11 -11
View File
@@ -38,17 +38,17 @@ Example parameter mapping. Note that new parameters may have been added to both
The objective of the sweep is to hint at model definition values, but the model definitions are what matters for Big-AGI,
and need to be carefully updated, otherwise thousands of clients may break.
| Dialect | Sweep Key | Model paramId |
|---------|-----------|---------------|
| OpenAI | `oai-reasoning-effort` | `llmEffort` |
| OpenAI | `oai-verbosity` | `llmVndOaiVerbosity` |
| OpenAI | `oai-image-generation` | `llmVndOaiImageGeneration` |
| OpenAI | `oai-web-search` | `llmVndOaiWebSearchContext` |
| Anthropic | `ant-effort` | `llmEffort` |
| Anthropic | `ant-thinking-budget` | `llmVndAntThinkingBudget` |
| Gemini | `gemini-thinking-level` | `llmEffort` |
| Gemini | `gemini-thinking-budget` | `llmVndGeminiThinkingBudget` |
| xAI | `xai-web-search` | `llmVndXaiWebSearch` |
| Dialect | Sweep Key | Model paramId |
|-----------|--------------------------|------------------------------|
| OpenAI | `oai-reasoning-effort` | `llmVndOaiEffort` |
| OpenAI | `oai-verbosity` | `llmVndOaiVerbosity` |
| OpenAI | `oai-image-generation` | `llmVndOaiImageGeneration` |
| OpenAI | `oai-web-search` | `llmVndOaiWebSearchContext` |
| Anthropic | `ant-effort` | `llmVndAntEffort` |
| Anthropic | `ant-thinking-budget` | `llmVndAntThinkingBudget` |
| Gemini | `gemini-thinking-level` | `llmVndGemEffort` |
| Gemini | `gemini-thinking-budget` | `llmVndGeminiThinkingBudget` |
| xAI | `xai-web-search` | `llmVndXaiWebSearch` |
## Output
+45 -29
View File
@@ -83,8 +83,7 @@ function _enumDef<const V extends string>(def: _EnumParamDef<V>): _EnumParamDef<
export const DModelParameterRegistry = {
/// Common 'implicit' parameters, available to all models ///
// Note: we still use pre-v2 names for compatibility and ease of migration
// -- Common 'implicit' parameters, available to all models --
llmRef: {
label: 'Model ID',
@@ -114,7 +113,7 @@ export const DModelParameterRegistry = {
// due to implicit, when undefined we apply the runtime fallback
},
/// Extended parameters, specific to certain models/vendors
// -- Extended parameters, specific to certain models/vendors --
llmTopP: {
label: 'Top P',
@@ -124,30 +123,6 @@ export const DModelParameterRegistry = {
// when undefined is omitted from the requests (default)
},
/**
* Unified 'reasoning' effort parameter for all vendors. The full superset of all possible effort levels.
* Each model declares its own subset via `enumValues` in its parameterSpec.
*
* Mapping to vendor-native values is done in adapters (the only place with vendor knowledge):
* - Anthropic: output_config.effort
* - OpenAI: reasoning_effort (ChatCompletions) / reasoning.effort (Responses)
* - Gemini: thinkingConfig.thinkingLevel (depending on model: low/high, minimal/low/medium/high, ...)
* - Moonshot/ZAI: thinking.type (none->disabled, high->enabled)
* - Perplexity: reasoning_effort
* - etc.
*/
llmEffort: _enumDef({
label: 'Reasoning Effort',
type: 'enum',
description: 'Controls reasoning depth and effort level.',
values: [
// all values (max includes) sorted in ascending order of effort
'none', 'minimal', 'low', 'medium', 'high', 'xhigh', // OpenAI/common
'max', // Anthropic only, for now
],
// undefined means vendor default (usually high or medium, could be different such as none)
}),
/**
* First introduced as a user-configurable parameter for the 'Verification' required by o3.
* [2025-04-16] Adding parameter to disable streaming for o3, and possibly more models.
@@ -165,6 +140,46 @@ export const DModelParameterRegistry = {
},
// -- 'Effort' unified semantic specialization --
/**
* Vendor-specific effort parameters. Each vendor has its own effort param with vendor-contextual
* labels and descriptions. Models declare their subset via `enumValues` in parameterSpec.
* All converge to the unified `reasoningEffort` wire field in aix.client.ts.
*/
llmVndAntEffort: _enumDef({
label: 'Effort',
type: 'enum',
description: 'Controls reasoning depth. Works alongside thinking budget.',
values: ['low', 'medium', 'high', 'max'],
// undefined means high effort (default)
}),
llmVndGemEffort: _enumDef({
label: 'Thinking Level',
type: 'enum',
description: 'Controls internal reasoning depth. When unset, the model decides dynamically.',
values: ['minimal', 'low', 'medium', 'high'],
// undefined means dynamic (model decides)
}),
llmVndOaiEffort: _enumDef({
label: 'Reasoning Effort',
type: 'enum',
description: 'Controls how much effort the model spends on reasoning.',
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'],
// undefined means vendor default
}),
llmVndMiscEffort: _enumDef({
label: 'Thinking',
type: 'enum',
description: 'Enable or disable extended thinking mode.',
values: ['none', 'high'],
// undefined means vendor default (usually 'high', i.e. thinking enabled)
}),
// Anthropic-specific
llmVndAnt1MContext: {
@@ -192,7 +207,8 @@ export const DModelParameterRegistry = {
},
/**
* NOTE: this is being phased out with Opus 4.6 in favor of llmEffort ('low', 'medium', 'high', 'max')
* NOTE: this is being phased out with Opus 4.6 in favor of llmVndAntEffort, while this is implicitly
* adaptive if missing (as if we had our custom sentinel value of -1).
*
* Important: when this is set to anything other than nullish, it enables Adaptive(-1)/Extended(int > 1024) thinking,
* and as a side effect **disables the temperature** in the requests (even when tunneled through OpenRouter). So this
@@ -504,7 +520,7 @@ interface DModelParameterSpec<T extends DModelParameterId> {
/**
* (optional) For enum params: restrict which values from the registry are allowed for this model.
* The UI will only show these values. Analogous to rangeOverride for numeric params.
* Example: llmEffort registry has 7 values, but a specific model may only support ['low', 'medium', 'high'].
* Example: llmVndOaiEffort registry has 6 values, but a specific model may only support ['low', 'medium', 'high'].
*/
enumValues?: readonly string[];
}
+16 -8
View File
@@ -50,14 +50,13 @@ export function aixCreateModelFromLLMOptions(
// destructure input with the overrides
const {
llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream,
llmEffort,
llmVndAntEffort, llmVndGemEffort, llmVndOaiEffort, llmVndMiscEffort,
llmVndAnt1MContext, llmVndAntInfSpeed, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch,
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiThinkingBudget,
// llmVndMoonshotWebSearch,
llmVndOaiRestoreMarkdown, llmVndOaiVerbosity, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, llmVndOaiImageGeneration, llmVndOaiCodeInterpreter,
llmVndOrtWebSearch,
llmVndPerplexityDateFilter, llmVndPerplexitySearchMode,
// xAI
llmVndXaiCodeExecution, llmVndXaiSearchInterval, llmVndXaiWebSearch, llmVndXaiXSearch, llmVndXaiXSearchHandles,
} = {
...llmOptions,
@@ -102,11 +101,15 @@ export function aixCreateModelFromLLMOptions(
return stripUndefined({
id: llmRef,
acceptsOutputs: acceptsOutputs,
...(hotfixOmitTemperature ? { temperature: null } : llmTemperature !== undefined ? { temperature: llmTemperature } : {}),
...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
...(llmTopP !== undefined ? { topP: llmTopP } : {}),
...(llmEffort ? { effort: llmEffort } : {}),
...(llmForceNoStream ? { forceNoStream: true } : {}),
temperature: (hotfixOmitTemperature || llmTemperature === null) ? null : llmTemperature, // strippable
maxTokens: llmResponseTokens ?? undefined, // strippable - null: like undefined -> strip -> omit the value
topP: llmTopP, // strippable (likely)
forceNoStream: llmForceNoStream ? true : undefined, // strippable
userGeolocation: userGeolocation, // strippable (likely)
// Cross-provider unified options
reasoningEffort: llmVndAntEffort ?? llmVndGemEffort ?? llmVndOaiEffort ?? llmVndMiscEffort, // strippable
// Anthropic
...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget === -1 ? 'adaptive' as const : llmVndAntThinkingBudget } : {}),
...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}),
@@ -114,6 +117,7 @@ export function aixCreateModelFromLLMOptions(
...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}),
...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}),
...(llmVndAntWebSearch === 'auto' ? { vndAntWebSearch: llmVndAntWebSearch } : {}),
// Gemini
...(llmVndGeminiAspectRatio ? { vndGeminiAspectRatio: llmVndGeminiAspectRatio } : {}),
...(llmVndGeminiCodeExecution === 'auto' ? { vndGeminiCodeExecution: llmVndGeminiCodeExecution } : {}),
@@ -126,8 +130,10 @@ export function aixCreateModelFromLLMOptions(
...(llmVndGeminiMediaResolution ? { vndGeminiMediaResolution: llmVndGeminiMediaResolution } : {}),
...(llmVndGeminiThinkingBudget !== undefined ? { vndGeminiThinkingBudget: llmVndGeminiThinkingBudget } : {}),
// ...(llmVndGeminiUrlContext === 'auto' ? { vndGeminiUrlContext: llmVndGeminiUrlContext } : {}),
// Moonshot
// ...(llmVndMoonshotWebSearch === 'auto' ? { vndMoonshotWebSearch: 'auto' } : {}),
// OpenAI
...(llmVndOaiResponsesAPI ? { vndOaiResponsesAPI: true } : {}),
...(llmVndOaiRestoreMarkdown ? { vndOaiRestoreMarkdown: llmVndOaiRestoreMarkdown } : {}),
@@ -135,12 +141,14 @@ export function aixCreateModelFromLLMOptions(
...(llmVndOaiWebSearchContext ? { vndOaiWebSearchContext: llmVndOaiWebSearchContext } : {}),
...(llmVndOaiImageGeneration ? { vndOaiImageGeneration: (llmVndOaiImageGeneration as any /* backward comp */) === true ? 'mq' : llmVndOaiImageGeneration } : {}),
...(llmVndOaiCodeInterpreter === 'auto' ? { vndOaiCodeInterpreter: llmVndOaiCodeInterpreter } : {}),
// OpenRouter
...(llmVndOrtWebSearch === 'auto' ? { vndOrtWebSearch: 'auto' } : {}),
// Perplexity
...(llmVndPerplexityDateFilter ? { vndPerplexityDateFilter: llmVndPerplexityDateFilter } : {}),
...(llmVndPerplexitySearchMode ? { vndPerplexitySearchMode: llmVndPerplexitySearchMode } : {}),
...(userGeolocation ? { userGeolocation } : {}),
// xAI
...(llmVndXaiCodeExecution === 'auto' ? { vndXaiCodeExecution: llmVndXaiCodeExecution } : {}),
...(llmVndXaiSearchInterval ? { vndXaiSearchInterval: llmVndXaiSearchInterval } : {}),
+60 -49
View File
@@ -431,8 +431,6 @@ export namespace AixWire_API {
topP: z.number().min(0).max(1).optional(),
forceNoStream: z.boolean().optional(),
// Cross-vendor Structured Outputs
/**
* Constrain model response to a JSON schema for data extraction. Response will be valid JSON. Schema limitations vary by vendor.
* Supported: Anthropic (output_format), OpenAI (response_format), Gemini (responseSchema)
@@ -449,53 +447,6 @@ export namespace AixWire_API {
*/
strictToolInvocations: z.boolean().optional(),
// Unified effort parameter (replaces vendor-specific effort params)
effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(),
// NOTE: kept for backward compatibility during the migration; and they flow into effort - REMOVE for 2.0.5
vndAntEffort: z.enum(['low', 'medium', 'high', 'max']).optional(),
vndGeminiThinkingLevel: z.enum(['high', 'medium', 'low', 'minimal']).optional(), // new param
vndOaiReasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(),
vndOaiReasoningSummary: z.enum(['none', 'detailed']).optional(),
vndGeminiShowThoughts: z.boolean().optional(),
// Anthropic
vndAnt1MContext: z.boolean().optional(),
vndAntInfSpeed: z.enum(['fast']).optional(),
vndAntSkills: z.string().optional(),
vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(),
vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant
vndAntWebFetch: z.enum(['auto']).optional(),
vndAntWebSearch: z.enum(['auto']).optional(),
// Gemini
vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(),
vndGeminiCodeExecution: z.enum(['auto']).optional(),
vndGeminiComputerUse: z.enum(['browser']).optional(),
vndGeminiGoogleSearch: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(),
vndGeminiImageSize: z.enum(['1K', '2K', '4K']).optional(),
vndGeminiMediaResolution: z.enum(['mr_high', 'mr_medium', 'mr_low']).optional(),
vndGeminiThinkingBudget: z.number().optional(), // -1 for 'adaptive'
vndGeminiUrlContext: z.enum(['auto']).optional(),
// Moonshot
vndMoonshotWebSearch: z.enum(['auto']).optional(),
// OpenAI
vndOaiCodeInterpreter: z.enum(['off', 'auto']).optional(),
vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(),
vndOaiResponsesAPI: z.boolean().optional(),
vndOaiRestoreMarkdown: z.boolean().optional(),
vndOaiVerbosity: z.enum(['low', 'medium', 'high']).optional(),
vndOaiWebSearchContext: z.enum(['low', 'medium', 'high']).optional(),
// OpenRouter
vndOrtWebSearch: z.enum(['auto']).optional(),
// Perplexity
vndPerplexityDateFilter: z.enum(['unfiltered', '1m', '3m', '6m', '1y']).optional(),
vndPerplexitySearchMode: z.enum(['default', 'academic']).optional(),
// xAI
vndXaiCodeExecution: z.enum(['off', 'auto']).optional(),
vndXaiSearchInterval: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(),
vndXaiWebSearch: z.enum(['off', 'auto']).optional(),
vndXaiXSearch: z.enum(['off', 'auto']).optional(),
vndXaiXSearchHandles: z.string().optional(),
/**
* [OpenAI, 2025-03-11] This is the generic version of the `web_search_options.user_location` field
* This AIX field mimics on purpose: https://platform.openai.com/docs/api-reference/chat/create
@@ -506,6 +457,66 @@ export namespace AixWire_API {
country: z.string().optional(), // two-letter ISO country code of the user, e.g. US
timezone: z.string().optional(), // IANA timezone of the user, e.g. America/Los_Angeles
}).optional(),
// Cross-provider unified (but with semantic specialization) options
/**
* Union of all the possible reasoning effort values. Different dispatches will validate the
* domain (subset) of values they support, but the client can send any of them and let the server handle it.
*/
reasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(),
// REMOVE for 2.0.5: we used to have the parameters below - here for doc purposes only - parsing doesn't break if they are set (backward comp)
// vndAntEffort: z.enum(['low', 'medium', 'high', 'max']).optional(),
// vndGeminiThinkingLevel: z.enum(['high', 'medium', 'low', 'minimal']).optional(), // new param
// vndOaiReasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(),
// vndOaiReasoningSummary: z.enum(['none', 'detailed']).optional(),
// vndGeminiShowThoughts: z.boolean().optional(),
// Anthropic
vndAnt1MContext: z.boolean().optional(),
vndAntInfSpeed: z.enum(['fast']).optional(),
vndAntSkills: z.string().optional(),
vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(),
vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant
vndAntWebFetch: z.enum(['auto']).optional(),
vndAntWebSearch: z.enum(['auto']).optional(),
// Gemini
vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(),
vndGeminiCodeExecution: z.enum(['auto']).optional(),
vndGeminiComputerUse: z.enum(['browser']).optional(),
vndGeminiGoogleSearch: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(),
vndGeminiImageSize: z.enum(['1K', '2K', '4K']).optional(),
vndGeminiMediaResolution: z.enum(['mr_high', 'mr_medium', 'mr_low']).optional(),
vndGeminiThinkingBudget: z.number().optional(), // -1 for 'adaptive'
vndGeminiUrlContext: z.enum(['auto']).optional(),
// Moonshot
vndMoonshotWebSearch: z.enum(['auto']).optional(),
// OpenAI
vndOaiCodeInterpreter: z.enum(['off', 'auto']).optional(),
vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(),
vndOaiResponsesAPI: z.boolean().optional(),
vndOaiRestoreMarkdown: z.boolean().optional(),
vndOaiVerbosity: z.enum(['low', 'medium', 'high']).optional(),
vndOaiWebSearchContext: z.enum(['low', 'medium', 'high']).optional(),
// OpenRouter
vndOrtWebSearch: z.enum(['auto']).optional(),
// Perplexity
vndPerplexityDateFilter: z.enum(['unfiltered', '1m', '3m', '6m', '1y']).optional(),
vndPerplexitySearchMode: z.enum(['default', 'academic']).optional(),
// xAI
vndXaiCodeExecution: z.enum(['off', 'auto']).optional(),
vndXaiSearchInterval: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(),
vndXaiWebSearch: z.enum(['off', 'auto']).optional(),
vndXaiXSearch: z.enum(['off', 'auto']).optional(),
vndXaiXSearchHandles: z.string().optional(),
});
/// Resume Handle
@@ -171,7 +171,7 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
}
// [Anthropic] Effort parameter [Anthropic, effort-2025-11-24]
const reasoningEffort = model.effort ?? model.vndAntEffort;
const reasoningEffort = model.reasoningEffort; // ?? model.vndAntEffort;
if (reasoningEffort) {
if (reasoningEffort === 'none' || reasoningEffort === 'minimal' || reasoningEffort === 'xhigh') throw new Error(`Anthropic API does not support '${reasoningEffort}' effort level`);
payload.output_config = {
@@ -96,15 +96,15 @@ export function aixToGeminiGenerateContent(model: AixAPI_Model, _chatGenerate: A
}
// Thinking models: thinking budget and show thoughts
const thinkingLevel = model.effort ?? model.vndGeminiThinkingLevel;
const thinkingLevel = model.reasoningEffort; // ?? model.vndGeminiThinkingLevel;
if (thinkingLevel === 'none' || thinkingLevel === 'xhigh' || thinkingLevel === 'max') // domain validation
throw new Error(`Gemini API does not support '${thinkingLevel}' thinking level`);
if (thinkingLevel || model.vndGeminiThinkingBudget !== undefined || model.vndGeminiShowThoughts === true) {
if (thinkingLevel || model.vndGeminiThinkingBudget !== undefined /*|| model.vndGeminiShowThoughts === true*/) {
const thinkingConfig: Exclude<TRequest['generationConfig'], undefined>['thinkingConfig'] = {};
// This shows mainly 'summaries' of thoughts, and we enable it for most cases where thinking is requested
if (thinkingLevel || (model.vndGeminiThinkingBudget ?? 0) > 1 || model.vndGeminiShowThoughts === true)
if (thinkingLevel || (model.vndGeminiThinkingBudget ?? 0) > 1 /*|| model.vndGeminiShowThoughts === true*/)
thinkingConfig.includeThoughts = true;
// [Gemini 3, 2025-11-18] Thinking Level (replaces thinkingBudget for Gemini 3)
@@ -139,11 +139,10 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
}
// [OpenAI] Vendor-specific reasoning effort
const reasoningEffort = model.effort ?? model.vndOaiReasoningEffort;
const reasoningEffort = model.reasoningEffort; // ?? model.vndOaiReasoningEffort;
if (reasoningEffort
&& openAIDialect !== 'openrouter' // OpenRouter has its own channeling of this
&& openAIDialect !== 'moonshot' // MoonShot maps to none->disabled / high->enabled
&& openAIDialect !== 'zai' // Z.ai maps like MoonShot
&& openAIDialect !== 'deepseek' && openAIDialect !== 'moonshot' && openAIDialect !== 'zai' // MoonShot maps to none->disabled / high->enabled
&& openAIDialect !== 'perplexity' // Perplexity has its own block below with stricter validation
) {
if (reasoningEffort === 'max') // domain validation
@@ -153,7 +152,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
// [Moonshot] Kimi K2.5 reasoning effort -> thinking mode (only 'none' and 'high' supported for now)
// [Z.ai] GLM thinking mode: binary enabled/disabled (supports GLM-4.5 series and higher) - https://docs.z.ai/guides/capabilities/thinking-mode
if (reasoningEffort && (openAIDialect === 'moonshot' || openAIDialect === 'zai')) {
if (reasoningEffort && (openAIDialect === 'deepseek' || openAIDialect === 'moonshot' || openAIDialect === 'zai')) {
if (reasoningEffort !== 'none' && reasoningEffort !== 'high') // domain validation
throw new Error(`${openAIDialect} only supports reasoning effort 'none' or 'high', got '${reasoningEffort}'`);
@@ -246,7 +245,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
const isTunneledGemini = model.id.startsWith('google/');
if (isTunneledAnt) {
// Effort -> OpenRouter verbosity -> Anthropic upstream output_config.effort
const antEffort = model.effort ?? model.vndAntEffort;
const antEffort = model.reasoningEffort; // ?? model.vndAntEffort;
if (antEffort) {
if (antEffort === 'none' || antEffort === 'minimal' || antEffort === 'xhigh') // domain validation
throw new Error(`OpenRouter->Anthropic API does not support '${antEffort}' reasoning effort`);
@@ -261,10 +260,10 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
if (model.vndAntThinkingBudget === 'adaptive') {
payload.reasoning = { enabled: true };
delete payload.temperature;
} else if (model.vndAntThinkingBudget) {
} else if (typeof model.vndAntThinkingBudget === 'number') {
payload.reasoning = { enabled: true, max_tokens: model.vndAntThinkingBudget };
delete payload.temperature;
} else {
} else /* null or undefined */ {
// NOTE: with thinking disabled (null), we can still use temperature, so we don't delete it
// see the note on llms.parameters.ts: 'llmVndAntThinkingBudget'
}
@@ -274,7 +273,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
if (model.vndGeminiThinkingBudget !== undefined) {
payload.reasoning = { enabled: true, max_tokens: model.vndGeminiThinkingBudget };
} else {
const gemEffort = model.effort ?? model.vndGeminiThinkingLevel;
const gemEffort = model.reasoningEffort; // ?? model.vndGeminiThinkingLevel;
if (gemEffort) {
if (gemEffort === 'none' || gemEffort === 'xhigh' || gemEffort === 'max') // domain validation
throw new Error(`OpenRouter->Gemini API does not support '${gemEffort}' reasoning effort`);
@@ -116,7 +116,7 @@ export function aixToOpenAIResponses(
// Reasoning
const reasoningEffort = model.effort ?? model.vndOaiReasoningEffort;
const reasoningEffort = model.reasoningEffort; // ?? model.vndOaiReasoningEffort;
if (reasoningEffort === 'max') // domain validation
throw new Error(`OpenAI Responses API does not support '${reasoningEffort}' reasoning effort`);
@@ -95,7 +95,7 @@ export function aixToXAIResponses(
}
// Reasoning
const reasoningEffort = model.effort ?? model.vndOaiReasoningEffort;
const reasoningEffort = model.reasoningEffort; // ?? model.vndOaiReasoningEffort;
if (reasoningEffort === 'none' || reasoningEffort === 'minimal' || reasoningEffort === 'xhigh' || reasoningEffort === 'max') // domain validation
throw new Error(`XAI Responses API does not support reasoning effort '${reasoningEffort}'`);
@@ -336,7 +336,7 @@ export namespace OpenAIWire_API_Chat_Completions {
// https://openrouter.ai/docs/api/reference/parameters#verbosity
verbosity: z.enum([
'low', 'medium', 'high',
'max', // [OpenRouter, 2026-02-06] Anthropic-through-openrouter has its llmEffort mapped to 'verbosity'
'max', // [OpenRouter, 2026-02-06] Anthropic-through-openrouter has its effort mapped to 'verbosity'
]).optional(), // 'max' is Opus 4.6 only
// [OpenRouter, 2025-11-11] Unified reasoning parameter for all models
reasoning: z.object({
@@ -21,9 +21,26 @@ import { AnthropicSkillsConfig } from './AnthropicSkillsConfig';
const _UNSPECIFIED = '_UNSPECIFIED' as const;
// unified effort options - descending order (strongest first), filtered per-model by enumValues
const _effortOptions = [
{ value: 'max', label: 'Max', description: 'Deepest reasoning, no constraints' } as const,
// Vendor-specific effort options - descending order, filtered per-model by enumValues
const _antEffortOptions = [
{ value: 'max', label: 'Max', description: 'Deepest reasoning' } as const,
{ value: 'high', label: 'High', description: 'Maximum capability' } as const,
{ value: 'medium', label: 'Medium', description: 'Balanced' } as const,
{ value: 'low', label: 'Low', description: 'Most efficient' } as const,
{ value: _UNSPECIFIED, label: 'Default', description: 'Default (High)' } as const,
] as const;
const _gemEffortOptions = [
{ value: 'high', label: 'High', description: 'Maximum reasoning depth' } as const,
{ value: 'medium', label: 'Medium', description: 'Balanced reasoning' } as const,
{ value: 'low', label: 'Low', description: 'Quick responses' } as const,
{ value: 'minimal', label: 'Minimal', description: 'Fastest, least reasoning' } as const,
{ value: _UNSPECIFIED, label: 'Default', description: 'Model decides' } as const,
] as const;
const _oaiEffortOptions = [
{ value: 'xhigh', label: 'X-High', description: 'Hardest thinking, best quality' } as const,
{ value: 'high', label: 'High', description: 'Deep, thorough analysis' } as const,
{ value: 'medium', label: 'Medium', description: 'Balanced reasoning depth' } as const,
@@ -33,6 +50,20 @@ const _effortOptions = [
{ value: _UNSPECIFIED, label: 'Default', description: 'Default value (unset)' } as const,
] as const;
const _miscEffortOptions = [
{ value: 'high', label: 'On', description: 'Multi-step reasoning' } as const,
{ value: 'none', label: 'Off', description: 'Disable thinking mode' } as const,
{ value: _UNSPECIFIED, label: 'Default', description: 'Model Default' } as const,
] as const;
/**
 * Restricts a vendor's effort option list to the values a specific model supports.
 * The `_UNSPECIFIED` sentinel entry is always kept, so the "Default" choice stays
 * selectable. Returns null when the model declares no spec for this parameter
 * (the corresponding control should not render).
 */
export function llmParametersFilterEffortOptions<T extends { value: string }>(options: readonly T[], spec: DModelParameterSpecAny | undefined, registryKey: keyof typeof DModelParameterRegistry): T[] | null {
  if (!spec) return null;
  // when the model spec does not narrow the values, fall back to the registry's full list
  const registryDef = DModelParameterRegistry[registryKey];
  const fallbackValues = 'values' in registryDef ? registryDef.values : [];
  const allowed = new Set<string>((spec.enumValues as readonly string[] | undefined) ?? fallbackValues);
  return options.filter(({ value }) => value === _UNSPECIFIED || allowed.has(value));
}
const _verbosityOptions = [
{ value: 'high', label: 'Detailed', description: 'Thorough responses, great for audits' } as const,
{ value: 'medium', label: 'Balanced', description: 'Standard detail level (default)' } as const,
@@ -191,13 +222,13 @@ export function LLMParametersEditor(props: {
, [props.parameterSpecs]);
// effort options: filtered to model's allowed values, preserving descending order from _effortOptions
const llmEffortSpec = modelParamSpec['llmEffort'];
const effortOptions = React.useMemo(() => {
if (!llmEffortSpec) return null;
const allowedSet = new Set((llmEffortSpec.enumValues as readonly string[] | undefined) ?? DModelParameterRegistry['llmEffort'].values);
return _effortOptions.filter(o => o.value === _UNSPECIFIED || allowedSet.has(o.value));
}, [llmEffortSpec]);
// effort options: one memo for all vendors, filtered to model's allowed values
const { antEffortOptions, gemEffortOptions, oaiEffortOptions, miscEffortOptions } = React.useMemo(() => ({
antEffortOptions: llmParametersFilterEffortOptions(_antEffortOptions, modelParamSpec['llmVndAntEffort'], 'llmVndAntEffort'),
gemEffortOptions: llmParametersFilterEffortOptions(_gemEffortOptions, modelParamSpec['llmVndGemEffort'], 'llmVndGemEffort'),
oaiEffortOptions: llmParametersFilterEffortOptions(_oaiEffortOptions, modelParamSpec['llmVndOaiEffort'], 'llmVndOaiEffort'),
miscEffortOptions: llmParametersFilterEffortOptions(_miscEffortOptions, modelParamSpec['llmVndMiscEffort'], 'llmVndMiscEffort'),
}), [modelParamSpec]);
// current values: { ...fallback, ...baseline, ...user }
@@ -205,21 +236,24 @@ export function LLMParametersEditor(props: {
const {
llmResponseTokens = LLMImplicitParamersRuntimeFallback.llmResponseTokens, // fallback for undefined, result is number | null
llmTemperature, // null: no temperature, number: temperature value, undefined: shall not happen, we treat it similarly to null
llmEffort,
llmForceNoStream,
llmVndAnt1MContext,
llmVndAntEffort,
llmVndAntInfSpeed,
llmVndAntSkills,
llmVndAntThinkingBudget,
llmVndAntWebFetch,
llmVndAntWebSearch,
llmVndGemEffort,
llmVndGeminiAspectRatio,
llmVndGeminiCodeExecution,
llmVndGeminiGoogleSearch,
llmVndGeminiImageSize,
llmVndGeminiMediaResolution,
llmVndGeminiThinkingBudget,
llmVndMiscEffort,
// llmVndMoonshotWebSearch,
llmVndOaiEffort,
llmVndOaiRestoreMarkdown,
llmVndOaiWebSearchContext,
llmVndOaiWebSearchGeolocation,
@@ -229,7 +263,6 @@ export function LLMParametersEditor(props: {
llmVndOrtWebSearch,
llmVndPerplexityDateFilter,
llmVndPerplexitySearchMode,
llmVndXaiCodeExecution,
llmVndXaiSearchInterval,
llmVndXaiWebSearch,
@@ -276,10 +309,10 @@ export function LLMParametersEditor(props: {
const gemTBSpec = modelParamSpec['llmVndGeminiThinkingBudget'];
const gemTBMinMax = gemTBSpec?.rangeOverride || defGemTB.range;
// check if web search should be disabled
// 2026-02-17: NOTE: formerly we checked for `llmEffort === 'minimal' || llmEffort === 'none'`, but seems to be working now
// Now this seems to be still the case for llmEffort === 'minimal' (gpt 5.0 and before), 5.1/5.2 work even with 'none'
const oaiSkipSearchOnMinimalEffort = llmEffort === 'minimal';
// check if web search should be disabled (OpenAI-only)
// 2026-02-17: NOTE: formerly we checked for `llmVndOaiEffort === 'minimal' || llmVndOaiEffort === 'none'`, but seems to be working now
// Now this seems to be still the case for llmVndOaiEffort === 'minimal' (gpt 5.0 and before), 5.1/5.2 work even with 'none'
const oaiSkipSearchOnMinimalEffort = llmVndOaiEffort === 'minimal';
return <>
@@ -329,6 +362,8 @@ export function LLMParametersEditor(props: {
</Box>
)}
{/* pre-Effort: Anthropic [thinking budget, effort, ...] */}
{antThinkingShown && (
<FormSliderControl
title={antThinkingEnabled ? 'Thinking Budget' : 'Disabled'} ariaLabel='Anthropic Extended Thinking Token Budget'
@@ -355,19 +390,60 @@ export function LLMParametersEditor(props: {
/>
)}
{/* Unified Effort - dynamic options from model spec's enumValues, descending order */}
{showParam('llmEffort') && effortOptions && (
{/* Anthropic Effort */}
{showParam('llmVndAntEffort') && antEffortOptions && (
<FormSelectControl
title='Effort'
tooltip='Controls reasoning depth and effort level'
value={llmEffort ?? _UNSPECIFIED}
tooltip='Controls thinking depth. Max = deepest reasoning with no constraints, High = default. Works alongside thinking budget.'
value={llmVndAntEffort ?? _UNSPECIFIED}
onChange={(value) => {
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmEffort');
else onChangeParameter({ llmEffort: value });
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndAntEffort');
else onChangeParameter({ llmVndAntEffort: value });
}}
options={effortOptions}
options={antEffortOptions}
/>
)}
{/* Gemini Thinking Level */}
{showParam('llmVndGemEffort') && gemEffortOptions && (
<FormSelectControl
title='Thinking Level'
tooltip='Controls internal reasoning depth. When unset, the model decides dynamically.'
value={llmVndGemEffort ?? _UNSPECIFIED}
onChange={(value) => {
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndGemEffort');
else onChangeParameter({ llmVndGemEffort: value });
}}
options={gemEffortOptions}
/>
)}
{/* OpenAI Reasoning Effort */}
{showParam('llmVndOaiEffort') && oaiEffortOptions && (
<FormSelectControl
title='Reasoning Effort'
tooltip='Controls how much effort the model spends on reasoning'
value={llmVndOaiEffort ?? _UNSPECIFIED}
onChange={(value) => {
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndOaiEffort');
else onChangeParameter({ llmVndOaiEffort: value });
}}
options={oaiEffortOptions}
/>
)}
{/* Moonshot/Z.ai Thinking */}
{showParam('llmVndMiscEffort') && miscEffortOptions && (
<FormSelectControl
title='Thinking'
tooltip='Enable or disable extended thinking mode'
value={llmVndMiscEffort ?? _UNSPECIFIED}
onChange={(value) => {
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndMiscEffort');
else onChangeParameter({ llmVndMiscEffort: value });
}}
options={miscEffortOptions}
/>
)}
{showParam('llmVndAntWebSearch') && (
<FormSelectControl
@@ -427,6 +503,54 @@ export function LLMParametersEditor(props: {
)}
{/* Gemini [effort, ... ] */}
{showParam('llmVndGeminiThinkingBudget') && (
<FormSliderControl
title='Thinking Budget' ariaLabel='Gemini Thinking Token Budget'
description={gemThinkingAuto ? 'Auto' : gemThinkingOff ? 'Thinking Off' : 'Tokens'}
min={gemTBMinMax[0]} max={gemTBMinMax[1]} step={1024}
valueLabelDisplay={(gemThinkingAuto || gemThinkingOff) ? 'off' : 'on'}
value={llmVndGeminiThinkingBudget ?? [gemTBMinMax[0], gemTBMinMax[1]]}
variant={gemThinkingAuto ? 'soft' : undefined}
// disabled={gemThinkingAuto}
onChange={value => onChangeParameter({ llmVndGeminiThinkingBudget: Array.isArray(value) ? (value[0] || value[1]) : value })}
startAdornment={gemTBMinMax[0] === 0 && (
<Tooltip arrow disableInteractive title={gemThinkingOff ? 'Thinking Off' : 'Disable Thinking'}>
<IconButton
variant={gemThinkingOff ? 'solid' : 'outlined'}
// disabled={gemThinkingOff}
onClick={() => onChangeParameter({ llmVndGeminiThinkingBudget: 0 })}
sx={{ mr: 2 }}
>
{gemThinkingOff ? <ClearIcon sx={{ fontSize: 'lg' }} /> : <PowerSettingsNewIcon />}
</IconButton>
</Tooltip>
)}
endAdornment={
<Tooltip arrow disableInteractive title={gemThinkingAuto ? 'Automatic Thinking (default)' : 'Auto Budget'}>
<IconButton
variant={gemThinkingAuto ? 'solid' : 'outlined'}
// disabled={gemThinkingAuto}
onClick={() => onRemoveParameter('llmVndGeminiThinkingBudget')}
sx={{ ml: 2 }}
>
<AutoModeIcon sx={{ fontSize: 'xl' }} />
</IconButton>
</Tooltip>
}
/>
)}
{/*{showParam('llmVndGeminiShowThoughts') && (*/}
{/* <FormSwitchControl*/}
{/* title='Show Reasoning'*/}
{/* description='Show chain of thoughts'*/}
{/* checked={!!llmVndGeminiShowThoughts}*/}
{/* onChange={checked => onChangeParameter({ llmVndGeminiShowThoughts: checked })}*/}
{/* />*/}
{/*)}*/}
{showParam('llmVndGeminiImageSize') && (
<FormSelectControl
title='Image Size'
@@ -467,44 +591,6 @@ export function LLMParametersEditor(props: {
/>
)}
{showParam('llmVndGeminiThinkingBudget') && (
<FormSliderControl
title='Thinking Budget' ariaLabel='Gemini Thinking Token Budget'
description={gemThinkingAuto ? 'Auto' : gemThinkingOff ? 'Thinking Off' : 'Tokens'}
min={gemTBMinMax[0]} max={gemTBMinMax[1]} step={1024}
valueLabelDisplay={(gemThinkingAuto || gemThinkingOff) ? 'off' : 'on'}
value={llmVndGeminiThinkingBudget ?? [gemTBMinMax[0], gemTBMinMax[1]]}
variant={gemThinkingAuto ? 'soft' : undefined}
// disabled={gemThinkingAuto}
onChange={value => onChangeParameter({ llmVndGeminiThinkingBudget: Array.isArray(value) ? (value[0] || value[1]) : value })}
startAdornment={gemTBMinMax[0] === 0 && (
<Tooltip arrow disableInteractive title={gemThinkingOff ? 'Thinking Off' : 'Disable Thinking'}>
<IconButton
variant={gemThinkingOff ? 'solid' : 'outlined'}
// disabled={gemThinkingOff}
onClick={() => onChangeParameter({ llmVndGeminiThinkingBudget: 0 })}
sx={{ mr: 2 }}
>
{gemThinkingOff ? <ClearIcon sx={{ fontSize: 'lg' }} /> : <PowerSettingsNewIcon />}
</IconButton>
</Tooltip>
)}
endAdornment={
<Tooltip arrow disableInteractive title={gemThinkingAuto ? 'Automatic Thinking (default)' : 'Auto Budget'}>
<IconButton
variant={gemThinkingAuto ? 'solid' : 'outlined'}
// disabled={gemThinkingAuto}
onClick={() => onRemoveParameter('llmVndGeminiThinkingBudget')}
sx={{ ml: 2 }}
>
<AutoModeIcon sx={{ fontSize: 'xl' }} />
</IconButton>
</Tooltip>
}
/>
)}
{showParam('llmVndGeminiCodeExecution') && (
<FormSelectControl
title='Code Execution'
@@ -582,37 +668,6 @@ export function LLMParametersEditor(props: {
/>
)}
{showParam('llmVndPerplexitySearchMode') && (
<FormSelectControl
title='Search Mode'
tooltip='Type of sources to prioritize in search results'
value={llmVndPerplexitySearchMode ?? _UNSPECIFIED}
onChange={(value) => {
if (value === _UNSPECIFIED || !value)
onRemoveParameter('llmVndPerplexitySearchMode');
else
onChangeParameter({ llmVndPerplexitySearchMode: value });
}}
options={_perplexitySearchModeOptions}
/>
)}
{showParam('llmVndPerplexityDateFilter') && (
<FormSelectControl
title='Date Range'
tooltip='Filter search results by publication date'
value={llmVndPerplexityDateFilter ?? _UNSPECIFIED}
onChange={(value) => {
if (value === _UNSPECIFIED || !value)
onRemoveParameter('llmVndPerplexityDateFilter');
else
onChangeParameter({ llmVndPerplexityDateFilter: value });
}}
options={_perplexityDateFilterOptions}
/>
)}
{showParam('llmVndOaiVerbosity') && (
<FormSelectControl
title='Verbosity'
@@ -673,7 +728,6 @@ export function LLMParametersEditor(props: {
/>
)}
{showParam('llmForceNoStream') && (
<FormSwitchControl
title='Disable Streaming'
@@ -690,6 +744,37 @@ export function LLMParametersEditor(props: {
)}
{showParam('llmVndPerplexitySearchMode') && (
<FormSelectControl
title='Search Mode'
tooltip='Type of sources to prioritize in search results'
value={llmVndPerplexitySearchMode ?? _UNSPECIFIED}
onChange={(value) => {
if (value === _UNSPECIFIED || !value)
onRemoveParameter('llmVndPerplexitySearchMode');
else
onChangeParameter({ llmVndPerplexitySearchMode: value });
}}
options={_perplexitySearchModeOptions}
/>
)}
{showParam('llmVndPerplexityDateFilter') && (
<FormSelectControl
title='Date Range'
tooltip='Filter search results by publication date'
value={llmVndPerplexityDateFilter ?? _UNSPECIFIED}
onChange={(value) => {
if (value === _UNSPECIFIED || !value)
onRemoveParameter('llmVndPerplexityDateFilter');
else
onChangeParameter({ llmVndPerplexityDateFilter: value });
}}
options={_perplexityDateFilterOptions}
/>
)}
{showParam('llmVndOrtWebSearch') && (
<FormSelectControl
title='Web Search'
@@ -18,7 +18,7 @@ const IF_4_R = [...IF_4, LLM_IF_OAI_Reasoning];
// Anthropic Parameters Semantics:
// - llmEffort unified effort: each model declares its subset via enumValues
// - llmVndAntEffort Anthropic effort: each model declares its subset via enumValues
// - llmVndAnt1MContext only available on select models
// - llmVndAntSkills 2026-02-06: seems GA to any model now: a parameter spec for user/UI configurability
// - llmVndAntThinkingBudget 2026-02-06: deprecated since 4.6 in favor of adaptive thinking, was used for manual control of thinking up to 4.5, we pre-default it to 16384 and the user can set it to another value or null to turn thinking off
@@ -43,7 +43,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [
{ paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* FORCE adaptive */ },
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'max'] },
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high', 'max'] },
{ paramId: 'llmVndAnt1MContext' },
{ paramId: 'llmVndAntInfSpeed' },
...ANT_TOOLS,
@@ -58,7 +58,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [
{ paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* FORCE adaptive */ },
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndAnt1MContext' },
...ANT_TOOLS,
],
@@ -73,7 +73,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [
{ paramId: 'llmVndAntThinkingBudget' },
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high'] },
...ANT_TOOLS,
],
benchmark: { cbaElo: 1468 }, // claude-opus-4-5-20251101-thinking-32k
@@ -181,7 +181,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 128000,
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'max'] },
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high', 'max'] },
{ paramId: 'llmVndAnt1MContext' },
{ paramId: 'llmVndAntInfSpeed' },
...ANT_TOOLS,
@@ -208,7 +208,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 64000,
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndAnt1MContext' },
...ANT_TOOLS,
],
@@ -236,7 +236,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
maxCompletionTokens: 64000,
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high'] },
...ANT_TOOLS,
],
chatPrice: { input: 5, output: 25, cache: { cType: 'ant-bp', read: 0.50, write: 6.25, duration: 300 } },
@@ -445,7 +445,7 @@ const _ORT_ANT_IF_ALLOWLIST: ReadonlySet<string> = new Set([
] as const);
// NOTE: llmVndAntInfSpeed intentionally NOT included - fast mode not available through OpenRouter
const _ORT_ANT_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
'llmEffort', // unified effort
'llmVndAntEffort', // Anthropic effort
'llmVndAntThinkingBudget',
] as const satisfies DModelParameterId[]);
@@ -167,7 +167,7 @@ const _knownGeminiModels: ({
chatPrice: gemini30ProPricing,
interfaces: IF_30,
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'high']},
{ paramId: 'llmVndGemEffort', enumValues: ['low', 'high']},
{ paramId: 'llmVndGeminiMediaResolution' },
{ paramId: 'llmVndGeminiCodeExecution' },
{ paramId: 'llmVndGeminiGoogleSearch' },
@@ -216,7 +216,7 @@ const _knownGeminiModels: ({
chatPrice: gemini30FlashPricing,
interfaces: IF_30,
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high']},
{ paramId: 'llmVndGemEffort', enumValues: ['minimal', 'low', 'medium', 'high']},
{ paramId: 'llmVndGeminiMediaResolution' },
{ paramId: 'llmVndGeminiCodeExecution' },
{ paramId: 'llmVndGeminiGoogleSearch' },
@@ -860,7 +860,7 @@ const _ORT_GEM_IF_ALLOWLIST: ReadonlySet<string> = new Set([
] as const);
const _ORT_GEM_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
'llmVndGeminiThinkingBudget', 'llmEffort', // OR supports Gemini thinking (unified effort)
'llmVndGeminiThinkingBudget', 'llmVndGemEffort', // OR supports Gemini thinking
'llmVndGeminiAspectRatio', 'llmVndGeminiImageSize', // OR supports Gemini image generation
] as const satisfies DModelParameterId[]);
+5 -1
View File
@@ -76,7 +76,11 @@ const ModelParameterSpec_schema = z.object({
paramId: z.enum([
'llmTopP',
'llmForceNoStream',
'llmEffort', // unified effort
// Vendor-specific effort params (converge to unified `effort` wire field)
'llmVndAntEffort',
'llmVndGemEffort',
'llmVndOaiEffort',
'llmVndMiscEffort',
// Anthropic
'llmVndAnt1MContext',
'llmVndAntInfSpeed',
@@ -20,7 +20,7 @@ const IF_K2_5 = [
];
const _PS_Reasoning: ModelDescriptionSchema['parameterSpecs'] = [
{ paramId: 'llmEffort', enumValues: ['none', 'high'] },
{ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] },
] as const;
@@ -22,7 +22,7 @@ export const hardcodedOpenAIVariants: ModelVariantMap = {
description: 'Supports temperature control for creative applications. GPT-5.2 with reasoning disabled (reasoning_effort=none).',
interfaces: [LLM_IF_OAI_Responses, LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching], // NO LLM_IF_OAI_Reasoning, NO LLM_IF_HOTFIX_NoTemperature
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'none', hidden: true }, // factory 'none', not changeable
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'none', hidden: true }, // factory 'none', not changeable
{ paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmVndOaiVerbosity' },
{ paramId: 'llmVndOaiImageGeneration' },
@@ -40,7 +40,7 @@ export const hardcodedOpenAIVariants: ModelVariantMap = {
// // customize this param
// { paramId: 'llmVndOaiWebSearchContext', initialValue: 'medium', hidden: true }, // Search enabled by default
// // copy other params
// { paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] },
// { paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] },
// { paramId: 'llmVndOaiRestoreMarkdown' },
// { paramId: 'llmVndOaiVerbosity' },
// { paramId: 'llmVndOaiImageGeneration' },
@@ -84,7 +84,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'medium' /* our decision: set to medium to have thinking - clones can set to 'none' to have temperature */ },
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'medium' /* our decision: set to medium to have thinking - clones can set to 'none' to have temperature */ },
{ paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmVndOaiVerbosity' },
{ paramId: 'llmVndOaiImageGeneration' },
@@ -110,7 +110,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmForceNoStream' },
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiWebSearchContext' },
],
chatPrice: { input: 1.75, cache: { cType: 'oai-ac', read: 0.175 }, output: 14 },
@@ -144,7 +144,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 272000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiEffort', enumValues: ['medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmVndOaiVerbosity' },
{ paramId: 'llmVndOaiImageGeneration' },
@@ -171,7 +171,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmVndOaiVerbosity' },
{ paramId: 'llmVndOaiImageGeneration' },
{ paramId: 'llmVndOaiCodeInterpreter' },
@@ -214,7 +214,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmForceNoStream' },
],
@@ -230,7 +230,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal)
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal)
{ paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmForceNoStream' },
],
@@ -246,7 +246,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal)
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal)
{ paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmForceNoStream' },
],
@@ -266,7 +266,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmVndOaiVerbosity' }, // gpt-5-class nets have verbosity control
{ paramId: 'llmVndOaiImageGeneration' }, // image generation capability
{ paramId: 'llmVndOaiCodeInterpreter' }, // code execution in sandboxed container
@@ -321,7 +321,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, // works
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, // works
{ paramId: 'llmVndOaiWebSearchContext' }, // works, although is not triggered often
// { paramId: 'llmVndOaiRestoreMarkdown', initialValue: false }, // since this is for code, let the prompt dictate markdown usage rather than us injecting
{ paramId: 'llmForceNoStream' },
@@ -356,7 +356,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 400000,
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }],
chatPrice: { input: 0.25, cache: { cType: 'oai-ac', read: 0.025 }, output: 2 },
benchmark: { cbaElo: 1390 }, // gpt-5-mini-high
},
@@ -374,7 +374,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 400000,
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }],
chatPrice: { input: 0.05, cache: { cType: 'oai-ac', read: 0.005 }, output: 0.4 },
benchmark: { cbaElo: 1338 }, // gpt-5-nano-high
},
@@ -394,7 +394,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmForceNoStream' },
],
@@ -412,7 +412,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 32768,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmForceNoStream' },
],
// chatPrice: TBD - unknown pricing
@@ -447,7 +447,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }],
chatPrice: { input: 1.5, cache: { cType: 'oai-ac', read: 0.375 }, output: 6 },
isLegacy: true, // Deprecated January 16, 2026.
},
@@ -480,7 +480,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: IFS_CHAT_CACHE_REASON,
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }],
chatPrice: { input: 1.1, cache: { cType: 'oai-ac', read: 0.275 }, output: 4.4 },
benchmark: { cbaElo: 1391 }, // o4-mini-2025-04-16
},
@@ -515,7 +515,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }],
chatPrice: { input: 20, output: 80 },
// benchmark: has not been measured yet
},
@@ -533,7 +533,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: IFS_CHAT_CACHE_REASON,
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmForceNoStream' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmForceNoStream' }],
chatPrice: { input: 2, cache: { cType: 'oai-ac', read: 0.5 }, output: 8 },
benchmark: { cbaElo: 1433 }, // o3-2025-04-16
},
@@ -551,7 +551,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_StripImages],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }],
chatPrice: { input: 1.1, cache: { cType: 'oai-ac', read: 0.55 }, output: 4.4 },
benchmark: { cbaElo: 1348 }, // o3-mini
},
@@ -570,7 +570,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmForceNoStream' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmForceNoStream' }],
chatPrice: { input: 150, output: 600 },
// benchmark: has not been measured yet by third parties
},
@@ -588,7 +588,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: IFS_CHAT_CACHE_REASON,
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiRestoreMarkdown' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiRestoreMarkdown' }],
chatPrice: { input: 15, cache: { cType: 'oai-ac', read: 7.5 }, output: 60 },
benchmark: { cbaElo: 1402 }, // o1-2024-12-17
},
@@ -1232,7 +1232,7 @@ const _ORT_OAI_IF_ALLOWLIST: ReadonlySet<string> = new Set([
LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning,
] as const);
const _ORT_OAI_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
'llmEffort', // unified reasoning effort
'llmVndOaiEffort', // OpenAI reasoning effort
'llmVndOaiVerbosity', // verbosity
// 'llmVndOaiImageGeneration', // OR does NOT support image gen with OAI yet (2026-02-06)
] as const satisfies DModelParameterId[]);
@@ -191,8 +191,8 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndAntThinkingBudget')) {
DEV_DEBUG_OPENROUTER_MODELS && console.log(`[DEV] openRouterModelToModelDescription: unexpected ${antLookup ? 'KNOWN' : 'unknown'} Anthropic reasoning model:`, model.id);
parameterSpecs.push({ paramId: 'llmVndAntThinkingBudget' }); // configurable thinking budget
if (!parameterSpecs.some(p => p.paramId === 'llmEffort'))
parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'max'] }); // use the latest known Anthropic effort levels superset
if (!parameterSpecs.some(p => p.paramId === 'llmVndAntEffort'))
parameterSpecs.push({ paramId: 'llmVndAntEffort' }); // use the latest known Anthropic effort levels superset
}
break;
@@ -204,9 +204,9 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
console.log('[DEV] openRouterModelToModelDescription: unknown Gemini model:', model.id);
// 0-day: reasoning models get default thinking budget if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndGeminiThinkingBudget' || p.paramId === 'llmEffort')) {
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndGeminiThinkingBudget' || p.paramId === 'llmVndGemEffort')) {
// DEV_DEBUG_OPENROUTER_MODELS && console.log(`[DEV] openRouterModelToModelDescription: tagging ${gemLookup ? 'KNOWN' : 'unknown'} Gemini reasoning model:`, model.id);
parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }); // use the latest known Gemini effort (thinking) levels superset
parameterSpecs.push({ paramId: 'llmVndGemEffort' }); // use the latest known Gemini effort (thinking) levels superset
// parameterSpecs.push({ paramId: 'llmVndGeminiThinkingBudget' }); // fallback with default range
}
@@ -229,17 +229,17 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
console.log('[DEV] openRouterModelToModelDescription: unknown OpenAI model:', model.id);
// 0-day: reasoning models get default 3-level effort if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmEffort')) {
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndOaiEffort')) {
// console.log('[DEV] openRouterModelToModelDescription: unexpected OpenAI reasoning model:', model.id);
parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'] }); // latest known OpenAI effort levels superset
parameterSpecs.push({ paramId: 'llmVndOaiEffort' }); // latest known OpenAI effort levels superset
}
break;
case model.id.startsWith('x-ai/') || model.id.startsWith('moonshotai/') || model.id.startsWith('z-ai/') || model.id.startsWith('deepseek/'):
// 0-day: xAI/Grok models get default reasoning effort if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmEffort')) {
// 0-day: xAI/Grok/Moonshot/Z.ai/DeepSeek models get default reasoning effort if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndMiscEffort')) {
// console.log('[DEV] openRouterModelToModelDescription: unexpected xAI/Grok/DeepSeek reasoning model:', model.id);
parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }); // latest known xAI effort levels superset
parameterSpecs.push({ paramId: 'llmVndMiscEffort' }); // binary thinking for these vendors
}
break;
@@ -1,7 +1,7 @@
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { createVariantInjector, ModelVariantMap } from '../../llm.server.variants';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types';
import { LLM_IF_HOTFIX_NoStream, LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types';
// configuration
@@ -16,13 +16,16 @@ const _hardcodedPerplexityVariants: ModelVariantMap = !PERPLEXITY_ENABLE_VARIANT
idVariant: 'academic',
label: 'Sonar Deep Research (Academic)',
description: 'Expert-level research model with academic sources only. Searches scholarly databases, peer-reviewed papers, and academic publications. 128k context.',
interfaces: [
LLM_IF_HOTFIX_NoStream, // seems to be required for medium/academic
LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning,
],
parameterSpecs: [
// Fixed parameters for academic search
{ paramId: 'llmVndOaiWebSearchContext', initialValue: 'medium', hidden: true },
{ paramId: 'llmVndPerplexitySearchMode', initialValue: 'academic', hidden: true },
{ paramId: 'llmForceNoStream', initialValue: true, hidden: true },
// Free parameters
// { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'], initialValue: 'medium' },
// { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'], initialValue: 'medium' },
{ paramId: 'llmVndPerplexityDateFilter' },
],
},
@@ -40,7 +43,7 @@ const _knownPerplexityChatModels: ModelDescriptionSchema[] = [
contextWindow: 128000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndOaiWebSearchContext', initialValue: 'low' }, // REUSE!
{ paramId: 'llmVndPerplexitySearchMode' },
{ paramId: 'llmVndPerplexityDateFilter' },
@@ -152,7 +152,10 @@ const _knownXAIChatModels: ManualMappings = [
contextWindow: 131072,
maxCompletionTokens: undefined,
interfaces: [...XAI_IF_Pre4, LLM_IF_OAI_Reasoning],
parameterSpecs: XAI_PAR_Pre4,
parameterSpecs: [
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] },
...XAI_PAR_Pre4,
],
chatPrice: { input: 0.3, output: 0.5, cache: { cType: 'oai-ac', read: 0.075 } },
benchmark: { cbaElo: 1357 }, // grok-3-mini-beta
},
@@ -17,7 +17,7 @@ const _IF_Vision_Reasoning = [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, L
// - Z.ai thinking maps from effort: 'none' → disabled, anything else → enabled
// - Z.ai only supports binary enabled/disabled, so we expose 'none' and 'high'
const _PS_Reasoning: ModelDescriptionSchema['parameterSpecs'] = [
{ paramId: 'llmEffort', enumValues: ['none', 'high'] },
{ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] },
] as const;
+8 -8
View File
@@ -42,8 +42,8 @@ const SWEEP_DEFINITIONS = [
name: 'oai-reasoning-effort',
description: 'OpenAI reasoning_effort values',
applicability: { type: 'dialects', dialects: ['openai', 'azure', 'openrouter'] },
applyToModel: (value) => ({ effort: value }),
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh' /*, 'max'*/ /* OpenRouter-only? */] satisfies AixAPI_Model['effort'][],
applyToModel: (value) => ({ reasoningEffort: value }),
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh' /*, 'max'*/ /* OpenRouter-only? */] satisfies AixAPI_Model['reasoningEffort'][],
neuteredValues: ['medium'], // medium is the default, so only-medium means no real support
mode: 'enumerate',
}),
@@ -85,8 +85,8 @@ const SWEEP_DEFINITIONS = [
name: 'ant-effort',
description: 'Anthropic output_config.effort values',
applicability: { type: 'dialects', dialects: ['anthropic'] },
applyToModel: (value) => ({ effort: value }),
values: ['low', 'medium', 'high', 'max'] satisfies AixAPI_Model['effort'][],
applyToModel: (value) => ({ reasoningEffort: value }),
values: ['low', 'medium', 'high', 'max'] satisfies AixAPI_Model['reasoningEffort'][],
mode: 'enumerate',
}),
@@ -112,8 +112,8 @@ const SWEEP_DEFINITIONS = [
name: 'gemini-thinking-level',
description: 'Gemini thinkingConfig.thinkingLevel values',
applicability: { type: 'dialects', dialects: ['gemini'] },
applyToModel: (value) => value ? { effort: value } : {}, // null = dynamic mode, don't set level
values: ['minimal', 'low', 'medium', 'high'] satisfies (AixAPI_Model['effort'] | null)[],
applyToModel: (value) => value ? { reasoningEffort: value } : {}, // null = dynamic mode, don't set level
values: ['minimal', 'low', 'medium', 'high'] satisfies (AixAPI_Model['reasoningEffort'] | null)[],
mode: 'enumerate',
}),
@@ -138,8 +138,8 @@ const SWEEP_DEFINITIONS = [
name: 'xai-reasoning-effort',
description: 'xAI reasoning.effort values',
applicability: { type: 'dialects', dialects: ['xai'] },
applyToModel: (value) => ({ effort: value }),
values: ['low', 'medium', 'high'] satisfies AixAPI_Model['effort'][],
applyToModel: (value) => ({ reasoningEffort: value }),
values: ['low', 'medium', 'high'] satisfies AixAPI_Model['reasoningEffort'][],
mode: 'enumerate',
}),