Mirror of https://github.com/enricoros/big-AGI.git (synced 2026-05-10 21:50:14 -07:00)
LLM Effort: split definition for UI namings with unified backend. #940
@@ -38,17 +38,17 @@ Example parameter mapping. Note that new parameters may have been added to both
The objective of the sweep is to hint at model definition values, but the model definitions are what matters for Big-AGI,
and they need to be carefully updated, otherwise thousands of clients may break.

Before:

| Dialect   | Sweep Key                | Model paramId                |
|-----------|--------------------------|------------------------------|
| OpenAI    | `oai-reasoning-effort`   | `llmEffort`                  |
| OpenAI    | `oai-verbosity`          | `llmVndOaiVerbosity`         |
| OpenAI    | `oai-image-generation`   | `llmVndOaiImageGeneration`   |
| OpenAI    | `oai-web-search`         | `llmVndOaiWebSearchContext`  |
| Anthropic | `ant-effort`             | `llmEffort`                  |
| Anthropic | `ant-thinking-budget`    | `llmVndAntThinkingBudget`    |
| Gemini    | `gemini-thinking-level`  | `llmEffort`                  |
| Gemini    | `gemini-thinking-budget` | `llmVndGeminiThinkingBudget` |
| xAI       | `xai-web-search`         | `llmVndXaiWebSearch`         |

After:

| Dialect   | Sweep Key                | Model paramId                |
|-----------|--------------------------|------------------------------|
| OpenAI    | `oai-reasoning-effort`   | `llmVndOaiEffort`            |
| OpenAI    | `oai-verbosity`          | `llmVndOaiVerbosity`         |
| OpenAI    | `oai-image-generation`   | `llmVndOaiImageGeneration`   |
| OpenAI    | `oai-web-search`         | `llmVndOaiWebSearchContext`  |
| Anthropic | `ant-effort`             | `llmVndAntEffort`            |
| Anthropic | `ant-thinking-budget`    | `llmVndAntThinkingBudget`    |
| Gemini    | `gemini-thinking-level`  | `llmVndGemEffort`            |
| Gemini    | `gemini-thinking-budget` | `llmVndGeminiThinkingBudget` |
| xAI       | `xai-web-search`         | `llmVndXaiWebSearch`         |

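The following is a minimal TypeScript sketch of how a sweep hint could be resolved to the `paramId` column of the "After" table above; the lookup table and function names are illustrative only and are not part of this commit.

```typescript
// Hypothetical lookup mirroring the "After" mapping above (names are illustrative, not repo code).
const SWEEP_KEY_TO_PARAM_ID: Record<string, string> = {
  'oai-reasoning-effort': 'llmVndOaiEffort',
  'oai-verbosity': 'llmVndOaiVerbosity',
  'oai-image-generation': 'llmVndOaiImageGeneration',
  'oai-web-search': 'llmVndOaiWebSearchContext',
  'ant-effort': 'llmVndAntEffort',
  'ant-thinking-budget': 'llmVndAntThinkingBudget',
  'gemini-thinking-level': 'llmVndGemEffort',
  'gemini-thinking-budget': 'llmVndGeminiThinkingBudget',
  'xai-web-search': 'llmVndXaiWebSearch',
};

// A sweep result only *hints* at a value; the model definition remains the source of truth.
function paramIdForSweepKey(sweepKey: string): string | undefined {
  return SWEEP_KEY_TO_PARAM_ID[sweepKey];
}
```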
## Output
@@ -83,8 +83,7 @@ function _enumDef<const V extends string>(def: _EnumParamDef<V>): _EnumParamDef<

export const DModelParameterRegistry = {

/// Common 'implicit' parameters, available to all models ///
// Note: we still use pre-v2 names for compatibility and ease of migration
// -- Common 'implicit' parameters, available to all models --

llmRef: {
label: 'Model ID',
@@ -114,7 +113,7 @@ export const DModelParameterRegistry = {
// due to implicit, when undefined we apply the runtime fallback
},

/// Extended parameters, specific to certain models/vendors
// -- Extended parameters, specific to certain models/vendors --

llmTopP: {
label: 'Top P',
@@ -124,30 +123,6 @@ export const DModelParameterRegistry = {
// when undefined is omitted from the requests (default)
},

/**
* Unified 'reasoning' effort parameter for all vendors. The full superset of all possible effort levels.
* Each model declares its own subset via `enumValues` in its parameterSpec.
*
* Mapping to vendor-native values is done in adapters (the only place with vendor knowledge):
* - Anthropic: output_config.effort
* - OpenAI: reasoning_effort (ChatCompletions) / reasoning.effort (Responses)
* - Gemini: thinkingConfig.thinkingLevel (depending on model: low/high, minimal/low/medium/high, ...)
* - Moonshot/ZAI: thinking.type (none->disabled, high->enabled)
* - Perplexity: reasoning_effort
* - etc.
*/
llmEffort: _enumDef({
label: 'Reasoning Effort',
type: 'enum',
description: 'Controls reasoning depth and effort level.',
values: [
// all values ('max' included), sorted in ascending order of effort
'none', 'minimal', 'low', 'medium', 'high', 'xhigh', // OpenAI/common
'max', // Anthropic only, for now
],
// undefined means vendor default (usually high or medium, could be different such as none)
}),

/**
* First introduced as a user-configurable parameter for the 'Verification' required by o3.
* [2025-04-16] Adding parameter to disable streaming for o3, and possibly more models.
@@ -165,6 +140,46 @@ export const DModelParameterRegistry = {
},

// -- 'Effort' unified semantic specialization --

/**
* Vendor-specific effort parameters. Each vendor has its own effort param with vendor-contextual
* labels and descriptions. Models declare their subset via `enumValues` in parameterSpec.
* All converge to the unified `effort` wire field in aix.client.ts.
*/
llmVndAntEffort: _enumDef({
label: 'Effort',
type: 'enum',
description: 'Controls reasoning depth. Works alongside thinking budget.',
values: ['low', 'medium', 'high', 'max'],
// undefined means high effort (default)
}),

llmVndGemEffort: _enumDef({
label: 'Thinking Level',
type: 'enum',
description: 'Controls internal reasoning depth. When unset, the model decides dynamically.',
values: ['minimal', 'low', 'medium', 'high'],
// undefined means dynamic (model decides)
}),

llmVndOaiEffort: _enumDef({
label: 'Reasoning Effort',
type: 'enum',
description: 'Controls how much effort the model spends on reasoning.',
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'],
// undefined means vendor default
}),

llmVndMiscEffort: _enumDef({
label: 'Thinking',
type: 'enum',
description: 'Enable or disable extended thinking mode.',
values: ['none', 'high'],
// undefined means vendor default (usually 'high', i.e. thinking enabled)
}),


// Anthropic-specific

llmVndAnt1MContext: {
@@ -192,7 +207,8 @@ export const DModelParameterRegistry = {
},

/**
* NOTE: this is being phased out with Opus 4.6 in favor of llmEffort ('low', 'medium', 'high', 'max')
* NOTE: this is being phased out with Opus 4.6 in favor of llmVndAntEffort, while this is implicitly
* adaptive if missing (as if we had our custom sentinel value of -1).
*
* Important: when this is set to anything other than nullish, it enables Adaptive(-1)/Extended(int > 1024) thinking,
* and as a side effect **disables the temperature** in the requests (even when tunneled through OpenRouter). So this
@@ -504,7 +520,7 @@ interface DModelParameterSpec<T extends DModelParameterId> {
/**
* (optional) For enum params: restrict which values from the registry are allowed for this model.
* The UI will only show these values. Analogous to rangeOverride for numeric params.
* Example: llmEffort registry has 7 values, but a specific model may only support ['low', 'medium', 'high'].
* Example: llmVndOaiEffort registry has 6 values, but a specific model may only support ['low', 'medium', 'high'].
*/
enumValues?: readonly string[];
}

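For context on how `enumValues` narrows the registry's value list per model, here is a small self-contained sketch; the types are deliberately simplified stand-ins for the real `DModelParameterRegistry` / `DModelParameterSpec`, not the repository's definitions.

```typescript
// Simplified, illustrative types: the real registry and spec shapes are richer.
type EffortValue = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';

const registryEntry = {
  label: 'Reasoning Effort',
  values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'] as readonly EffortValue[],
};

interface ParamSpec {
  paramId: string;
  enumValues?: readonly EffortValue[]; // optional per-model restriction
}

// The UI should only offer the model's enumValues; absent a restriction, fall back to the registry list.
function effectiveValues(spec: ParamSpec): readonly EffortValue[] {
  return spec.enumValues ?? registryEntry.values;
}

// Example: a model that only supports three levels.
const exampleSpec: ParamSpec = { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] };
console.log(effectiveValues(exampleSpec)); // ['low', 'medium', 'high']
```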
@@ -50,14 +50,13 @@ export function aixCreateModelFromLLMOptions(
// destructure input with the overrides
const {
llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream,
llmEffort,
llmVndAntEffort, llmVndGemEffort, llmVndOaiEffort, llmVndMiscEffort,
llmVndAnt1MContext, llmVndAntInfSpeed, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch,
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiThinkingBudget,
// llmVndMoonshotWebSearch,
llmVndOaiRestoreMarkdown, llmVndOaiVerbosity, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, llmVndOaiImageGeneration, llmVndOaiCodeInterpreter,
llmVndOrtWebSearch,
llmVndPerplexityDateFilter, llmVndPerplexitySearchMode,
// xAI
llmVndXaiCodeExecution, llmVndXaiSearchInterval, llmVndXaiWebSearch, llmVndXaiXSearch, llmVndXaiXSearchHandles,
} = {
...llmOptions,
@@ -102,11 +101,15 @@ export function aixCreateModelFromLLMOptions(
return stripUndefined({
id: llmRef,
acceptsOutputs: acceptsOutputs,
...(hotfixOmitTemperature ? { temperature: null } : llmTemperature !== undefined ? { temperature: llmTemperature } : {}),
...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
...(llmTopP !== undefined ? { topP: llmTopP } : {}),
...(llmEffort ? { effort: llmEffort } : {}),
...(llmForceNoStream ? { forceNoStream: true } : {}),
temperature: (hotfixOmitTemperature || llmTemperature === null) ? null : llmTemperature, // strippable
maxTokens: llmResponseTokens ?? undefined, // strippable - null: like undefined -> strip -> omit the value
topP: llmTopP, // strippable (likely)
forceNoStream: llmForceNoStream ? true : undefined, // strippable
userGeolocation: userGeolocation, // strippable (likely)

// Cross-provider unified options
reasoningEffort: llmVndAntEffort ?? llmVndGemEffort ?? llmVndOaiEffort ?? llmVndMiscEffort, // strippable

// Anthropic
...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget === -1 ? 'adaptive' as const : llmVndAntThinkingBudget } : {}),
...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}),
@@ -114,6 +117,7 @@ export function aixCreateModelFromLLMOptions(
...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}),
...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}),
...(llmVndAntWebSearch === 'auto' ? { vndAntWebSearch: llmVndAntWebSearch } : {}),

// Gemini
...(llmVndGeminiAspectRatio ? { vndGeminiAspectRatio: llmVndGeminiAspectRatio } : {}),
...(llmVndGeminiCodeExecution === 'auto' ? { vndGeminiCodeExecution: llmVndGeminiCodeExecution } : {}),
@@ -126,8 +130,10 @@ export function aixCreateModelFromLLMOptions(
...(llmVndGeminiMediaResolution ? { vndGeminiMediaResolution: llmVndGeminiMediaResolution } : {}),
...(llmVndGeminiThinkingBudget !== undefined ? { vndGeminiThinkingBudget: llmVndGeminiThinkingBudget } : {}),
// ...(llmVndGeminiUrlContext === 'auto' ? { vndGeminiUrlContext: llmVndGeminiUrlContext } : {}),

// Moonshot
// ...(llmVndMoonshotWebSearch === 'auto' ? { vndMoonshotWebSearch: 'auto' } : {}),

// OpenAI
...(llmVndOaiResponsesAPI ? { vndOaiResponsesAPI: true } : {}),
...(llmVndOaiRestoreMarkdown ? { vndOaiRestoreMarkdown: llmVndOaiRestoreMarkdown } : {}),
@@ -135,12 +141,14 @@ export function aixCreateModelFromLLMOptions(
...(llmVndOaiWebSearchContext ? { vndOaiWebSearchContext: llmVndOaiWebSearchContext } : {}),
...(llmVndOaiImageGeneration ? { vndOaiImageGeneration: (llmVndOaiImageGeneration as any /* backward comp */) === true ? 'mq' : llmVndOaiImageGeneration } : {}),
...(llmVndOaiCodeInterpreter === 'auto' ? { vndOaiCodeInterpreter: llmVndOaiCodeInterpreter } : {}),

// OpenRouter
...(llmVndOrtWebSearch === 'auto' ? { vndOrtWebSearch: 'auto' } : {}),

// Perplexity
...(llmVndPerplexityDateFilter ? { vndPerplexityDateFilter: llmVndPerplexityDateFilter } : {}),
...(llmVndPerplexitySearchMode ? { vndPerplexitySearchMode: llmVndPerplexitySearchMode } : {}),
...(userGeolocation ? { userGeolocation } : {}),

// xAI
...(llmVndXaiCodeExecution === 'auto' ? { vndXaiCodeExecution: llmVndXaiCodeExecution } : {}),
...(llmVndXaiSearchInterval ? { vndXaiSearchInterval: llmVndXaiSearchInterval } : {}),

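The convergence above (four UI-facing params collapsing into one wire field) can be illustrated with this standalone sketch; the `stripUndefined` helper here is a simplified stand-in written for the example, not the repository's implementation.

```typescript
// Minimal illustration of how the four vendor-specific effort params collapse into one wire field.
type Effort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';

interface LLMOptions {
  llmVndAntEffort?: Effort;
  llmVndGemEffort?: Effort;
  llmVndOaiEffort?: Effort;
  llmVndMiscEffort?: Effort;
}

// Simplified helper for this sketch: drop keys whose value is undefined.
function stripUndefined<T extends object>(obj: T): Partial<T> {
  return Object.fromEntries(Object.entries(obj).filter(([, v]) => v !== undefined)) as Partial<T>;
}

function toWireModel(opts: LLMOptions) {
  return stripUndefined({
    // first defined vendor effort wins; normally only one is set for a given model
    reasoningEffort: opts.llmVndAntEffort ?? opts.llmVndGemEffort ?? opts.llmVndOaiEffort ?? opts.llmVndMiscEffort,
  });
}

console.log(toWireModel({ llmVndGemEffort: 'high' })); // { reasoningEffort: 'high' }
console.log(toWireModel({}));                          // {} (field stripped when no effort is set)
```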
@@ -431,8 +431,6 @@ export namespace AixWire_API {
topP: z.number().min(0).max(1).optional(),
forceNoStream: z.boolean().optional(),

// Cross-vendor Structured Outputs

/**
* Constrain model response to a JSON schema for data extraction. Response will be valid JSON. Schema limitations vary by vendor.
* Supported: Anthropic (output_format), OpenAI (response_format), Gemini (responseSchema)
@@ -449,53 +447,6 @@ export namespace AixWire_API {
*/
strictToolInvocations: z.boolean().optional(),

// Unified effort parameter (replaces vendor-specific effort params)
effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(),

// NOTE: kept for backward compatibility during the migration; and they flow into effort - REMOVE for 2.0.5
vndAntEffort: z.enum(['low', 'medium', 'high', 'max']).optional(),
vndGeminiThinkingLevel: z.enum(['high', 'medium', 'low', 'minimal']).optional(), // new param
vndOaiReasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(),
vndOaiReasoningSummary: z.enum(['none', 'detailed']).optional(),
vndGeminiShowThoughts: z.boolean().optional(),

// Anthropic
vndAnt1MContext: z.boolean().optional(),
vndAntInfSpeed: z.enum(['fast']).optional(),
vndAntSkills: z.string().optional(),
vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(),
vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant
vndAntWebFetch: z.enum(['auto']).optional(),
vndAntWebSearch: z.enum(['auto']).optional(),
// Gemini
vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(),
vndGeminiCodeExecution: z.enum(['auto']).optional(),
vndGeminiComputerUse: z.enum(['browser']).optional(),
vndGeminiGoogleSearch: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(),
vndGeminiImageSize: z.enum(['1K', '2K', '4K']).optional(),
vndGeminiMediaResolution: z.enum(['mr_high', 'mr_medium', 'mr_low']).optional(),
vndGeminiThinkingBudget: z.number().optional(), // -1 for 'adaptive'
vndGeminiUrlContext: z.enum(['auto']).optional(),
// Moonshot
vndMoonshotWebSearch: z.enum(['auto']).optional(),
// OpenAI
vndOaiCodeInterpreter: z.enum(['off', 'auto']).optional(),
vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(),
vndOaiResponsesAPI: z.boolean().optional(),
vndOaiRestoreMarkdown: z.boolean().optional(),
vndOaiVerbosity: z.enum(['low', 'medium', 'high']).optional(),
vndOaiWebSearchContext: z.enum(['low', 'medium', 'high']).optional(),
// OpenRouter
vndOrtWebSearch: z.enum(['auto']).optional(),
// Perplexity
vndPerplexityDateFilter: z.enum(['unfiltered', '1m', '3m', '6m', '1y']).optional(),
vndPerplexitySearchMode: z.enum(['default', 'academic']).optional(),
// xAI
vndXaiCodeExecution: z.enum(['off', 'auto']).optional(),
vndXaiSearchInterval: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(),
vndXaiWebSearch: z.enum(['off', 'auto']).optional(),
vndXaiXSearch: z.enum(['off', 'auto']).optional(),
vndXaiXSearchHandles: z.string().optional(),
/**
* [OpenAI, 2025-03-11] This is the generic version of the `web_search_options.user_location` field
* This AIX field mimics on purpose: https://platform.openai.com/docs/api-reference/chat/create
@@ -506,6 +457,66 @@ export namespace AixWire_API {
country: z.string().optional(), // two-letter ISO country code of the user, e.g. US
timezone: z.string().optional(), // IANA timezone of the user, e.g. America/Los_Angeles
}).optional(),


// Cross-provider unified (but with semantic specialization) options

/**
* Union of all the possible reasoning effort values. Different dispatches will validate the
* domain (subset) of values they support, but the client can send any of them and let the server handle it.
*/
reasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(),
// REMOVE for 2.0.5: we used to have the parameters below - here for doc purposes only - parsing doesn't break if they are set (backward comp)
// vndAntEffort: z.enum(['low', 'medium', 'high', 'max']).optional(),
// vndGeminiThinkingLevel: z.enum(['high', 'medium', 'low', 'minimal']).optional(), // new param
// vndOaiReasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(),
// vndOaiReasoningSummary: z.enum(['none', 'detailed']).optional(),
// vndGeminiShowThoughts: z.boolean().optional(),

// Anthropic
vndAnt1MContext: z.boolean().optional(),
vndAntInfSpeed: z.enum(['fast']).optional(),
vndAntSkills: z.string().optional(),
vndAntThinkingBudget: z.number().or(z.literal('adaptive')).nullable().optional(),
vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant
vndAntWebFetch: z.enum(['auto']).optional(),
vndAntWebSearch: z.enum(['auto']).optional(),

// Gemini
vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(),
vndGeminiCodeExecution: z.enum(['auto']).optional(),
vndGeminiComputerUse: z.enum(['browser']).optional(),
vndGeminiGoogleSearch: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(),
vndGeminiImageSize: z.enum(['1K', '2K', '4K']).optional(),
vndGeminiMediaResolution: z.enum(['mr_high', 'mr_medium', 'mr_low']).optional(),
vndGeminiThinkingBudget: z.number().optional(), // -1 for 'adaptive'
vndGeminiUrlContext: z.enum(['auto']).optional(),

// Moonshot
vndMoonshotWebSearch: z.enum(['auto']).optional(),

// OpenAI
vndOaiCodeInterpreter: z.enum(['off', 'auto']).optional(),
vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(),
vndOaiResponsesAPI: z.boolean().optional(),
vndOaiRestoreMarkdown: z.boolean().optional(),
vndOaiVerbosity: z.enum(['low', 'medium', 'high']).optional(),
vndOaiWebSearchContext: z.enum(['low', 'medium', 'high']).optional(),

// OpenRouter
vndOrtWebSearch: z.enum(['auto']).optional(),

// Perplexity
vndPerplexityDateFilter: z.enum(['unfiltered', '1m', '3m', '6m', '1y']).optional(),
vndPerplexitySearchMode: z.enum(['default', 'academic']).optional(),

// xAI
vndXaiCodeExecution: z.enum(['off', 'auto']).optional(),
vndXaiSearchInterval: z.enum(['unfiltered', '1d', '1w', '1m', '6m', '1y']).optional(),
vndXaiWebSearch: z.enum(['off', 'auto']).optional(),
vndXaiXSearch: z.enum(['off', 'auto']).optional(),
vndXaiXSearchHandles: z.string().optional(),

});
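As a reference for how the unified field travels over the wire, a tiny standalone zod sketch: the schema fragment mirrors `reasoningEffort` above, and the domain check mirrors the throw-on-unsupported pattern used by the dispatch adapters (the Anthropic subset shown is taken from `llmVndAntEffort`). This is illustrative, not the repository's code.

```typescript
import { z } from 'zod';

// Illustrative subset of the wire schema above: just the unified effort field.
const ModelWire_schema = z.object({
  reasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(),
});

// A dispatch validates only the domain it supports (mirroring the adapters' throw-on-unsupported pattern).
const ANTHROPIC_SUPPORTED = new Set(['low', 'medium', 'high', 'max']);

function checkForAnthropic(payload: unknown) {
  const model = ModelWire_schema.parse(payload); // throws on malformed input
  const effort = model.reasoningEffort;
  if (effort && !ANTHROPIC_SUPPORTED.has(effort))
    throw new Error(`Anthropic API does not support '${effort}' effort level`);
  return effort;
}

console.log(checkForAnthropic({ reasoningEffort: 'max' }));   // 'max'
// checkForAnthropic({ reasoningEffort: 'xhigh' });           // would throw: unsupported for Anthropic
```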

/// Resume Handle

@@ -171,7 +171,7 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
}

// [Anthropic] Effort parameter [Anthropic, effort-2025-11-24]
const reasoningEffort = model.effort ?? model.vndAntEffort;
const reasoningEffort = model.reasoningEffort; // ?? model.vndAntEffort;
if (reasoningEffort) {
if (reasoningEffort === 'none' || reasoningEffort === 'minimal' || reasoningEffort === 'xhigh') throw new Error(`Anthropic API does not support '${reasoningEffort}' effort level`);
payload.output_config = {

@@ -96,15 +96,15 @@ export function aixToGeminiGenerateContent(model: AixAPI_Model, _chatGenerate: A
}

// Thinking models: thinking budget and show thoughts
const thinkingLevel = model.effort ?? model.vndGeminiThinkingLevel;
const thinkingLevel = model.reasoningEffort; // ?? model.vndGeminiThinkingLevel;
if (thinkingLevel === 'none' || thinkingLevel === 'xhigh' || thinkingLevel === 'max') // domain validation
throw new Error(`Gemini API does not support '${thinkingLevel}' thinking level`);

if (thinkingLevel || model.vndGeminiThinkingBudget !== undefined || model.vndGeminiShowThoughts === true) {
if (thinkingLevel || model.vndGeminiThinkingBudget !== undefined /*|| model.vndGeminiShowThoughts === true*/) {
const thinkingConfig: Exclude<TRequest['generationConfig'], undefined>['thinkingConfig'] = {};

// This shows mainly 'summaries' of thoughts, and we enable it for most cases where thinking is requested
if (thinkingLevel || (model.vndGeminiThinkingBudget ?? 0) > 1 || model.vndGeminiShowThoughts === true)
if (thinkingLevel || (model.vndGeminiThinkingBudget ?? 0) > 1 /*|| model.vndGeminiShowThoughts === true*/)
thinkingConfig.includeThoughts = true;

// [Gemini 3, 2025-11-18] Thinking Level (replaces thinkingBudget for Gemini 3)

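A minimal, self-contained sketch of the Gemini mapping shown in this hunk (illustrative only; the real adapter builds `thinkingConfig` on the request's `generationConfig`, and the interface below is a simplified stand-in):

```typescript
type Effort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';

// Hypothetical shape, loosely modeled on Gemini's generationConfig.thinkingConfig.
interface ThinkingConfig { thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high'; includeThoughts?: boolean }

function toGeminiThinkingConfig(reasoningEffort?: Effort, thinkingBudget?: number): ThinkingConfig | undefined {
  // domain validation: Gemini has no 'none' / 'xhigh' / 'max' thinking level
  if (reasoningEffort === 'none' || reasoningEffort === 'xhigh' || reasoningEffort === 'max')
    throw new Error(`Gemini API does not support '${reasoningEffort}' thinking level`);
  if (!reasoningEffort && thinkingBudget === undefined) return undefined; // let the model decide
  return {
    ...(reasoningEffort ? { thinkingLevel: reasoningEffort } : {}),
    // thought summaries are enabled whenever thinking is requested
    ...(reasoningEffort || (thinkingBudget ?? 0) > 1 ? { includeThoughts: true } : {}),
  };
}
```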
@@ -139,11 +139,10 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
}

// [OpenAI] Vendor-specific reasoning effort
const reasoningEffort = model.effort ?? model.vndOaiReasoningEffort;
const reasoningEffort = model.reasoningEffort; // ?? model.vndOaiReasoningEffort;
if (reasoningEffort
&& openAIDialect !== 'openrouter' // OpenRouter has its own channeling of this
&& openAIDialect !== 'moonshot' // MoonShot maps to none->disabled / high->enabled
&& openAIDialect !== 'zai' // Z.ai maps like MoonShot
&& openAIDialect !== 'deepseek' && openAIDialect !== 'moonshot' && openAIDialect !== 'zai' // MoonShot maps to none->disabled / high->enabled
&& openAIDialect !== 'perplexity' // Perplexity has its own block below with stricter validation
) {
if (reasoningEffort === 'max') // domain validation
@@ -153,7 +152,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:

// [Moonshot] Kimi K2.5 reasoning effort -> thinking mode (only 'none' and 'high' supported for now)
// [Z.ai] GLM thinking mode: binary enabled/disabled (supports GLM-4.5 series and higher) - https://docs.z.ai/guides/capabilities/thinking-mode
if (reasoningEffort && (openAIDialect === 'moonshot' || openAIDialect === 'zai')) {
if (reasoningEffort && (openAIDialect === 'deepseek' || openAIDialect === 'moonshot' || openAIDialect === 'zai')) {
if (reasoningEffort !== 'none' && reasoningEffort !== 'high') // domain validation
throw new Error(`${openAIDialect} only supports reasoning effort 'none' or 'high', got '${reasoningEffort}'`);

@@ -246,7 +245,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
const isTunneledGemini = model.id.startsWith('google/');
if (isTunneledAnt) {
// Effort -> OpenRouter verbosity -> Anthropic upstream output_config.effort
const antEffort = model.effort ?? model.vndAntEffort;
const antEffort = model.reasoningEffort; // ?? model.vndAntEffort;
if (antEffort) {
if (antEffort === 'none' || antEffort === 'minimal' || antEffort === 'xhigh') // domain validation
throw new Error(`OpenRouter->Anthropic API does not support '${antEffort}' reasoning effort`);
@@ -261,10 +260,10 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
if (model.vndAntThinkingBudget === 'adaptive') {
payload.reasoning = { enabled: true };
delete payload.temperature;
} else if (model.vndAntThinkingBudget) {
} else if (typeof model.vndAntThinkingBudget === 'number') {
payload.reasoning = { enabled: true, max_tokens: model.vndAntThinkingBudget };
delete payload.temperature;
} else {
} else /* null or undefined */ {
// NOTE: with thinking disabled (null), we can still use temperature, so we don't delete it
// see the note on llms.parameters.ts: 'llmVndAntThinkingBudget'
}
@@ -274,7 +273,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
if (model.vndGeminiThinkingBudget !== undefined) {
payload.reasoning = { enabled: true, max_tokens: model.vndGeminiThinkingBudget };
} else {
const gemEffort = model.effort ?? model.vndGeminiThinkingLevel;
const gemEffort = model.reasoningEffort; // ?? model.vndGeminiThinkingLevel;
if (gemEffort) {
if (gemEffort === 'none' || gemEffort === 'xhigh' || gemEffort === 'max') // domain validation
throw new Error(`OpenRouter->Gemini API does not support '${gemEffort}' reasoning effort`);

@@ -116,7 +116,7 @@ export function aixToOpenAIResponses(


// Reasoning
const reasoningEffort = model.effort ?? model.vndOaiReasoningEffort;
const reasoningEffort = model.reasoningEffort; // ?? model.vndOaiReasoningEffort;
if (reasoningEffort === 'max') // domain validation
throw new Error(`OpenAI Responses API does not support '${reasoningEffort}' reasoning effort`);


@@ -95,7 +95,7 @@ export function aixToXAIResponses(
}

// Reasoning
const reasoningEffort = model.effort ?? model.vndOaiReasoningEffort;
const reasoningEffort = model.reasoningEffort; // ?? model.vndOaiReasoningEffort;
if (reasoningEffort === 'none' || reasoningEffort === 'minimal' || reasoningEffort === 'xhigh' || reasoningEffort === 'max') // domain validation
throw new Error(`XAI Responses API does not support reasoning effort '${reasoningEffort}'`);


@@ -336,7 +336,7 @@ export namespace OpenAIWire_API_Chat_Completions {
// https://openrouter.ai/docs/api/reference/parameters#verbosity
verbosity: z.enum([
'low', 'medium', 'high',
'max', // [OpenRouter, 2026-02-06] Anthropic-through-openrouter has its llmEffort mapped to 'verbosity'
'max', // [OpenRouter, 2026-02-06] Anthropic-through-openrouter has its effort mapped to 'verbosity'
]).optional(), // 'max' is Opus 4.6 only
// [OpenRouter, 2025-11-11] Unified reasoning parameter for all models
reasoning: z.object({

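To recap the per-dialect branching above in one place, here is an illustrative sketch (not the repository's code) of how a single unified effort value fans out across OpenAI-compatible dialects; the payload field names follow the comments in this diff (`reasoning_effort`, and `thinking.type` for Moonshot/Z.ai), while the function itself is hypothetical.

```typescript
type Effort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
type Dialect = 'openai' | 'openrouter' | 'deepseek' | 'moonshot' | 'zai' | 'perplexity';

// Returns the fields to merge into a ChatCompletions-style payload for the given dialect.
function effortPayloadFields(dialect: Dialect, effort?: Effort): Record<string, unknown> {
  if (!effort) return {};
  // binary thinking toggles: DeepSeek / Moonshot / Z.ai only accept on ('high') or off ('none')
  if (dialect === 'deepseek' || dialect === 'moonshot' || dialect === 'zai') {
    if (effort !== 'none' && effort !== 'high')
      throw new Error(`${dialect} only supports reasoning effort 'none' or 'high', got '${effort}'`);
    return { thinking: { type: effort === 'none' ? 'disabled' : 'enabled' } };
  }
  // OpenRouter and Perplexity have their own channeling, handled by separate blocks in the adapter
  if (dialect === 'openrouter' || dialect === 'perplexity') return {};
  if (effort === 'max') throw new Error(`'max' is not a valid reasoning_effort for ${dialect}`);
  return { reasoning_effort: effort };
}
```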
@@ -21,9 +21,26 @@ import { AnthropicSkillsConfig } from './AnthropicSkillsConfig';

const _UNSPECIFIED = '_UNSPECIFIED' as const;

// unified effort options - descending order (strongest first), filtered per-model by enumValues
const _effortOptions = [
{ value: 'max', label: 'Max', description: 'Deepest reasoning, no constraints' } as const,

// Vendor-specific effort options - descending order, filtered per-model by enumValues

const _antEffortOptions = [
{ value: 'max', label: 'Max', description: 'Deepest reasoning' } as const,
{ value: 'high', label: 'High', description: 'Maximum capability' } as const,
{ value: 'medium', label: 'Medium', description: 'Balanced' } as const,
{ value: 'low', label: 'Low', description: 'Most efficient' } as const,
{ value: _UNSPECIFIED, label: 'Default', description: 'Default (High)' } as const,
] as const;

const _gemEffortOptions = [
{ value: 'high', label: 'High', description: 'Maximum reasoning depth' } as const,
{ value: 'medium', label: 'Medium', description: 'Balanced reasoning' } as const,
{ value: 'low', label: 'Low', description: 'Quick responses' } as const,
{ value: 'minimal', label: 'Minimal', description: 'Fastest, least reasoning' } as const,
{ value: _UNSPECIFIED, label: 'Default', description: 'Model decides' } as const,
] as const;

const _oaiEffortOptions = [
{ value: 'xhigh', label: 'X-High', description: 'Hardest thinking, best quality' } as const,
{ value: 'high', label: 'High', description: 'Deep, thorough analysis' } as const,
{ value: 'medium', label: 'Medium', description: 'Balanced reasoning depth' } as const,
@@ -33,6 +50,20 @@ const _effortOptions = [
{ value: _UNSPECIFIED, label: 'Default', description: 'Default value (unset)' } as const,
] as const;

const _miscEffortOptions = [
{ value: 'high', label: 'On', description: 'Multi-step reasoning' } as const,
{ value: 'none', label: 'Off', description: 'Disable thinking mode' } as const,
{ value: _UNSPECIFIED, label: 'Default', description: 'Model Default' } as const,
] as const;

export function llmParametersFilterEffortOptions<T extends { value: string }>(options: readonly T[], spec: DModelParameterSpecAny | undefined, registryKey: keyof typeof DModelParameterRegistry): T[] | null {
if (!spec) return null;
const registry = DModelParameterRegistry[registryKey];
const allowedSet = new Set((spec.enumValues as readonly string[] | undefined) ?? ('values' in registry ? registry.values : []));
return options.filter(o => o.value === _UNSPECIFIED || allowedSet.has(o.value));
}

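Example usage of `llmParametersFilterEffortOptions` (the spec literal below is a simplified illustration, not an actual model definition):

```typescript
// Illustrative only: a model spec that narrows OpenAI effort to three levels.
// (The spec shape is simplified; the real DModelParameterSpecAny carries more fields.)
const oaiSpec = { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] } as any;

const options = llmParametersFilterEffortOptions(_oaiEffortOptions, oaiSpec, 'llmVndOaiEffort');
// -> keeps only the entries whose value is in enumValues, plus the _UNSPECIFIED 'Default' entry

const hidden = llmParametersFilterEffortOptions(_oaiEffortOptions, undefined, 'llmVndOaiEffort');
// -> null: when the model has no spec for this param, the control is not rendered
```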
const _verbosityOptions = [
{ value: 'high', label: 'Detailed', description: 'Thorough responses, great for audits' } as const,
{ value: 'medium', label: 'Balanced', description: 'Standard detail level (default)' } as const,
@@ -191,13 +222,13 @@ export function LLMParametersEditor(props: {
, [props.parameterSpecs]);


// effort options: filtered to model's allowed values, preserving descending order from _effortOptions
const llmEffortSpec = modelParamSpec['llmEffort'];
const effortOptions = React.useMemo(() => {
if (!llmEffortSpec) return null;
const allowedSet = new Set((llmEffortSpec.enumValues as readonly string[] | undefined) ?? DModelParameterRegistry['llmEffort'].values);
return _effortOptions.filter(o => o.value === _UNSPECIFIED || allowedSet.has(o.value));
}, [llmEffortSpec]);
// effort options: one memo for all vendors, filtered to model's allowed values
const { antEffortOptions, gemEffortOptions, oaiEffortOptions, miscEffortOptions } = React.useMemo(() => ({
antEffortOptions: llmParametersFilterEffortOptions(_antEffortOptions, modelParamSpec['llmVndAntEffort'], 'llmVndAntEffort'),
gemEffortOptions: llmParametersFilterEffortOptions(_gemEffortOptions, modelParamSpec['llmVndGemEffort'], 'llmVndGemEffort'),
oaiEffortOptions: llmParametersFilterEffortOptions(_oaiEffortOptions, modelParamSpec['llmVndOaiEffort'], 'llmVndOaiEffort'),
miscEffortOptions: llmParametersFilterEffortOptions(_miscEffortOptions, modelParamSpec['llmVndMiscEffort'], 'llmVndMiscEffort'),
}), [modelParamSpec]);

// current values: { ...fallback, ...baseline, ...user }
|
||||
@@ -205,21 +236,24 @@ export function LLMParametersEditor(props: {
|
||||
const {
|
||||
llmResponseTokens = LLMImplicitParamersRuntimeFallback.llmResponseTokens, // fallback for undefined, result is number | null
|
||||
llmTemperature, // null: no temperature, number: temperature value, undefined: shall not happen, we treat it similarly to null
|
||||
llmEffort,
|
||||
llmForceNoStream,
|
||||
llmVndAnt1MContext,
|
||||
llmVndAntEffort,
|
||||
llmVndAntInfSpeed,
|
||||
llmVndAntSkills,
|
||||
llmVndAntThinkingBudget,
|
||||
llmVndAntWebFetch,
|
||||
llmVndAntWebSearch,
|
||||
llmVndGemEffort,
|
||||
llmVndGeminiAspectRatio,
|
||||
llmVndGeminiCodeExecution,
|
||||
llmVndGeminiGoogleSearch,
|
||||
llmVndGeminiImageSize,
|
||||
llmVndGeminiMediaResolution,
|
||||
llmVndGeminiThinkingBudget,
|
||||
llmVndMiscEffort,
|
||||
// llmVndMoonshotWebSearch,
|
||||
llmVndOaiEffort,
|
||||
llmVndOaiRestoreMarkdown,
|
||||
llmVndOaiWebSearchContext,
|
||||
llmVndOaiWebSearchGeolocation,
|
||||
@@ -229,7 +263,6 @@ export function LLMParametersEditor(props: {
|
||||
llmVndOrtWebSearch,
|
||||
llmVndPerplexityDateFilter,
|
||||
llmVndPerplexitySearchMode,
|
||||
|
||||
llmVndXaiCodeExecution,
|
||||
llmVndXaiSearchInterval,
|
||||
llmVndXaiWebSearch,
|
||||
@@ -276,10 +309,10 @@ export function LLMParametersEditor(props: {
|
||||
const gemTBSpec = modelParamSpec['llmVndGeminiThinkingBudget'];
|
||||
const gemTBMinMax = gemTBSpec?.rangeOverride || defGemTB.range;
|
||||
|
||||
// check if web search should be disabled
|
||||
// 2026-02-17: NOTE: formerly we checked for `llmEffort === 'minimal' || llmEffort === 'none'`, but seems to be working now
|
||||
// Now this seems to be still the case for llmEffort === 'minimal' (gpt 5.0 and before), 5.1/5.2 work even with 'none'
|
||||
const oaiSkipSearchOnMinimalEffort = llmEffort === 'minimal';
|
||||
// check if web search should be disabled (OpenAI-only)
|
||||
// 2026-02-17: NOTE: formerly we checked for `llmVndOaiEffort === 'minimal' || llmVndOaiEffort === 'none'`, but seems to be working now
|
||||
// Now this seems to be still the case for llmVndOaiEffort === 'minimal' (gpt 5.0 and before), 5.1/5.2 work even with 'none'
|
||||
const oaiSkipSearchOnMinimalEffort = llmVndOaiEffort === 'minimal';
|
||||
|
||||
return <>
|
||||
|
||||
@@ -329,6 +362,8 @@ export function LLMParametersEditor(props: {
|
||||
</Box>
|
||||
)}
|
||||
|
||||
|
||||
{/* pre-Effort: Anthropic [thinking budget, effort, ...] */}
|
||||
{antThinkingShown && (
|
||||
<FormSliderControl
|
||||
title={antThinkingEnabled ? 'Thinking Budget' : 'Disabled'} ariaLabel='Anthropic Extended Thinking Token Budget'
|
||||
@@ -355,19 +390,60 @@ export function LLMParametersEditor(props: {
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Unified Effort - dynamic options from model spec's enumValues, descending order */}
|
||||
{showParam('llmEffort') && effortOptions && (
|
||||
|
||||
{/* Anthropic Effort */}
|
||||
{showParam('llmVndAntEffort') && antEffortOptions && (
|
||||
<FormSelectControl
|
||||
title='Effort'
|
||||
tooltip='Controls reasoning depth and effort level'
|
||||
value={llmEffort ?? _UNSPECIFIED}
|
||||
tooltip='Controls thinking depth. Max = deepest reasoning with no constraints, High = default. Works alongside thinking budget.'
|
||||
value={llmVndAntEffort ?? _UNSPECIFIED}
|
||||
onChange={(value) => {
|
||||
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmEffort');
|
||||
else onChangeParameter({ llmEffort: value });
|
||||
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndAntEffort');
|
||||
else onChangeParameter({ llmVndAntEffort: value });
|
||||
}}
|
||||
options={effortOptions}
|
||||
options={antEffortOptions}
|
||||
/>
|
||||
)}
|
||||
{/* Gemini Thinking Level */}
|
||||
{showParam('llmVndGemEffort') && gemEffortOptions && (
|
||||
<FormSelectControl
|
||||
title='Thinking Level'
|
||||
tooltip='Controls internal reasoning depth. When unset, the model decides dynamically.'
|
||||
value={llmVndGemEffort ?? _UNSPECIFIED}
|
||||
onChange={(value) => {
|
||||
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndGemEffort');
|
||||
else onChangeParameter({ llmVndGemEffort: value });
|
||||
}}
|
||||
options={gemEffortOptions}
|
||||
/>
|
||||
)}
|
||||
{/* OpenAI Reasoning Effort */}
|
||||
{showParam('llmVndOaiEffort') && oaiEffortOptions && (
|
||||
<FormSelectControl
|
||||
title='Reasoning Effort'
|
||||
tooltip='Controls how much effort the model spends on reasoning'
|
||||
value={llmVndOaiEffort ?? _UNSPECIFIED}
|
||||
onChange={(value) => {
|
||||
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndOaiEffort');
|
||||
else onChangeParameter({ llmVndOaiEffort: value });
|
||||
}}
|
||||
options={oaiEffortOptions}
|
||||
/>
|
||||
)}
|
||||
{/* Moonshot/Z.ai Thinking */}
|
||||
{showParam('llmVndMiscEffort') && miscEffortOptions && (
|
||||
<FormSelectControl
|
||||
title='Thinking'
|
||||
tooltip='Enable or disable extended thinking mode'
|
||||
value={llmVndMiscEffort ?? _UNSPECIFIED}
|
||||
onChange={(value) => {
|
||||
if (value === _UNSPECIFIED || !value) onRemoveParameter('llmVndMiscEffort');
|
||||
else onChangeParameter({ llmVndMiscEffort: value });
|
||||
}}
|
||||
options={miscEffortOptions}
|
||||
/>
|
||||
)}
|
||||
|
||||
|
||||
{showParam('llmVndAntWebSearch') && (
|
||||
<FormSelectControl
|
||||
@@ -427,6 +503,54 @@ export function LLMParametersEditor(props: {
|
||||
)}
|
||||
|
||||
|
||||
{/* Gemini [effort, ... ] */}
|
||||
|
||||
{showParam('llmVndGeminiThinkingBudget') && (
|
||||
<FormSliderControl
|
||||
title='Thinking Budget' ariaLabel='Gemini Thinking Token Budget'
|
||||
description={gemThinkingAuto ? 'Auto' : gemThinkingOff ? 'Thinking Off' : 'Tokens'}
|
||||
min={gemTBMinMax[0]} max={gemTBMinMax[1]} step={1024}
|
||||
valueLabelDisplay={(gemThinkingAuto || gemThinkingOff) ? 'off' : 'on'}
|
||||
value={llmVndGeminiThinkingBudget ?? [gemTBMinMax[0], gemTBMinMax[1]]}
|
||||
variant={gemThinkingAuto ? 'soft' : undefined}
|
||||
// disabled={gemThinkingAuto}
|
||||
onChange={value => onChangeParameter({ llmVndGeminiThinkingBudget: Array.isArray(value) ? (value[0] || value[1]) : value })}
|
||||
startAdornment={gemTBMinMax[0] === 0 && (
|
||||
<Tooltip arrow disableInteractive title={gemThinkingOff ? 'Thinking Off' : 'Disable Thinking'}>
|
||||
<IconButton
|
||||
variant={gemThinkingOff ? 'solid' : 'outlined'}
|
||||
// disabled={gemThinkingOff}
|
||||
onClick={() => onChangeParameter({ llmVndGeminiThinkingBudget: 0 })}
|
||||
sx={{ mr: 2 }}
|
||||
>
|
||||
{gemThinkingOff ? <ClearIcon sx={{ fontSize: 'lg' }} /> : <PowerSettingsNewIcon />}
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
)}
|
||||
endAdornment={
|
||||
<Tooltip arrow disableInteractive title={gemThinkingAuto ? 'Automatic Thinking (default)' : 'Auto Budget'}>
|
||||
<IconButton
|
||||
variant={gemThinkingAuto ? 'solid' : 'outlined'}
|
||||
// disabled={gemThinkingAuto}
|
||||
onClick={() => onRemoveParameter('llmVndGeminiThinkingBudget')}
|
||||
sx={{ ml: 2 }}
|
||||
>
|
||||
<AutoModeIcon sx={{ fontSize: 'xl' }} />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
}
|
||||
/>
|
||||
)}
|
||||
|
||||
{/*{showParam('llmVndGeminiShowThoughts') && (*/}
|
||||
{/* <FormSwitchControl*/}
|
||||
{/* title='Show Reasoning'*/}
|
||||
{/* description='Show chain of thoughts'*/}
|
||||
{/* checked={!!llmVndGeminiShowThoughts}*/}
|
||||
{/* onChange={checked => onChangeParameter({ llmVndGeminiShowThoughts: checked })}*/}
|
||||
{/* />*/}
|
||||
{/*)}*/}
|
||||
|
||||
{showParam('llmVndGeminiImageSize') && (
|
||||
<FormSelectControl
|
||||
title='Image Size'
|
||||
@@ -467,44 +591,6 @@ export function LLMParametersEditor(props: {
|
||||
/>
|
||||
)}
|
||||
|
||||
|
||||
{showParam('llmVndGeminiThinkingBudget') && (
|
||||
<FormSliderControl
|
||||
title='Thinking Budget' ariaLabel='Gemini Thinking Token Budget'
|
||||
description={gemThinkingAuto ? 'Auto' : gemThinkingOff ? 'Thinking Off' : 'Tokens'}
|
||||
min={gemTBMinMax[0]} max={gemTBMinMax[1]} step={1024}
|
||||
valueLabelDisplay={(gemThinkingAuto || gemThinkingOff) ? 'off' : 'on'}
|
||||
value={llmVndGeminiThinkingBudget ?? [gemTBMinMax[0], gemTBMinMax[1]]}
|
||||
variant={gemThinkingAuto ? 'soft' : undefined}
|
||||
// disabled={gemThinkingAuto}
|
||||
onChange={value => onChangeParameter({ llmVndGeminiThinkingBudget: Array.isArray(value) ? (value[0] || value[1]) : value })}
|
||||
startAdornment={gemTBMinMax[0] === 0 && (
|
||||
<Tooltip arrow disableInteractive title={gemThinkingOff ? 'Thinking Off' : 'Disable Thinking'}>
|
||||
<IconButton
|
||||
variant={gemThinkingOff ? 'solid' : 'outlined'}
|
||||
// disabled={gemThinkingOff}
|
||||
onClick={() => onChangeParameter({ llmVndGeminiThinkingBudget: 0 })}
|
||||
sx={{ mr: 2 }}
|
||||
>
|
||||
{gemThinkingOff ? <ClearIcon sx={{ fontSize: 'lg' }} /> : <PowerSettingsNewIcon />}
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
)}
|
||||
endAdornment={
|
||||
<Tooltip arrow disableInteractive title={gemThinkingAuto ? 'Automatic Thinking (default)' : 'Auto Budget'}>
|
||||
<IconButton
|
||||
variant={gemThinkingAuto ? 'solid' : 'outlined'}
|
||||
// disabled={gemThinkingAuto}
|
||||
onClick={() => onRemoveParameter('llmVndGeminiThinkingBudget')}
|
||||
sx={{ ml: 2 }}
|
||||
>
|
||||
<AutoModeIcon sx={{ fontSize: 'xl' }} />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
}
|
||||
/>
|
||||
)}
|
||||
|
||||
{showParam('llmVndGeminiCodeExecution') && (
|
||||
<FormSelectControl
|
||||
title='Code Execution'
|
||||
@@ -582,37 +668,6 @@ export function LLMParametersEditor(props: {
|
||||
/>
|
||||
)}
|
||||
|
||||
|
||||
{showParam('llmVndPerplexitySearchMode') && (
|
||||
<FormSelectControl
|
||||
title='Search Mode'
|
||||
tooltip='Type of sources to prioritize in search results'
|
||||
value={llmVndPerplexitySearchMode ?? _UNSPECIFIED}
|
||||
onChange={(value) => {
|
||||
if (value === _UNSPECIFIED || !value)
|
||||
onRemoveParameter('llmVndPerplexitySearchMode');
|
||||
else
|
||||
onChangeParameter({ llmVndPerplexitySearchMode: value });
|
||||
}}
|
||||
options={_perplexitySearchModeOptions}
|
||||
/>
|
||||
)}
|
||||
|
||||
{showParam('llmVndPerplexityDateFilter') && (
|
||||
<FormSelectControl
|
||||
title='Date Range'
|
||||
tooltip='Filter search results by publication date'
|
||||
value={llmVndPerplexityDateFilter ?? _UNSPECIFIED}
|
||||
onChange={(value) => {
|
||||
if (value === _UNSPECIFIED || !value)
|
||||
onRemoveParameter('llmVndPerplexityDateFilter');
|
||||
else
|
||||
onChangeParameter({ llmVndPerplexityDateFilter: value });
|
||||
}}
|
||||
options={_perplexityDateFilterOptions}
|
||||
/>
|
||||
)}
|
||||
|
||||
{showParam('llmVndOaiVerbosity') && (
|
||||
<FormSelectControl
|
||||
title='Verbosity'
|
||||
@@ -673,7 +728,6 @@ export function LLMParametersEditor(props: {
|
||||
/>
|
||||
)}
|
||||
|
||||
|
||||
{showParam('llmForceNoStream') && (
|
||||
<FormSwitchControl
|
||||
title='Disable Streaming'
|
||||
@@ -690,6 +744,37 @@ export function LLMParametersEditor(props: {
|
||||
)}
|
||||
|
||||
|
||||
{showParam('llmVndPerplexitySearchMode') && (
|
||||
<FormSelectControl
|
||||
title='Search Mode'
|
||||
tooltip='Type of sources to prioritize in search results'
|
||||
value={llmVndPerplexitySearchMode ?? _UNSPECIFIED}
|
||||
onChange={(value) => {
|
||||
if (value === _UNSPECIFIED || !value)
|
||||
onRemoveParameter('llmVndPerplexitySearchMode');
|
||||
else
|
||||
onChangeParameter({ llmVndPerplexitySearchMode: value });
|
||||
}}
|
||||
options={_perplexitySearchModeOptions}
|
||||
/>
|
||||
)}
|
||||
|
||||
{showParam('llmVndPerplexityDateFilter') && (
|
||||
<FormSelectControl
|
||||
title='Date Range'
|
||||
tooltip='Filter search results by publication date'
|
||||
value={llmVndPerplexityDateFilter ?? _UNSPECIFIED}
|
||||
onChange={(value) => {
|
||||
if (value === _UNSPECIFIED || !value)
|
||||
onRemoveParameter('llmVndPerplexityDateFilter');
|
||||
else
|
||||
onChangeParameter({ llmVndPerplexityDateFilter: value });
|
||||
}}
|
||||
options={_perplexityDateFilterOptions}
|
||||
/>
|
||||
)}
|
||||
|
||||
|
||||
{showParam('llmVndOrtWebSearch') && (
|
||||
<FormSelectControl
|
||||
title='Web Search'
|
||||
|
||||
@@ -18,7 +18,7 @@ const IF_4_R = [...IF_4, LLM_IF_OAI_Reasoning];
|
||||
|
||||
|
||||
// Anthropic Parameters Semantics:
|
||||
// - llmEffort unified effort: each model declares its subset via enumValues
|
||||
// - llmVndAntEffort Anthropic effort: each model declares its subset via enumValues
|
||||
// - llmVndAnt1MContext only available on select models
|
||||
// - llmVndAntSkills 2026-02-06: seems GA to any model now: a parameter spec for user/UI configurability
|
||||
// - llmVndAntThinkingBudget 2026-02-06: deprecated since 4.6 in favor of adaptive thinking, was used for manual control of thinking up to 4.5, we pre-default it to 16384 and the user can set it to another value or null to turn thinking off
|
||||
@@ -43,7 +43,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* FORCE adaptive */ },
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'max'] },
|
||||
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high', 'max'] },
|
||||
{ paramId: 'llmVndAnt1MContext' },
|
||||
{ paramId: 'llmVndAntInfSpeed' },
|
||||
...ANT_TOOLS,
|
||||
@@ -58,7 +58,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmVndAntThinkingBudget', hidden: true, initialValue: -1 /* FORCE adaptive */ },
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
|
||||
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high'] },
|
||||
{ paramId: 'llmVndAnt1MContext' },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
@@ -73,7 +73,7 @@ const _hardcodedAnthropicThinkingVariants: ModelVariantMap & { [id: string]: { i
|
||||
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmVndAntThinkingBudget' },
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
|
||||
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high'] },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
benchmark: { cbaElo: 1468 }, // claude-opus-4-5-20251101-thinking-32k
|
||||
@@ -181,7 +181,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'max'] },
|
||||
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high', 'max'] },
|
||||
{ paramId: 'llmVndAnt1MContext' },
|
||||
{ paramId: 'llmVndAntInfSpeed' },
|
||||
...ANT_TOOLS,
|
||||
@@ -208,7 +208,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
maxCompletionTokens: 64000,
|
||||
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
|
||||
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high'] },
|
||||
{ paramId: 'llmVndAnt1MContext' },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
@@ -236,7 +236,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
maxCompletionTokens: 64000,
|
||||
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
|
||||
{ paramId: 'llmVndAntEffort', enumValues: ['low', 'medium', 'high'] },
|
||||
...ANT_TOOLS,
|
||||
],
|
||||
chatPrice: { input: 5, output: 25, cache: { cType: 'ant-bp', read: 0.50, write: 6.25, duration: 300 } },
|
||||
@@ -445,7 +445,7 @@ const _ORT_ANT_IF_ALLOWLIST: ReadonlySet<string> = new Set([
|
||||
] as const);
|
||||
// NOTE: llmVndAntInfSpeed intentionally NOT included - fast mode not available through OpenRouter
|
||||
const _ORT_ANT_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
|
||||
'llmEffort', // unified effort
|
||||
'llmVndAntEffort', // Anthropic effort
|
||||
'llmVndAntThinkingBudget',
|
||||
] as const satisfies DModelParameterId[]);
|
||||
|
||||
|
||||
@@ -167,7 +167,7 @@ const _knownGeminiModels: ({
|
||||
chatPrice: gemini30ProPricing,
|
||||
interfaces: IF_30,
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'high']},
|
||||
{ paramId: 'llmVndGemEffort', enumValues: ['low', 'high']},
|
||||
{ paramId: 'llmVndGeminiMediaResolution' },
|
||||
{ paramId: 'llmVndGeminiCodeExecution' },
|
||||
{ paramId: 'llmVndGeminiGoogleSearch' },
|
||||
@@ -216,7 +216,7 @@ const _knownGeminiModels: ({
|
||||
chatPrice: gemini30FlashPricing,
|
||||
interfaces: IF_30,
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high']},
|
||||
{ paramId: 'llmVndGemEffort', enumValues: ['minimal', 'low', 'medium', 'high']},
|
||||
{ paramId: 'llmVndGeminiMediaResolution' },
|
||||
{ paramId: 'llmVndGeminiCodeExecution' },
|
||||
{ paramId: 'llmVndGeminiGoogleSearch' },
|
||||
@@ -860,7 +860,7 @@ const _ORT_GEM_IF_ALLOWLIST: ReadonlySet<string> = new Set([
|
||||
] as const);
|
||||
|
||||
const _ORT_GEM_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
|
||||
'llmVndGeminiThinkingBudget', 'llmEffort', // OR supports Gemini thinking (unified effort)
|
||||
'llmVndGeminiThinkingBudget', 'llmVndGemEffort', // OR supports Gemini thinking
|
||||
'llmVndGeminiAspectRatio', 'llmVndGeminiImageSize', // OR supports Gemini image generation
|
||||
] as const satisfies DModelParameterId[]);
|
||||
|
||||
|
||||
@@ -76,7 +76,11 @@ const ModelParameterSpec_schema = z.object({
|
||||
paramId: z.enum([
|
||||
'llmTopP',
|
||||
'llmForceNoStream',
|
||||
'llmEffort', // unified effort
|
||||
// Vendor-specific effort params (converge to unified `effort` wire field)
|
||||
'llmVndAntEffort',
|
||||
'llmVndGemEffort',
|
||||
'llmVndOaiEffort',
|
||||
'llmVndMiscEffort',
|
||||
// Anthropic
|
||||
'llmVndAnt1MContext',
|
||||
'llmVndAntInfSpeed',
|
||||
|
||||
@@ -20,7 +20,7 @@ const IF_K2_5 = [
|
||||
];
|
||||
|
||||
const _PS_Reasoning: ModelDescriptionSchema['parameterSpecs'] = [
|
||||
{ paramId: 'llmEffort', enumValues: ['none', 'high'] },
|
||||
{ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] },
|
||||
] as const;
|
||||
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ export const hardcodedOpenAIVariants: ModelVariantMap = {
|
||||
description: 'Supports temperature control for creative applications. GPT-5.2 with reasoning disabled (reasoning_effort=none).',
|
||||
interfaces: [LLM_IF_OAI_Responses, LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching], // NO LLM_IF_OAI_Reasoning, NO LLM_IF_HOTFIX_NoTemperature
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'none', hidden: true }, // factory 'none', not changeable
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'none', hidden: true }, // factory 'none', not changeable
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiVerbosity' },
|
||||
{ paramId: 'llmVndOaiImageGeneration' },
|
||||
@@ -40,7 +40,7 @@ export const hardcodedOpenAIVariants: ModelVariantMap = {
|
||||
// // customize this param
|
||||
// { paramId: 'llmVndOaiWebSearchContext', initialValue: 'medium', hidden: true }, // Search enabled by default
|
||||
// // copy other params
|
||||
// { paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] },
|
||||
// { paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] },
|
||||
// { paramId: 'llmVndOaiRestoreMarkdown' },
|
||||
// { paramId: 'llmVndOaiVerbosity' },
|
||||
// { paramId: 'llmVndOaiImageGeneration' },
|
||||
@@ -84,7 +84,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'medium' /* our decision: set to medium to have thinking - clones can set to 'none' to have temperature */ },
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'medium' /* our decision: set to medium to have thinking - clones can set to 'none' to have temperature */ },
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiVerbosity' },
|
||||
{ paramId: 'llmVndOaiImageGeneration' },
|
||||
@@ -110,7 +110,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] },
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] },
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
],
|
||||
chatPrice: { input: 1.75, cache: { cType: 'oai-ac', read: 0.175 }, output: 14 },
|
||||
@@ -144,7 +144,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 272000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['medium', 'high', 'xhigh'] },
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['medium', 'high', 'xhigh'] },
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiVerbosity' },
|
||||
{ paramId: 'llmVndOaiImageGeneration' },
|
||||
@@ -171,7 +171,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiVerbosity' },
|
||||
{ paramId: 'llmVndOaiImageGeneration' },
|
||||
{ paramId: 'llmVndOaiCodeInterpreter' },
|
||||
@@ -214,7 +214,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] },
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] },
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
@@ -230,7 +230,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal)
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal)
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
@@ -246,7 +246,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal)
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, // low, medium, high (no minimal)
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
@@ -266,7 +266,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiVerbosity' }, // gpt-5-class nets have verbosity control
|
||||
{ paramId: 'llmVndOaiImageGeneration' }, // image generation capability
|
||||
{ paramId: 'llmVndOaiCodeInterpreter' }, // code execution in sandboxed container
|
||||
@@ -321,7 +321,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, // works
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, // works
|
||||
{ paramId: 'llmVndOaiWebSearchContext' }, // works, although is not triggered often
|
||||
// { paramId: 'llmVndOaiRestoreMarkdown', initialValue: false }, // since this is for code, let the prompt dictate markdown usage rather than us injecting
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
@@ -356,7 +356,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 400000,
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }],
chatPrice: { input: 0.25, cache: { cType: 'oai-ac', read: 0.025 }, output: 2 },
benchmark: { cbaElo: 1390 }, // gpt-5-mini-high
},
@@ -374,7 +374,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 400000,
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }],
chatPrice: { input: 0.05, cache: { cType: 'oai-ac', read: 0.005 }, output: 0.4 },
benchmark: { cbaElo: 1338 }, // gpt-5-nano-high
},
@@ -394,7 +394,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 128000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'] },
{ paramId: 'llmVndOaiWebSearchContext' },
{ paramId: 'llmForceNoStream' },
],
@@ -412,7 +412,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
maxCompletionTokens: 32768,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmForceNoStream' },
],
// chatPrice: TBD - unknown pricing
@@ -447,7 +447,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }],
chatPrice: { input: 1.5, cache: { cType: 'oai-ac', read: 0.375 }, output: 6 },
isLegacy: true, // Deprecated January 16, 2026.
},
@@ -480,7 +480,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: IFS_CHAT_CACHE_REASON,
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }],
chatPrice: { input: 1.1, cache: { cType: 'oai-ac', read: 0.275 }, output: 4.4 },
benchmark: { cbaElo: 1391 }, // o4-mini-2025-04-16
},
@@ -515,7 +515,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }],
chatPrice: { input: 20, output: 80 },
// benchmark: has not been measured yet
},
@@ -533,7 +533,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: IFS_CHAT_CACHE_REASON,
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmForceNoStream' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmForceNoStream' }],
chatPrice: { input: 2, cache: { cType: 'oai-ac', read: 0.5 }, output: 8 },
benchmark: { cbaElo: 1433 }, // o3-2025-04-16
},
@@ -551,7 +551,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_StripImages],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }],
chatPrice: { input: 1.1, cache: { cType: 'oai-ac', read: 0.55 }, output: 4.4 },
benchmark: { cbaElo: 1348 }, // o3-mini
},
@@ -570,7 +570,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature],
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmForceNoStream' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] }, { paramId: 'llmForceNoStream' }],
chatPrice: { input: 150, output: 600 },
// benchmark: has not been measured yet by third parties
},
@@ -588,7 +588,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 200000,
maxCompletionTokens: 100000,
interfaces: IFS_CHAT_CACHE_REASON,
parameterSpecs: [{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiRestoreMarkdown' }],
parameterSpecs: [{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high', 'xhigh'] }, { paramId: 'llmVndOaiRestoreMarkdown' }],
chatPrice: { input: 15, cache: { cType: 'oai-ac', read: 7.5 }, output: 60 },
benchmark: { cbaElo: 1402 }, // o1-2024-12-17
},
@@ -1232,7 +1232,7 @@ const _ORT_OAI_IF_ALLOWLIST: ReadonlySet<string> = new Set([
LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning,
] as const);
const _ORT_OAI_PARAM_ALLOWLIST: ReadonlySet<string> = new Set([
'llmEffort', // unified reasoning effort
'llmVndOaiEffort', // OpenAI reasoning effort
'llmVndOaiVerbosity', // verbosity
// 'llmVndOaiImageGeneration', // OR does NOT support image gen with OAI yet (2026-02-06)
] as const satisfies DModelParameterId[]);
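For context, a hedged sketch of how a parameter allowlist like the one above could be applied when importing OpenRouter's OpenAI-routed models; the helper name and the spec shape are assumptions for illustration, only the ReadonlySet<string> allowlist itself appears in the diff.

// Hypothetical helper: keep only the specs whose paramId is allowlisted.
interface ParamSpecLike { paramId: string; }

function filterAllowlistedSpecs<T extends ParamSpecLike>(specs: readonly T[], allowlist: ReadonlySet<string>): T[] {
  return specs.filter(spec => allowlist.has(spec.paramId));
}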
@@ -191,8 +191,8 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndAntThinkingBudget')) {
DEV_DEBUG_OPENROUTER_MODELS && console.log(`[DEV] openRouterModelToModelDescription: unexpected ${antLookup ? 'KNOWN' : 'unknown'} Anthropic reasoning model:`, model.id);
parameterSpecs.push({ paramId: 'llmVndAntThinkingBudget' }); // configurable thinking budget
if (!parameterSpecs.some(p => p.paramId === 'llmEffort'))
parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high', 'max'] }); // use the latest known Anthropic effort levels superset
if (!parameterSpecs.some(p => p.paramId === 'llmVndAntEffort'))
parameterSpecs.push({ paramId: 'llmVndAntEffort' }); // use the latest known Anthropic effort levels superset
}
break;
@@ -204,9 +204,9 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
console.log('[DEV] openRouterModelToModelDescription: unknown Gemini model:', model.id);

// 0-day: reasoning models get default thinking budget if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndGeminiThinkingBudget' || p.paramId === 'llmEffort')) {
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndGeminiThinkingBudget' || p.paramId === 'llmVndGemEffort')) {
// DEV_DEBUG_OPENROUTER_MODELS && console.log(`[DEV] openRouterModelToModelDescription: tagging ${gemLookup ? 'KNOWN' : 'unknown'} Gemini reasoning model:`, model.id);
parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['minimal', 'low', 'medium', 'high'] }); // use the latest known Gemini effort (thinking) levels superset
parameterSpecs.push({ paramId: 'llmVndGemEffort' }); // use the latest known Gemini effort (thinking) levels superset
// parameterSpecs.push({ paramId: 'llmVndGeminiThinkingBudget' }); // fallback with default range
}

@@ -229,17 +229,17 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
console.log('[DEV] openRouterModelToModelDescription: unknown OpenAI model:', model.id);

// 0-day: reasoning models get default 3-level effort if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmEffort')) {
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndOaiEffort')) {
// console.log('[DEV] openRouterModelToModelDescription: unexpected OpenAI reasoning model:', model.id);
parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'] }); // latest known OpenAI effort levels superset
parameterSpecs.push({ paramId: 'llmVndOaiEffort' }); // latest known OpenAI effort levels superset
}
break;
case model.id.startsWith('x-ai/') || model.id.startsWith('moonshotai/') || model.id.startsWith('z-ai/') || model.id.startsWith('deepseek/'):
// 0-day: xAI/Grok models get default reasoning effort if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmEffort')) {
// 0-day: xAI/Grok/Moonshot/Z.ai/DeepSeek models get default reasoning effort if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndMiscEffort')) {
// console.log('[DEV] openRouterModelToModelDescription: unexpected xAI/Grok/DeepSeek reasoning model:', model.id);
parameterSpecs.push({ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] }); // latest known xAI effort levels superset
parameterSpecs.push({ paramId: 'llmVndMiscEffort' }); // binary thinking for these vendors
}
break;

@@ -1,7 +1,7 @@
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { createVariantInjector, ModelVariantMap } from '../../llm.server.variants';

import { LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types';
import { LLM_IF_HOTFIX_NoStream, LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types';

// configuration
@@ -16,13 +16,16 @@ const _hardcodedPerplexityVariants: ModelVariantMap = !PERPLEXITY_ENABLE_VARIANT
idVariant: 'academic',
label: 'Sonar Deep Research (Academic)',
description: 'Expert-level research model with academic sources only. Searches scholarly databases, peer-reviewed papers, and academic publications. 128k context.',
interfaces: [
LLM_IF_HOTFIX_NoStream, // seems to be required for medium/academic
LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning,
],
parameterSpecs: [
// Fixed parameters for academic search
{ paramId: 'llmVndOaiWebSearchContext', initialValue: 'medium', hidden: true },
{ paramId: 'llmVndPerplexitySearchMode', initialValue: 'academic', hidden: true },
{ paramId: 'llmForceNoStream', initialValue: true, hidden: true },
// Free parameters
// { paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'], initialValue: 'medium' },
// { paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'], initialValue: 'medium' },
{ paramId: 'llmVndPerplexityDateFilter' },
],
},
@@ -40,7 +43,7 @@ const _knownPerplexityChatModels: ModelDescriptionSchema[] = [
contextWindow: 128000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning],
parameterSpecs: [
{ paramId: 'llmEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] },
{ paramId: 'llmVndOaiWebSearchContext', initialValue: 'low' }, // REUSE!
{ paramId: 'llmVndPerplexitySearchMode' },
{ paramId: 'llmVndPerplexityDateFilter' },

@@ -152,7 +152,10 @@ const _knownXAIChatModels: ManualMappings = [
contextWindow: 131072,
maxCompletionTokens: undefined,
interfaces: [...XAI_IF_Pre4, LLM_IF_OAI_Reasoning],
parameterSpecs: XAI_PAR_Pre4,
parameterSpecs: [
{ paramId: 'llmVndOaiEffort', enumValues: ['low', 'medium', 'high'] },
...XAI_PAR_Pre4,
],
chatPrice: { input: 0.3, output: 0.5, cache: { cType: 'oai-ac', read: 0.075 } },
benchmark: { cbaElo: 1357 }, // grok-3-mini-beta
},
@@ -17,7 +17,7 @@ const _IF_Vision_Reasoning = [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, L
// - Z.ai thinking maps from effort: 'none' → disabled, anything else → enabled
// - Z.ai only supports binary enabled/disabled, so we expose 'none' and 'high'
const _PS_Reasoning: ModelDescriptionSchema['parameterSpecs'] = [
{ paramId: 'llmEffort', enumValues: ['none', 'high'] },
{ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] },
] as const;
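As the comments in this hunk note, these vendors only toggle thinking on or off. A hedged sketch of the adapter-side translation this implies follows; the function name and the exact wire field are assumptions for illustration, not code from this commit.

// Hypothetical mapping: 'none' disables thinking, anything else enables it.
function zaiThinkingFromEffort(effort: 'none' | 'high'): { type: 'disabled' | 'enabled' } {
  return { type: effort === 'none' ? 'disabled' : 'enabled' };
}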
@@ -42,8 +42,8 @@ const SWEEP_DEFINITIONS = [
name: 'oai-reasoning-effort',
description: 'OpenAI reasoning_effort values',
applicability: { type: 'dialects', dialects: ['openai', 'azure', 'openrouter'] },
applyToModel: (value) => ({ effort: value }),
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh' /*, 'max'*/ /* OpenRouter-only? */] satisfies AixAPI_Model['effort'][],
applyToModel: (value) => ({ reasoningEffort: value }),
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh' /*, 'max'*/ /* OpenRouter-only? */] satisfies AixAPI_Model['reasoningEffort'][],
neuteredValues: ['medium'], // medium is the default, so only-medium means no real support
mode: 'enumerate',
}),
@@ -85,8 +85,8 @@ const SWEEP_DEFINITIONS = [
name: 'ant-effort',
description: 'Anthropic output_config.effort values',
applicability: { type: 'dialects', dialects: ['anthropic'] },
applyToModel: (value) => ({ effort: value }),
values: ['low', 'medium', 'high', 'max'] satisfies AixAPI_Model['effort'][],
applyToModel: (value) => ({ reasoningEffort: value }),
values: ['low', 'medium', 'high', 'max'] satisfies AixAPI_Model['reasoningEffort'][],
mode: 'enumerate',
}),

@@ -112,8 +112,8 @@ const SWEEP_DEFINITIONS = [
name: 'gemini-thinking-level',
description: 'Gemini thinkingConfig.thinkingLevel values',
applicability: { type: 'dialects', dialects: ['gemini'] },
applyToModel: (value) => value ? { effort: value } : {}, // null = dynamic mode, don't set level
values: ['minimal', 'low', 'medium', 'high'] satisfies (AixAPI_Model['effort'] | null)[],
applyToModel: (value) => value ? { reasoningEffort: value } : {}, // null = dynamic mode, don't set level
values: ['minimal', 'low', 'medium', 'high'] satisfies (AixAPI_Model['reasoningEffort'] | null)[],
mode: 'enumerate',
}),

@@ -138,8 +138,8 @@ const SWEEP_DEFINITIONS = [
name: 'xai-reasoning-effort',
description: 'xAI reasoning.effort values',
applicability: { type: 'dialects', dialects: ['xai'] },
applyToModel: (value) => ({ effort: value }),
values: ['low', 'medium', 'high'] satisfies AixAPI_Model['effort'][],
applyToModel: (value) => ({ reasoningEffort: value }),
values: ['low', 'medium', 'high'] satisfies AixAPI_Model['reasoningEffort'][],
mode: 'enumerate',
}),
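A hedged sketch of how entries shaped like the sweep definitions above could be expanded into per-run model overrides; only the field names (name, values, applyToModel) come from the diff, while the reduced interface and the helper below are illustrative assumptions.

// Illustrative only: enumerate a sweep's values into labeled model overrides,
// e.g. applyToModel('high') yielding { reasoningEffort: 'high' }.
interface SweepDefSketch<V> {
  name: string;
  values: readonly V[];
  applyToModel: (value: V) => Record<string, unknown>;
}

function enumerateSweepOverrides<V>(def: SweepDefSketch<V>): Array<{ run: string; overrides: Record<string, unknown> }> {
  return def.values.map(value => ({
    run: `${def.name}=${String(value)}`,
    overrides: def.applyToModel(value),
  }));
}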