From 4d097d7136c9af3dc6ea7fee84012518ea37566e Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Thu, 23 Apr 2026 23:30:34 -0700
Subject: [PATCH] LLMs: DeepSeek: add V4 support infra

---
 src/common/stores/llms/llms.parameters.ts          |  3 ++-
 .../adapters/openai.chatCompletions.ts             | 15 ++++++++++++---
 .../server/dispatch/wiretypes/openai.wiretypes.ts  |  3 +++
 .../llms/models-modal/LLMParametersEditor.tsx      |  1 +
 .../server/openai/models/openrouter.models.ts      |  5 ++++-
 5 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/common/stores/llms/llms.parameters.ts b/src/common/stores/llms/llms.parameters.ts
index 017ceef2f..06e804fa9 100644
--- a/src/common/stores/llms/llms.parameters.ts
+++ b/src/common/stores/llms/llms.parameters.ts
@@ -175,7 +175,8 @@ export const DModelParameterRegistry = {
     label: 'Thinking',
     type: 'enum',
     description: 'Enable or disable extended thinking mode.',
-    values: ['none', 'high'],
+    values: ['none', 'high', 'max'],
+    // 'max' is for now DeepSeek V4-specific (reasoning_effort=max); other vendors restrict via enumValues
     // undefined means vendor default (usually 'high', i.e. thinking enabled)
   }),

diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts
index 7ae5da992..2bfd78996 100644
--- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts
+++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts
@@ -152,11 +152,20 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:

   // [Moonshot] Kimi K2.5 reasoning effort -> thinking mode (only 'none' and 'high' supported for now)
   // [Z.ai] GLM thinking mode: binary enabled/disabled (supports GLM-4.5 series and higher) - https://docs.z.ai/guides/capabilities/thinking-mode
+  // [DeepSeek, 2026-04-23] V4 thinking control
   if (reasoningEffort && (openAIDialect === 'deepseek' || openAIDialect === 'moonshot' || openAIDialect === 'zai')) {
-    if (reasoningEffort !== 'none' && reasoningEffort !== 'high') // domain validation
-      throw new Error(`${openAIDialect} only supports reasoning effort 'none' or 'high', got '${reasoningEffort}'`);
+    const allowedEffort = openAIDialect === 'deepseek' ? ['none', 'high', 'max'] : ['none', 'high'];
+    if (!allowedEffort.includes(reasoningEffort)) // domain validation
+      throw new Error(`${openAIDialect} only supports reasoning effort ${allowedEffort.join(', ')}, got '${reasoningEffort}'`);

-    payload.thinking = { type: reasoningEffort === 'none' ? 'disabled' : 'enabled' };
+    if (reasoningEffort === 'none')
+      payload.thinking = { type: 'disabled' };
+    else
+      payload.thinking = {
+        type: 'enabled',
+        // DeepSeek: forward the user-selected effort to tune thinking depth ('high' | 'max')
+        ...(openAIDialect === 'deepseek' && { reasoning_effort: reasoningEffort as 'high' | 'max' }),
+      };
   }

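A minimal sketch of the request bodies the adapter branch above would emit for the 'deepseek' dialect; only the `thinking` field mirrors the code in this hunk, while the model id and messages are hypothetical placeholders, not taken from this patch:

// Illustrative only: shapes produced by the reasoningEffort branch for openAIDialect === 'deepseek'.
// 'deepseek-v4' and the messages below are made-up placeholders.

// reasoningEffort === 'none' -> thinking disabled
const thinkingOff = {
  model: 'deepseek-v4',                                  // hypothetical model id
  messages: [{ role: 'user' as const, content: 'Hi' }],  // placeholder
  thinking: { type: 'disabled' as const },
};

// reasoningEffort === 'high' | 'max' -> thinking enabled; the effort is forwarded only for the DeepSeek dialect
const thinkingMax = {
  model: 'deepseek-v4',                                  // hypothetical model id
  messages: [{ role: 'user' as const, content: 'Hi' }],  // placeholder
  thinking: { type: 'enabled' as const, reasoning_effort: 'max' as const },
};
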
diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
index e14145794..435b50142 100644
--- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
+++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
@@ -447,8 +447,11 @@ export namespace OpenAIWire_API_Chat_Completions {
       search_after_date_filter: z.string().optional(), // Date filter in MM/DD/YYYY format

       // [Moonshot, 2026-01-26] Kimi K2.5 thinking mode control
+      // [Z.ai, 2025-xx] GLM thinking mode: type 'enabled' | 'disabled'
+      // [DeepSeek, 2026-04-23] V4 thinking mode: adds optional `reasoning_effort` ('high' | 'max')
       thinking: z.object({
         type: z.enum(['enabled', 'disabled']),
+        reasoning_effort: z.enum(['high', 'max']).optional(), // [2026-04-23, DeepSeek] introduced this here - a nod to Anthropic's values, with semantics different from OpenRouter's
       }).optional(),

       seed: z.number().int().optional(),

diff --git a/src/modules/llms/models-modal/LLMParametersEditor.tsx b/src/modules/llms/models-modal/LLMParametersEditor.tsx
index f3ccb719c..c90d58844 100644
--- a/src/modules/llms/models-modal/LLMParametersEditor.tsx
+++ b/src/modules/llms/models-modal/LLMParametersEditor.tsx
@@ -51,6 +51,7 @@ const _oaiEffortOptions = [
 ] as const;

 const _miscEffortOptions = [
+  { value: 'max', label: 'Max', description: 'Hardest thinking' } as const,
   { value: 'high', label: 'On', description: 'Multi-step reasoning' } as const,
   { value: 'none', label: 'Off', description: 'Disable thinking mode' } as const,
   { value: _UNSPECIFIED, label: 'Default', description: 'Model Default' } as const,

diff --git a/src/modules/llms/server/openai/models/openrouter.models.ts b/src/modules/llms/server/openai/models/openrouter.models.ts
index 57c054f34..3a516e39c 100644
--- a/src/modules/llms/server/openai/models/openrouter.models.ts
+++ b/src/modules/llms/server/openai/models/openrouter.models.ts
@@ -246,7 +246,10 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
       // 0-day: xAI/Grok/Moonshot/Z.ai/DeepSeek models get default reasoning effort if not inherited
       if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndMiscEffort')) {
         // console.log('[DEV] openRouterModelToModelDescription: unexpected xAI/Grok/DeepSeek reasoning model:', model.id);
-        parameterSpecs.push({ paramId: 'llmVndMiscEffort' }); // binary thinking for these vendors
+        // Binary thinking only: OpenRouter's unified reasoning API currently rejects 'max' (see openai.chatCompletions.ts).
+        // We pin enumValues here so the shared llmVndMiscEffort registry (which also includes 'max' for native DeepSeek V4)
+        // does not surface 'max' in the UI for OR-routed models that can't honor it.
+        parameterSpecs.push({ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] });
       }
       break;
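
A small standalone sketch of how the extended `thinking` wiretype behaves; the schema is restated from the openai.wiretypes.ts hunk above, and the sample inputs are made up:

import { z } from 'zod';

// Restatement of the `thinking` field schema, with the optional reasoning_effort added by this patch.
const Thinking_schema = z.object({
  type: z.enum(['enabled', 'disabled']),
  reasoning_effort: z.enum(['high', 'max']).optional(),
});

console.log(Thinking_schema.safeParse({ type: 'enabled', reasoning_effort: 'max' }).success);    // true  (DeepSeek V4)
console.log(Thinking_schema.safeParse({ type: 'enabled' }).success);                             // true  (Moonshot / Z.ai style)
console.log(Thinking_schema.safeParse({ type: 'enabled', reasoning_effort: 'medium' }).success); // false (not in the enum)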