From 4d097d7136c9af3dc6ea7fee84012518ea37566e Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Thu, 23 Apr 2026 23:30:34 -0700
Subject: [PATCH] LLMs: DeepSeek: add V4 support infra

---
 src/common/stores/llms/llms.parameters.ts          |  3 ++-
 .../adapters/openai.chatCompletions.ts             | 15 ++++++++++++---
 .../server/dispatch/wiretypes/openai.wiretypes.ts  |  3 +++
 .../llms/models-modal/LLMParametersEditor.tsx      |  1 +
 .../server/openai/models/openrouter.models.ts      |  5 ++++-
 5 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/common/stores/llms/llms.parameters.ts b/src/common/stores/llms/llms.parameters.ts
index 017ceef2f..06e804fa9 100644
--- a/src/common/stores/llms/llms.parameters.ts
+++ b/src/common/stores/llms/llms.parameters.ts
@@ -175,7 +175,8 @@ export const DModelParameterRegistry = {
     label: 'Thinking',
     type: 'enum',
     description: 'Enable or disable extended thinking mode.',
-    values: ['none', 'high'],
+    values: ['none', 'high', 'max'],
+    // 'max' is for now DeepSeek V4-specific (reasoning_effort=max); other vendors restrict via enumValues
     // undefined means vendor default (usually 'high', i.e. thinking enabled)
   }),

diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts
index 7ae5da992..2bfd78996 100644
--- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts
+++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts
@@ -152,11 +152,20 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:

   // [Moonshot] Kimi K2.5 reasoning effort -> thinking mode (only 'none' and 'high' supported for now)
   // [Z.ai] GLM thinking mode: binary enabled/disabled (supports GLM-4.5 series and higher) - https://docs.z.ai/guides/capabilities/thinking-mode
+  // [DeepSeek, 2026-04-23] V4 thinking control
   if (reasoningEffort && (openAIDialect === 'deepseek' || openAIDialect === 'moonshot' || openAIDialect === 'zai')) {
-    if (reasoningEffort !== 'none' && reasoningEffort !== 'high') // domain validation
-      throw new Error(`${openAIDialect} only supports reasoning effort 'none' or 'high', got '${reasoningEffort}'`);
+    const allowedEffort = openAIDialect === 'deepseek' ? ['none', 'high', 'max'] : ['none', 'high'];
+    if (!allowedEffort.includes(reasoningEffort)) // domain validation
+      throw new Error(`${openAIDialect} only supports reasoning effort ${allowedEffort.join(', ')}, got '${reasoningEffort}'`);

-    payload.thinking = { type: reasoningEffort === 'none' ? 'disabled' : 'enabled' };
+    if (reasoningEffort === 'none')
+      payload.thinking = { type: 'disabled' };
+    else
+      payload.thinking = {
+        type: 'enabled',
+        // DeepSeek: forward the user-selected effort to tune thinking depth ('high' | 'max')
+        ...(openAIDialect === 'deepseek' && { reasoning_effort: reasoningEffort as 'high' | 'max' }),
+      };
   }

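A minimal sketch of the request bodies the adapter branch above would emit for the 'deepseek' dialect; only the `thinking` field mirrors the code in this hunk, while the model id and messages are hypothetical placeholders, not taken from this patch:

// Illustrative only: shapes produced by the reasoningEffort branch for openAIDialect === 'deepseek'.
// 'deepseek-v4' and the messages below are made-up placeholders.

// reasoningEffort === 'none' -> thinking disabled
const thinkingOff = {
  model: 'deepseek-v4',                                  // hypothetical model id
  messages: [{ role: 'user' as const, content: 'Hi' }],  // placeholder
  thinking: { type: 'disabled' as const },
};

// reasoningEffort === 'high' | 'max' -> thinking enabled; the effort is forwarded only for the DeepSeek dialect
const thinkingMax = {
  model: 'deepseek-v4',                                  // hypothetical model id
  messages: [{ role: 'user' as const, content: 'Hi' }],  // placeholder
  thinking: { type: 'enabled' as const, reasoning_effort: 'max' as const },
};
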
diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
index e14145794..435b50142 100644
--- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
+++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
@@ -447,8 +447,11 @@ export namespace OpenAIWire_API_Chat_Completions {
       search_after_date_filter: z.string().optional(), // Date filter in MM/DD/YYYY format

       // [Moonshot, 2026-01-26] Kimi K2.5 thinking mode control
+      // [Z.ai, 2025-xx] GLM thinking mode: type 'enabled' | 'disabled'
+      // [DeepSeek, 2026-04-23] V4 thinking mode: adds optional `reasoning_effort` ('high' | 'max')
       thinking: z.object({
         type: z.enum(['enabled', 'disabled']),
+        reasoning_effort: z.enum(['high', 'max']).optional(), // [2026-04-23, DeepSeek] introduced this here - a nod to Anthropic's values, with semantics different from OpenRouter's
       }).optional(),

       seed: z.number().int().optional(),

diff --git a/src/modules/llms/models-modal/LLMParametersEditor.tsx b/src/modules/llms/models-modal/LLMParametersEditor.tsx
index f3ccb719c..c90d58844 100644
--- a/src/modules/llms/models-modal/LLMParametersEditor.tsx
+++ b/src/modules/llms/models-modal/LLMParametersEditor.tsx
@@ -51,6 +51,7 @@ const _oaiEffortOptions = [
 ] as const;

 const _miscEffortOptions = [
+  { value: 'max', label: 'Max', description: 'Hardest thinking' } as const,
   { value: 'high', label: 'On', description: 'Multi-step reasoning' } as const,
   { value: 'none', label: 'Off', description: 'Disable thinking mode' } as const,
   { value: _UNSPECIFIED, label: 'Default', description: 'Model Default' } as const,

diff --git a/src/modules/llms/server/openai/models/openrouter.models.ts b/src/modules/llms/server/openai/models/openrouter.models.ts
index 57c054f34..3a516e39c 100644
--- a/src/modules/llms/server/openai/models/openrouter.models.ts
+++ b/src/modules/llms/server/openai/models/openrouter.models.ts
@@ -246,7 +246,10 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
       // 0-day: xAI/Grok/Moonshot/Z.ai/DeepSeek models get default reasoning effort if not inherited
       if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndMiscEffort')) {
         // console.log('[DEV] openRouterModelToModelDescription: unexpected xAI/Grok/DeepSeek reasoning model:', model.id);
-        parameterSpecs.push({ paramId: 'llmVndMiscEffort' }); // binary thinking for these vendors
+        // Binary thinking only: OpenRouter's unified reasoning API currently rejects 'max' (see openai.chatCompletions.ts).
+        // We pin enumValues here so the shared llmVndMiscEffort registry (which also includes 'max' for native DeepSeek V4)
+        // does not surface 'max' in the UI for OR-routed models that can't honor it.
+        parameterSpecs.push({ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] });
       }
       break;
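
A small standalone sketch of how the extended `thinking` wiretype behaves; the schema is restated from the openai.wiretypes.ts hunk above, and the sample inputs are made up:

import { z } from 'zod';

// Restatement of the `thinking` field schema, with the optional reasoning_effort added by this patch.
const Thinking_schema = z.object({
  type: z.enum(['enabled', 'disabled']),
  reasoning_effort: z.enum(['high', 'max']).optional(),
});

console.log(Thinking_schema.safeParse({ type: 'enabled', reasoning_effort: 'max' }).success);    // true  (DeepSeek V4)
console.log(Thinking_schema.safeParse({ type: 'enabled' }).success);                             // true  (Moonshot / Z.ai style)
console.log(Thinking_schema.safeParse({ type: 'enabled', reasoning_effort: 'medium' }).success); // false (not in the enum)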