LLMs: DeepSeek: add V4 support infra

This commit is contained in:
Enrico Ros
2026-04-23 23:30:34 -07:00
parent 178619d275
commit 4d097d7136
5 changed files with 22 additions and 5 deletions
+2 -1
View File
@@ -175,7 +175,8 @@ export const DModelParameterRegistry = {
label: 'Thinking',
type: 'enum',
description: 'Enable or disable extended thinking mode.',
values: ['none', 'high'],
values: ['none', 'high', 'max'],
// 'max' is for now DeepSeek V4-specific (reasoning_effort=max); other vendors restrict via enumValues
// undefined means vendor default (usually 'high', i.e. thinking enabled)
}),
@@ -152,11 +152,20 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
// [Moonshot] Kimi K2.5 reasoning effort -> thinking mode (only 'none' and 'high' supported for now)
// [Z.ai] GLM thinking mode: binary enabled/disabled (supports GLM-4.5 series and higher) - https://docs.z.ai/guides/capabilities/thinking-mode
// [DeepSeek, 2026-04-23] V4 thinking control
if (reasoningEffort && (openAIDialect === 'deepseek' || openAIDialect === 'moonshot' || openAIDialect === 'zai')) {
if (reasoningEffort !== 'none' && reasoningEffort !== 'high') // domain validation
throw new Error(`${openAIDialect} only supports reasoning effort 'none' or 'high', got '${reasoningEffort}'`);
const allowedEffort = openAIDialect === 'deepseek' ? ['none', 'high', 'max'] : ['none', 'high'];
if (!allowedEffort.includes(reasoningEffort)) // domain validation
throw new Error(`${openAIDialect} only supports reasoning effort ${allowedEffort.join(', ')}, got '${reasoningEffort}'`);
payload.thinking = { type: reasoningEffort === 'none' ? 'disabled' : 'enabled' };
if (reasoningEffort === 'none')
payload.thinking = { type: 'disabled' };
else
payload.thinking = {
type: 'enabled',
// DeepSeek: forward the user-selected effort to tune thinking depth ('high' | 'max')
...(openAIDialect === 'deepseek' && { reasoning_effort: reasoningEffort as 'high' | 'max' }),
};
}
@@ -447,8 +447,11 @@ export namespace OpenAIWire_API_Chat_Completions {
search_after_date_filter: z.string().optional(), // Date filter in MM/DD/YYYY format
// [Moonshot, 2026-01-26] Kimi K2.5 thinking mode control
// [Z.ai, 2025-xx] GLM thinking mode: type 'enabled' | 'disabled'
// [DeepSeek, 2026-04-23] V4 thinking mode: adds optional `reasoning_effort` ('high' | 'max')
thinking: z.object({
type: z.enum(['enabled', 'disabled']),
reasoning_effort: z.enum(['high', 'max']).optional(), // [2026-04-23, DeepSeek] introduced this here - a nod to Anthropic values, and semantics different from OpenRouter's
}).optional(),
seed: z.number().int().optional(),
@@ -51,6 +51,7 @@ const _oaiEffortOptions = [
] as const;
const _miscEffortOptions = [
{ value: 'max', label: 'Max', description: 'Hardest thinking' } as const,
{ value: 'high', label: 'On', description: 'Multi-step reasoning' } as const,
{ value: 'none', label: 'Off', description: 'Disable thinking mode' } as const,
{ value: _UNSPECIFIED, label: 'Default', description: 'Model Default' } as const,
@@ -246,7 +246,10 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
// 0-day: xAI/Grok/Moonshot/Z.ai/DeepSeek models get default reasoning effort if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndMiscEffort')) {
// console.log('[DEV] openRouterModelToModelDescription: unexpected xAI/Grok/DeepSeek reasoning model:', model.id);
parameterSpecs.push({ paramId: 'llmVndMiscEffort' }); // binary thinking for these vendors
// Binary thinking only: OpenRouter's unified reasoning API currently rejects 'max' (see openai.chatCompletions.ts).
// We pin enumValues here so the shared llmVndMiscEffort registry (which also includes 'max' for native DeepSeek V4)
// does not surface 'max' in the UI for OR-routed models that can't honor it.
parameterSpecs.push({ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] });
}
break;