AIX: OpenRouter: write/parse the new reasoning request / reasoning_details response

Removes the older OpenRouter reasoning mechanisms: the `include_reasoning` request flag and the string-typed `reasoning` response field.
2026-05-11 14:10:15 -07:00 · 2025-11-11 17:46:58 -08:00
parent babb1dd962
commit d6843d7fcf
3 changed files with 82 additions and 33 deletions
@@ -43,7 +43,6 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
    // [OpenRouter] 2025-10-02: do not throw, rather let it fail if upstream has issues
    // openAIDialect === 'openrouter' || /* OpenRouter FC support is not good (as of 2024-07-15) */
    openAIDialect === 'perplexity';
-  const hotFixVndORIncludeReasoning = openAIDialect === 'openrouter'; // [OpenRouter, 2025-01-24] has a special `include_reasoning` field to show the chain of thought

  // Model incompatibilities -> Hotfixes

@@ -90,10 +89,6 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
    user: undefined,
  };

-  // [OpenRouter, 2025-01-24]
-  if (hotFixVndORIncludeReasoning)
-    payload.include_reasoning = true;
-
  // Top-P instead of temperature
  if (model.topP !== undefined) {
    delete payload.temperature;
@@ -224,20 +219,35 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
    }
  }

-  // [OpenRouter] -> [Anthropic] via OpenAI API - https://openrouter.ai/docs/use-cases/reasoning-tokens
-  if (openAIDialect === 'openrouter' && model.vndAntThinkingBudget !== undefined) {
+  // [OpenRouter, 2025-11-11] Unified reasoning parameter - supports both token-based and effort-based control
+  if (openAIDialect === 'openrouter') {

-    // vndAntThinkingBudget's presence indicates a user preference:
-    // - [x] a number, which is the budget in tokens
-    // - [ ] null: shall disable thinking, but openrouter does not support this?
-    if (model.vndAntThinkingBudget === null) {
-      // simply not setting the reasoning field downgrades this to a non-thinking model
-      // console.warn('OpenRouter does not support disabling thinking of Anthropic models. Using default.');
-    } else {
-      payload.reasoning = {
-        max_tokens: model.vndAntThinkingBudget || 1024,
-      };
+    // Anthropic via OpenRouter
+    if (model.vndAntThinkingBudget !== undefined) {
+      // vndAntThinkingBudget's presence indicates a user preference:
+      // - a number: explicit token budget (1024-32000)
+      // - null: disable thinking (don't set reasoning field)
+      if (model.vndAntThinkingBudget === null) {
+        // If null, don't set reasoning field at all (disables thinking)
+      } else
+        payload.reasoning = { max_tokens: model.vndAntThinkingBudget || 8192 };
    }
+    // Gemini via OpenRouter
+    else if (model.vndGeminiThinkingBudget !== undefined)
+      payload.reasoning = { max_tokens: model.vndGeminiThinkingBudget || 8192 };
+    // OpenAI via OpenRouter
+    else if (model.vndOaiReasoningEffort && model.vndOaiReasoningEffort !== 'minimal')
+      payload.reasoning = { effort: model.vndOaiReasoningEffort };
+
+    // FIX double-reasoning request - move a non-'minimal' reasoning_effort into reasoning (unless reasoning is already set), then always drop reasoning_effort
+    if (payload.reasoning_effort && payload.reasoning_effort !== 'minimal') {
+      // we don't know which one takes precedence, so we prioritize .reasoning (OpenRouter) even if .reasoning_effort (OpenAI) is present
+      if (!payload.reasoning)
+        payload.reasoning = { effort: payload.reasoning_effort };
+      // Fix for `Only one of "reasoning" and "reasoning_effort" may be provided`
+      delete payload.reasoning_effort;
+    }
+
  }

  if (hotFixOpenAIOFamily)
@@ -196,11 +196,27 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
        deltaHasReasoning = true;

      }
-      // delta: Reasoning [OpenRouter, 2025-01-24]
-      else if (typeof delta.reasoning === 'string') {
+      // delta: Reasoning Details (Structured) [OpenRouter, 2025-11-11]
+      else if (Array.isArray(delta.reasoning_details)) {

-        pt.appendReasoningText(delta.reasoning);
-        deltaHasReasoning = true;
+        for (const reasoningDetail of delta.reasoning_details) {
+          // Extract text from reasoning blocks based on type
+          if (reasoningDetail.type === 'reasoning.text' && typeof reasoningDetail.text === 'string') {
+            pt.appendReasoningText(reasoningDetail.text);
+            deltaHasReasoning = true;
+          }
+          // Summaries can also be shown as reasoning
+          else if (reasoningDetail.type === 'reasoning.summary' && typeof reasoningDetail.summary === 'string') {
+            pt.appendReasoningText(`[Summary] ${reasoningDetail.summary}`);
+            deltaHasReasoning = true;
+          }
+          // 'encrypted' type - reasoning happened but not returned, skip
+          else if (reasoningDetail.type === 'reasoning.encrypted') {
+            // NOTE: Anthropic supports encrypted reasoning, and we do too, but it is not handled on this path yet
+            // reasoning happened but not returned, skip
+          } else
+            console.log('AIX: OpenAI-dispatch: unexpected reasoning detail type:', reasoningDetail);
+        }

      }

@@ -425,9 +441,19 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
      } else if (message.content !== undefined && message.content !== null)
        throw new Error(`unexpected message content type: ${typeof message.content}`);

-      // [OpenRouter, 2025-06-05] Handle reasoning field from OpenRouter
-      if (typeof message.reasoning === 'string')
-        pt.appendReasoningText(message.reasoning);
+      // [OpenRouter, 2025-11-11] Handle structured reasoning_details
+      if (Array.isArray(message.reasoning_details)) {
+        for (const reasoningDetail of message.reasoning_details) {
+          if (reasoningDetail.type === 'reasoning.text' && typeof reasoningDetail.text === 'string') {
+            pt.appendReasoningText(reasoningDetail.text);
+          } else if (reasoningDetail.type === 'reasoning.summary' && typeof reasoningDetail.summary === 'string') {
+            pt.appendReasoningText(`[Summary] ${reasoningDetail.summary}`);
+          } else if (reasoningDetail.type === 'reasoning.encrypted') {
+            // reasoning happened but not returned, skip
+          } else
+            console.log('AIX: OpenAI-dispatch-NS: unexpected reasoning detail type:', reasoningDetail);
+        }
+      }

      // message: Tool Calls
      for (const toolCall of (message.tool_calls || [])) {
@@ -108,6 +108,18 @@ export namespace OpenAIWire_ContentParts {
    }),
  });

+  // [OpenRouter, 2025-11-11] Reasoning details - structured reasoning output
+  // https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-detail-types
+  export const OpenRouter_ReasoningDetail_schema = z.object({
+    type: z.union([
+      z.enum(['reasoning.summary', 'reasoning.text', 'reasoning.encrypted']),
+      z.string(),
+    ]),
+    text: z.string().optional(), // Actual reasoning text (for 'text' type)
+    summary: z.string().optional(), // Summary of reasoning (for 'summary' type)
+    // 'encrypted' type has no additional fields - indicates reasoning happened but not returned
+  });
+
 }

 export namespace OpenAIWire_Messages {
@@ -164,10 +176,8 @@ export namespace OpenAIWire_Messages {
      id: z.string(),
    }).nullable().optional(),

-    /**
-     * [OpenRouter, 2025-06-05] The reasoning text generated by the model (e.g. with Anthropic thinking requests).
-     */
-    reasoning: z.string().nullable().optional(),
+    /** [OpenRouter, 2025-11-11] Reasoning traces with multiple blocks (summary, text, encrypted). */
+    reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).optional(),

    // function_call: // ignored, as it's deprecated
    // name: _optionalParticipantName, // omitted by choice: generally unsupported
@@ -303,9 +313,12 @@ export namespace OpenAIWire_API_Chat_Completions {
      include_usage: z.boolean().optional(), // If set, an additional chunk will be streamed with a 'usage' field on the entire request.
    }).optional(),
    reasoning_effort: z.enum(['minimal', 'low', 'medium', 'high']).optional(), // [OpenAI, 2024-12-17] [Perplexity, 2025-06-23] reasoning effort
-    include_reasoning: z.boolean().optional(), // [OpenRouter, 2025-01-24] enables reasoning tokens
-    reasoning: z.object({ // [OpenRouter, 2025-06-05] Reasoning parameter for Claude models
-      max_tokens: z.number().int().positive(),
+    // [OpenRouter, 2025-11-11] Unified reasoning parameter for all models
+    reasoning: z.object({
+      max_tokens: z.number().int().positive().optional(), // Token-based control (Anthropic, Gemini): 1024-32000
+      effort: z.enum(['low', 'medium', 'high']).optional(), // Effort-based control (OpenAI o1/o3, DeepSeek): allocates % of max_tokens
+      enabled: z.boolean().optional(), // Simple enable with medium effort defaults
+      exclude: z.boolean().optional(), // Use reasoning internally without returning it in response
    }).optional(),
    prediction: z.object({ // [OpenAI, 2024-11-05] Predicted Outputs - for regenerating a file with only minor changes to most of the content.
      type: z.literal('content'),
@@ -625,8 +638,8 @@ export namespace OpenAIWire_API_Chat_Completions {
      }))),
    // delta-reasoning content
    reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20]
-    reasoning: z.string().optional() // [OpenRouter, 2025-01-24]
-      .nullable(), // [OpenRouter, 2025-06-05] null on Anthropic text responses past the reasoning blocks
+    // [OpenRouter, 2025-11-11] Reasoning traces
+    reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).nullish(),
    // delta-tool-calls content
    tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
      .nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160