Use reasoning instead of thinking for OpenRouter

This commit is contained in:
Sam Jones
2025-06-05 17:29:43 +01:00
parent b28a282aba
commit b699a665a1
4 changed files with 43 additions and 22 deletions
@@ -132,18 +132,15 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
thinkingBudget = model.vndAntThinkingBudget;
}
// Add thinking parameter if the model supports it
// Add reasoning parameter for Claude 4 thinking capability via OpenRouter
if (openAIDialect === 'openrouter' && (thinkingBudget !== undefined || hasThinkingSuffix)) {
// Use explicitly configured budget if provided, otherwise fall back to default
const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 256;
const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 1024;
payload.thinking = {
type: "enabled",
budget_tokens: finalThinkingBudget,
// OpenRouter expects reasoning.max_tokens for Anthropic's thinking feature
payload.reasoning = {
max_tokens: finalThinkingBudget,
};
// Debug log to show the final request to OpenRouter
console.log(`[DEBUG] OpenRouter request for model: ${payload.model}, thinking budget: ${finalThinkingBudget}`);
}
if (hotFixOpenAIOFamily)
@@ -152,6 +149,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
if (hotFixRemoveStreamOptions)
payload = _fixRemoveStreamOptions(payload);
// Preemptive error detection with server-side payload validation before sending it upstream
const validated = OpenAIWire_API_Chat_Completions.Request_schema.safeParse(payload);
if (!validated.success) {
@@ -98,7 +98,9 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming),
},
demuxerFormat: streaming ? 'fast-sse' : null,
chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(),
chatGenerateParse: streaming
? createOpenAIChatCompletionsChunkParser()
: createOpenAIChatCompletionsParserNS(),
};
}
}
@@ -64,9 +64,9 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
// Throws on malformed event data
// ```Can you extend the Zod chunk response object parsing (all optional) to include the missing data? The following is an example of the object I received:```
const chunkData = JSON.parse(eventData); // this is here just for ease of breakpoint, otherwise it could be inlined
// [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid)
if (_forwardOpenRouterDataError(chunkData, pt))
if (_isOpenRouterResponse(chunkData) && _forwardOpenRouterDataError(chunkData, pt))
return;
const json = OpenAIWire_API_Chat_Completions.ChunkResponse_schema.parse(chunkData);
@@ -259,15 +259,7 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
// Throws on malformed event data
const completeData = JSON.parse(eventData);
// [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid)
if (_forwardOpenRouterDataError(completeData, pt))
return;
// [OpenAI] we don't know yet if warning messages are sent in non-streaming - for now we log
if (completeData.warning)
console.log('AIX: OpenAI-dispatch-NS warning:', completeData.warning);
// Parse the complete response
const json = OpenAIWire_API_Chat_Completions.Response_schema.parse(completeData);
@@ -296,6 +288,11 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
if (!message)
throw new Error(`server response missing content (finish_reason: ${finish_reason})`);
// Handle reasoning field from OpenRouter
if (typeof message.reasoning === 'string') {
pt.appendReasoningText(message.reasoning);
}
// message: Text
if (typeof message.content === 'string') {
if (message.content) {
@@ -450,6 +447,21 @@ function _fromOpenAIUsage(usage: OpenAIWire_API_Chat_Completions.Response['usage
return metricsUpdate;
}
/**
 * Heuristic: does this pre-parsed (JSON.parse'd) payload look like an OpenRouter response?
 *
 * Detection relies on two OpenRouter-specific markers: a top-level `provider`
 * field on regular responses, and `error.metadata.provider_name` on error payloads.
 */
function _isOpenRouterResponse(parsedData: any): boolean {
  // nullish / falsy payloads can't carry either marker
  if (!parsedData) return false;
  // either marker is sufficient evidence of an OpenRouter origin
  return Boolean(parsedData.provider || parsedData.error?.metadata?.provider_name);
}
/**
* If there's an error in the pre-decoded message, push it down to the particle transmitter.
*/
@@ -480,4 +492,4 @@ function _forwardOpenRouterDataError(parsedData: any, pt: IParticleTransmitter)
// Transmit the error as text - note: throw if you want to transmit as 'error'
pt.setDialectTerminatingIssue(errorMessage, IssueSymbols.Generic);
return true;
}
}
@@ -151,6 +151,10 @@ export namespace OpenAIWire_Messages {
* [OpenAI, 2024-10-01] The refusal message generated by the model.
*/
refusal: z.string().nullable().optional(),
/**
* [OpenRouter, 2025-01-24] The reasoning/thinking content generated by the model.
*/
reasoning: z.string().nullable().optional(),
/**
* [OpenAI, 2024-10-17] Data about a previous audio response from the model. Usage depends on the context:
* - request (this schema): has an id, if present
@@ -350,6 +354,11 @@ export namespace OpenAIWire_API_Chat_Completions {
budget_tokens: z.number().int().positive(),
}).optional(),
// [OpenRouter] Reasoning parameter for Claude models
reasoning: z.object({
max_tokens: z.number().int().positive(),
}).optional(),
seed: z.number().int().optional(),
stop: z.array(z.string()).optional(), // Up to 4 sequences where the API will stop generating further tokens.
user: z.string().optional(),
@@ -546,7 +555,7 @@ export namespace OpenAIWire_API_Chat_Completions {
content: z.string().nullable().optional(),
// delta-reasoning content
reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20]
reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24]
reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24] could be null for Anthropic models
// delta-tool-calls content
tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
.nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160