OpenRouter: support for Anthropic thinking variants via the OpenAI protocol. #811

Enrico Ros
2025-06-06 11:09:14 -07:00
parent b7bdae00f8
commit 4f63e98e7f
8 changed files with 82 additions and 170 deletions
+6 -15
@@ -474,17 +474,6 @@ export function prettyShortChatModelName(model: string | undefined): string {
 }
 
 function _prettyAnthropicModelName(modelId: string): string | null {
-  // Check for OpenRouter Anthropic models (format: "anthropic/claude-...")
-  if (modelId.includes('anthropic/claude-')) {
-    const subStr = modelId.replace('anthropic/', '');
-    // Handle Claude 4 models from OpenRouter
-    if (subStr.includes('claude-opus-4')) return 'Claude 4 Opus';
-    if (subStr.includes('claude-sonnet-4')) return 'Claude 4 Sonnet';
-    if (subStr.includes('claude-haiku-4')) return 'Claude 4 Haiku';
-  }
   // Handle direct Anthropic models
   if (modelId.indexOf('claude-') === -1) return null; // not a Claude model
   // must match any known prefix
@@ -500,10 +489,12 @@ function _prettyAnthropicModelName(modelId: string): string | null {
   const subStr = modelId.slice(claudeIndex);
   const version =
-    subStr.includes('-4-') ? '4'
-      : subStr.includes('-3-7-') ? '3.7'
-        : subStr.includes('-3-5-') ? '3.5'
-          : '3';
+    subStr.includes('-5') ? '5'
+      : subStr.includes('-4') ? '4'
+        : subStr.includes('-3-7') ? '3.7'
+          : subStr.includes('-3-5') ? '3.5'
+            : subStr.includes('-3') ? '3'
+              : '?';
   if (subStr.includes(`-opus`)) return `Claude ${version} Opus`;
   if (subStr.includes(`-sonnet`)) return `Claude ${version} Sonnet`;
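For reference, the widened version chain resolves both OpenRouter-style IDs (e.g. anthropic/claude-sonnet-4) and dated direct Anthropic IDs, which is what made the dedicated OpenRouter branch above removable. A small illustrative sketch, assuming the IDs pass the prefix checks elided from this hunk:

// Illustrative expectations (example IDs; not an exhaustive test):
_prettyAnthropicModelName('anthropic/claude-sonnet-4');  // 'Claude 4 Sonnet'  (matches '-4')
_prettyAnthropicModelName('claude-opus-4-20250514');     // 'Claude 4 Opus'
_prettyAnthropicModelName('claude-3-7-sonnet-20250219'); // 'Claude 3.7 Sonnet'
_prettyAnthropicModelName('gpt-4o');                     // null ('claude-' not found)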
+1 -3
@@ -92,9 +92,7 @@ export function aixCreateModelFromLLMOptions(
     ...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
     ...(llmTopP !== undefined ? { topP: llmTopP } : {}),
     ...(llmForceNoStream ? { forceNoStream: llmForceNoStream } : {}),
-    ...(llmVndAntThinkingBudget !== undefined ? {
-      vndAntThinkingBudget: llmVndAntThinkingBudget
-    } : {}),
+    ...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget } : {}),
     ...(llmVndGeminiShowThoughts ? { vndGeminiShowThoughts: llmVndGeminiShowThoughts } : {}),
     ...(llmVndGeminiThinkingBudget !== undefined ? { vndGeminiThinkingBudget: llmVndGeminiThinkingBudget } : {}),
     ...(llmVndOaiReasoningEffort ? { vndOaiReasoningEffort: llmVndOaiReasoningEffort } : {}),
@@ -120,27 +120,21 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
     };
   }
 
-  // Handle Anthropic Claude thinking capability via OpenRouter
-  const hasThinkingSuffix = model.id.includes(':thinking');
-  if (hasThinkingSuffix && openAIDialect === 'openrouter') {
-    payload.model = model.id.replace(':thinking', '');
-  }
-
-  // Get thinking budget from the model's vndAntThinkingBudget property
-  let thinkingBudget: number | undefined;
-  if (typeof model.vndAntThinkingBudget === 'number') {
-    thinkingBudget = model.vndAntThinkingBudget;
-  }
-
-  // Add reasoning parameter for Claude 4 thinking capability via OpenRouter
-  if (openAIDialect === 'openrouter' && (thinkingBudget !== undefined || hasThinkingSuffix)) {
-    // Use explicitly configured budget if provided, otherwise fall back to default
-    const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 1024;
-    // OpenRouter expects reasoning.max_tokens for Anthropic's thinking feature
-    payload.reasoning = {
-      max_tokens: finalThinkingBudget,
-    };
-  }
+  // [Anthropic] via OpenAI API (OpenRouter) - https://openrouter.ai/docs/use-cases/reasoning-tokens
+  if (openAIDialect === 'openrouter' && model.vndAntThinkingBudget !== undefined) {
+    // vndAntThinkingBudget's presence indicates a user preference:
+    // - [x] a number: the thinking budget in tokens
+    // - [ ] null: should disable thinking, but OpenRouter does not seem to support this
+    if (model.vndAntThinkingBudget === null) {
+      // simply not setting the reasoning field downgrades this to a non-thinking model
+      // console.warn('OpenRouter does not support disabling thinking of Anthropic models. Using default.');
+    } else {
+      payload.reasoning = {
+        max_tokens: model.vndAntThinkingBudget || 1024,
+      };
+    }
+  }
 
   if (hotFixOpenAIOFamily)
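For context, a minimal sketch of the wire payload this branch produces when a thinking variant is dispatched through OpenRouter with the default budget. The model ID and message are illustrative; reasoning.max_tokens is the field attested by this diff and the linked OpenRouter docs:

// Illustrative OpenRouter request body with vndAntThinkingBudget = 1024:
const payload = {
  model: 'anthropic/claude-sonnet-4',
  messages: [{ role: 'user', content: 'Think step by step: ...' }],
  stream: true,
  reasoning: { max_tokens: 1024 }, // mapped by OpenRouter to Anthropic extended thinking
};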
@@ -98,9 +98,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
       body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming),
     },
     demuxerFormat: streaming ? 'fast-sse' : null,
-    chatGenerateParse: streaming
-      ? createOpenAIChatCompletionsChunkParser()
-      : createOpenAIChatCompletionsParserNS(),
+    chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(),
   };
 }
 }
@@ -65,8 +65,8 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
       // ```Can you extend the Zod chunk response object parsing (all optional) to include the missing data? The following is an example of the object I received:```
       const chunkData = JSON.parse(eventData); // this is here just for ease of breakpoint, otherwise it could be inlined
 
-      // [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid)
-      if (_isOpenRouterResponse(chunkData) && _forwardOpenRouterDataError(chunkData, pt))
+      // [OpenRouter/others] transmits upstream errors pre-parsing (object wouldn't be valid)
+      if (_forwardOpenRouterDataError(chunkData, pt))
         return;
 
       const json = OpenAIWire_API_Chat_Completions.ChunkResponse_schema.parse(chunkData);
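The removed _isOpenRouterResponse pre-check (deleted further down in this diff) keyed on error.metadata.provider_name; the forwarder now runs unconditionally on every pre-parse object. A hedged sketch of the kind of upstream error it forwards — only error.metadata.provider_name is attested in this diff, the other fields are assumptions:

// Hypothetical mid-stream error object from OpenRouter:
const chunkData = {
  error: {
    message: 'Upstream provider error',        // assumed field
    metadata: { provider_name: 'Anthropic' },  // OpenRouter-specific marker
  },
};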
@@ -259,7 +259,15 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
       // Throws on malformed event data
       const completeData = JSON.parse(eventData);
 
+      // [OpenRouter/others] transmits upstream errors pre-parsing (object wouldn't be valid)
+      if (_forwardOpenRouterDataError(completeData, pt))
+        return;
+
+      // [OpenAI] we don't know yet if warning messages are sent in non-streaming - for now we log
+      if (completeData.warning)
+        console.log('AIX: OpenAI-dispatch-NS warning:', completeData.warning);
 
       // Parse the complete response
       const json = OpenAIWire_API_Chat_Completions.Response_schema.parse(completeData);
@@ -288,11 +296,6 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
       if (!message)
        throw new Error(`server response missing content (finish_reason: ${finish_reason})`);
 
-      // Handle reasoning field from OpenRouter
-      if (typeof message.reasoning === 'string') {
-        pt.appendReasoningText(message.reasoning);
-      }
-
       // message: Text
       if (typeof message.content === 'string') {
         if (message.content) {
@@ -302,6 +305,10 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
       } else if (message.content !== undefined && message.content !== null)
         throw new Error(`unexpected message content type: ${typeof message.content}`);
 
+      // [OpenRouter, 2025-06-05] Handle reasoning field from OpenRouter
+      if (typeof message.reasoning === 'string')
+        pt.appendReasoningText(message.reasoning);
 
       // message: Tool Calls
       for (const toolCall of (message.tool_calls || [])) {
@@ -447,21 +454,6 @@ function _fromOpenAIUsage(usage: OpenAIWire_API_Chat_Completions.Response['usage
   return metricsUpdate;
 }
 
-/**
- * Check if the response is from OpenRouter based on its structure or provider information
- */
-function _isOpenRouterResponse(parsedData: any): boolean {
-  if (!parsedData) return false;
-
-  // Check for OpenRouter-specific properties
-  if (parsedData.provider) return true;
-
-  // Check for error metadata which is OpenRouter-specific
-  if (parsedData.error?.metadata?.provider_name) return true;
-
-  return false;
-}
-
 /**
  * If there's an error in the pre-decoded message, push it down to the particle transmitter.
  */
@@ -151,10 +151,6 @@ export namespace OpenAIWire_Messages {
      * [OpenAI, 2024-10-01] The refusal message generated by the model.
      */
     refusal: z.string().nullable().optional(),
-    /**
-     * [OpenRouter, 2025-01-24] The reasoning/thinking content generated by the model.
-     */
-    reasoning: z.string().nullable().optional(),
     /**
      * [OpenAI, 2024-10-17] Data about a previous audio response from the model. Usage depends on the context:
      * - request (this schema): has an id, if present
@@ -165,6 +161,11 @@ export namespace OpenAIWire_Messages {
       id: z.string(),
     }).nullable().optional(),
 
+    /**
+     * [OpenRouter, 2025-06-05] The reasoning text generated by the model (e.g. with Anthropic thinking requests).
+     */
+    reasoning: z.string().nullable().optional(),
 
     // function_call: // ignored, as it's deprecated
     // name: _optionalParticipantName, // omitted by choice: generally unsupported
   });
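With the field relocated here, a non-streaming assistant message can carry reasoning alongside content; a minimal illustrative shape (values invented):

// Illustrative choice.message in a non-streaming OpenRouter response:
const message = {
  role: 'assistant',
  reasoning: 'First, restate the problem...', // appended via pt.appendReasoningText()
  content: 'The answer is 42.',
};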
@@ -291,6 +292,9 @@ export namespace OpenAIWire_API_Chat_Completions {
     }).optional(),
     reasoning_effort: z.enum(['low', 'medium', 'high']).optional(), // [OpenAI, 2024-12-17] reasoning effort, o1 models only for now
     include_reasoning: z.boolean().optional(), // [OpenRouter, 2025-01-24] enables reasoning tokens
+    reasoning: z.object({ // [OpenRouter, 2025-06-05] Reasoning parameter for Claude models
+      max_tokens: z.number().int().positive(),
+    }).optional(),
     prediction: z.object({ // [OpenAI, 2024-11-05] Predicted Outputs - for regenerating a file with only minor changes to most of the content.
       type: z.literal('content'),
       content: z.union([z.string(), z.array(OpenAIWire_ContentParts.ContentPart_schema)]),
@@ -348,11 +352,6 @@ export namespace OpenAIWire_API_Chat_Completions {
       }).nullable().optional(),
     }).optional(),
 
-    // [OpenRouter] Reasoning parameter for Claude models
-    reasoning: z.object({
-      max_tokens: z.number().int().positive(),
-    }).optional(),
 
     seed: z.number().int().optional(),
     stop: z.array(z.string()).optional(), // Up to 4 sequences where the API will stop generating further tokens.
     user: z.string().optional(),
@@ -549,7 +548,8 @@ export namespace OpenAIWire_API_Chat_Completions {
       content: z.string().nullable().optional(),
 
       // delta-reasoning content
       reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20]
-      reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24] could be null for Anthropic models
+      reasoning: z.string().optional() // [OpenRouter, 2025-01-24]
+        .nullable(), // [OpenRouter, 2025-06-05] null on Anthropic text responses past the reasoning blocks
 
       // delta-tool-calls content
       tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
         .nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160
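In practice this means a streaming Anthropic response first carries string reasoning deltas, then reasoning flips to null once regular text begins; an illustrative pair of SSE events (payloads invented):

data: {"choices":[{"delta":{"reasoning":"Let me work through this..."}}]}
data: {"choices":[{"delta":{"reasoning":null,"content":"There are infinitely many primes."}}]}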
@@ -1,6 +1,6 @@
 import type { ModelDescriptionSchema } from '~/modules/llms/server/llm.server.types';
 
 import { wireOpenrouterModelsListOutputSchema } from '~/modules/llms/server/openai/openrouter.wiretypes';
-import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
+import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
 
 import { fromManualMapping } from '~/modules/llms/server/openai/models/models.data';
@@ -92,6 +92,35 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
   });
 }
 
+export function openRouterInjectVariants(models: ModelDescriptionSchema[], model: ModelDescriptionSchema): ModelDescriptionSchema[] {
+
+  // keep the same list of models
+  models.push(model);
+
+  // inject thinking variants for Anthropic thinking models
+  const antThinkingModels = ['anthropic/claude-opus-4', 'anthropic/claude-sonnet-4', 'anthropic/claude-3-7-sonnet'];
+  if (antThinkingModels.includes(model.id)) {
+
+    // create a thinking variant for the model, by setting 'idVariant' and modifying the label/description
+    const thinkingVariant: ModelDescriptionSchema = {
+      ...model,
+      idVariant: 'thinking',
+      label: `${model.label} (thinking)`,
+      description: `(extended thinking mode) ${model.description}`,
+      interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning],
+      // this is what makes it a thinking variant
+      parameterSpecs: [
+        ...(model.parameterSpecs || []),
+        { paramId: 'llmVndAntThinkingBudget', initialValue: 1024 },
+      ],
+    };
+    models.push(thinkingVariant);
+  }
+
+  // no more variants to inject for now
+  return models;
+}
 
 /*
 export function openRouterStatTokenizers(openRouterModels: any[]): void {
   // parse all
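openRouterInjectVariants is shaped as an Array.reduce callback, so the router can fold it over the mapped descriptions (see the pipeline at the end of this diff); a minimal usage sketch:

// Each matching Anthropic model yields two entries: the base description and a
// 'thinking' variant carrying the llmVndAntThinkingBudget parameter spec.
const descriptions = rawModels
  .map(openRouterModelToModelDescription)
  .filter(desc => !!desc)
  .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);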
@@ -25,7 +25,7 @@ import { lmStudioModelToModelDescription, localAIModelSortFn, localAIModelToMode
 import { mistralModelsSort, mistralModelToModelDescription } from './models/mistral.models';
 import { openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
 import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './models/openpipe.models';
-import { openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models';
+import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models';
 import { perplexityAIModelDescriptions, perplexityAIModelSort } from './models/perplexity.models';
 import { togetherAIModelsToModelDescriptions } from './models/together.models';
 import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
@@ -183,18 +183,6 @@ export const llmOpenAIRouter = createTRPCRouter({
       // [OpenAI-dialects]: fetch openAI-style for all but Azure (will be then used in each dialect)
       const openAIWireModelsResponse = await openaiGETOrThrow<OpenAIWire_API_Models_List.Response>(access, '/v1/models');
 
-      // Log raw models from OpenRouter API
-      if (access.dialect === 'openrouter') {
-        console.log('[DEBUG] openai.router.ts - Raw OpenRouter Models List (sample):', JSON.stringify(openAIWireModelsResponse?.data?.slice(0, 5), null, 2));
-
-        // Log all Anthropic Claude models to check the exact IDs
-        const claudeModels = openAIWireModelsResponse?.data?.filter((model: any) => model.id.startsWith('anthropic/claude'));
-        console.log('[DEBUG] openai.router.ts - All Claude Models:', JSON.stringify(claudeModels, null, 2));
-
-        // Count how many models we have total from OpenRouter
-        console.log('[DEBUG] openai.router.ts - Total models from OpenRouter:', openAIWireModelsResponse?.data?.length);
-      }
 
       // [Together] missing the .data property
       if (access.dialect === 'togetherai')
         return { models: togetherAIModelsToModelDescriptions(openAIWireModelsResponse) };
@@ -286,90 +274,12 @@ export const llmOpenAIRouter = createTRPCRouter({
           break;
 
         case 'openrouter':
-          console.log('[DEBUG] openai.router.ts - Processing OpenRouter models');
-
-          // Look for any Claude 4 models to determine actual IDs
-          const allClaudeModelIds = openAIModels
-            .filter((m: any) => m.id.startsWith('anthropic/claude'))
-            .map((m: any) => m.id);
-          console.log('[DEBUG] openai.router.ts - All Claude model IDs:', allClaudeModelIds);
-
-          // Look specifically for Claude Opus 4 and Sonnet 4 models (using exact IDs)
-          const claudeOpus4Models = openAIModels.filter((m: any) =>
-            m.id === 'anthropic/claude-opus-4');
-          const claudeSonnet4Models = openAIModels.filter((m: any) =>
-            m.id === 'anthropic/claude-sonnet-4');
-          console.log('[DEBUG] openai.router.ts - Claude Opus 4 models:',
-            claudeOpus4Models.map((m: any) => m.id));
-          console.log('[DEBUG] openai.router.ts - Claude Sonnet 4 models:',
-            claudeSonnet4Models.map((m: any) => m.id));
-
-          // Define the Claude model IDs that should have thinking variants
-          // Use actual IDs found in the OpenRouter response if possible
-          const CLAUDE_MODELS_FOR_THINKING_VARIANT = [
-            // Try to find the actual model IDs dynamically if they exist
-            claudeOpus4Models.length > 0 ? claudeOpus4Models[0].id : 'anthropic/claude-opus-4',
-            claudeSonnet4Models.length > 0 ? claudeSonnet4Models[0].id : 'anthropic/claude-sonnet-4',
-          ];
-          console.log('[DEBUG] openai.router.ts - Using these IDs for thinking variants:',
-            CLAUDE_MODELS_FOR_THINKING_VARIANT);
 
           // openRouterStatTokenizers(openAIModels);
           models = openAIModels
             .sort(openRouterModelFamilySortFn)
-            .flatMap(rawOpenRouterModel => {
-              const standardDescription = openRouterModelToModelDescription(rawOpenRouterModel);
-              if (!standardDescription) {
-                return []; // Skip if standard model description fails
-              }
-
-              const modelIdFromOpenRouter = rawOpenRouterModel.id;
-
-              // Log when we find one of our target models
-              if (CLAUDE_MODELS_FOR_THINKING_VARIANT.includes(modelIdFromOpenRouter)) {
-                console.log('[DEBUG] openai.router.ts - Found target Claude model:', modelIdFromOpenRouter);
-              }
-
-              // Check if this model is one of our targets and doesn't already have a thinking indicator
-              if (CLAUDE_MODELS_FOR_THINKING_VARIANT.includes(modelIdFromOpenRouter) &&
-                !modelIdFromOpenRouter.includes(':thinking')) {
-                console.log('[DEBUG] openai.router.ts - Creating thinking variant for:', modelIdFromOpenRouter);
-
-                // Create the "thinking" variant based on the standard one
-                const thinkingDescription: ModelDescriptionSchema = {
-                  ...standardDescription,
-                  id: `${standardDescription.id}:thinking`, // Append suffix for uniqueness
-                  label: `${standardDescription.label} (thinking)`,
-                  // Add parameter spec for the thinking budget
-                  parameterSpecs: [
-                    ...(standardDescription.parameterSpecs || []),
-                    {
-                      paramId: 'llmVndAntThinkingBudget',
-                      initialValue: 1024,
-                    },
-                  ],
-                };
-                console.log('[DEBUG] openai.router.ts - Created standard model:', standardDescription.id, standardDescription.label);
-                console.log('[DEBUG] openai.router.ts - Created thinking model:', thinkingDescription.id, thinkingDescription.label);
-                return [standardDescription, thinkingDescription];
-              } else {
-                return [standardDescription]; // Only the standard version
-              }
-            })
-            .filter(desc => !!desc);
-
-          // Count how many models we have after processing
-          console.log('[DEBUG] openai.router.ts - Total final models after processing:', models.length);
-
-          // Log the mapped OpenRouter model descriptions (focusing on Anthropic models)
-          const claudeModelsAfterProcessing = models.filter(m => m.id && m.id.includes('anthropic/claude'));
-          console.log('[DEBUG] openai.router.ts - All Claude models after processing:',
-            JSON.stringify(claudeModelsAfterProcessing.map(m => ({ id: m.id, label: m.label })), null, 2));
+            .map(openRouterModelToModelDescription)
+            .filter(desc => !!desc)
+            .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);
           break;
       }