From 4f63e98e7fa77b8a11e116cfcb5779c8ed8d86b0 Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Fri, 6 Jun 2025 11:09:14 -0700 Subject: [PATCH] OpenRouter: support for Anthropic thinking variants via the OpenAI protocol. #811 --- src/common/util/dMessageUtils.tsx | 21 ++-- src/modules/aix/client/aix.client.ts | 4 +- .../adapters/openai.chatCompletions.ts | 36 +++---- .../chatGenerate/chatGenerate.dispatch.ts | 4 +- .../chatGenerate/parsers/openai.parser.ts | 38 +++---- .../dispatch/wiretypes/openai.wiretypes.ts | 20 ++-- .../server/openai/models/openrouter.models.ts | 31 +++++- .../llms/server/openai/openai.router.ts | 98 +------------------ 8 files changed, 82 insertions(+), 170 deletions(-) diff --git a/src/common/util/dMessageUtils.tsx b/src/common/util/dMessageUtils.tsx index 96aea19b5..2fd2bc31c 100644 --- a/src/common/util/dMessageUtils.tsx +++ b/src/common/util/dMessageUtils.tsx @@ -474,17 +474,6 @@ export function prettyShortChatModelName(model: string | undefined): string { } function _prettyAnthropicModelName(modelId: string): string | null { - // Check for OpenRouter Anthropic models (format: "anthropic/claude-...") - if (modelId.includes('anthropic/claude-')) { - const subStr = modelId.replace('anthropic/', ''); - - // Handle Claude 4 models from OpenRouter - if (subStr.includes('claude-opus-4')) return 'Claude 4 Opus'; - if (subStr.includes('claude-sonnet-4')) return 'Claude 4 Sonnet'; - if (subStr.includes('claude-haiku-4')) return 'Claude 4 Haiku'; - } - - // Handle direct Anthropic models if (modelId.indexOf('claude-') === -1) return null; // not a Claude model // must match any known prefix @@ -500,10 +489,12 @@ function _prettyAnthropicModelName(modelId: string): string | null { const subStr = modelId.slice(claudeIndex); const version = - subStr.includes('-4-') ? '4' - : subStr.includes('-3-7-') ? '3.7' - : subStr.includes('-3-5-') ? '3.5' - : '3'; + subStr.includes('-5') ? '5' + : subStr.includes('-4') ? '4' + : subStr.includes('-3-7') ? '3.7' + : subStr.includes('-3-5') ? '3.5' + : subStr.includes('-3') ? '3' + : '?'; if (subStr.includes(`-opus`)) return `Claude ${version} Opus`; if (subStr.includes(`-sonnet`)) return `Claude ${version} Sonnet`; diff --git a/src/modules/aix/client/aix.client.ts b/src/modules/aix/client/aix.client.ts index becb342c4..5e0eac15c 100644 --- a/src/modules/aix/client/aix.client.ts +++ b/src/modules/aix/client/aix.client.ts @@ -92,9 +92,7 @@ export function aixCreateModelFromLLMOptions( ...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}), ...(llmTopP !== undefined ? { topP: llmTopP } : {}), ...(llmForceNoStream ? { forceNoStream: llmForceNoStream } : {}), - ...(llmVndAntThinkingBudget !== undefined ? { - vndAntThinkingBudget: llmVndAntThinkingBudget - } : {}), + ...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget } : {}), ...(llmVndGeminiShowThoughts ? { vndGeminiShowThoughts: llmVndGeminiShowThoughts } : {}), ...(llmVndGeminiThinkingBudget !== undefined ? { vndGeminiThinkingBudget: llmVndGeminiThinkingBudget } : {}), ...(llmVndOaiReasoningEffort ? 
{ vndOaiReasoningEffort: llmVndOaiReasoningEffort } : {}), diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts index 473a543b2..2bc97327c 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts @@ -120,27 +120,21 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: }; } - // Handle Anthropic Claude thinking capability via OpenRouter - const hasThinkingSuffix = model.id.includes(':thinking'); - if (hasThinkingSuffix && openAIDialect === 'openrouter') { - payload.model = model.id.replace(':thinking', ''); - } - - // Get thinking budget from the model's vndAntThinkingBudget property - let thinkingBudget: number | undefined; - if (typeof model.vndAntThinkingBudget === 'number') { - thinkingBudget = model.vndAntThinkingBudget; - } - - // Add reasoning parameter for Claude 4 thinking capability via OpenRouter - if (openAIDialect === 'openrouter' && (thinkingBudget !== undefined || hasThinkingSuffix)) { - // Use explicitly configured budget if provided, otherwise fall back to default - const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 1024; - - // OpenRouter expects reasoning.max_tokens for Anthropic's thinking feature - payload.reasoning = { - max_tokens: finalThinkingBudget, - }; + + // [Anthropic] via OpenAI API (OpenRouter) - https://openrouter.ai/docs/use-cases/reasoning-tokens + if (openAIDialect === 'openrouter' && model.vndAntThinkingBudget !== undefined) { + + // vndAntThinkingBudget's presence indicates a user preference: + // - [x] a number, which is the budget in tokens + // - [ ] null: shall disable thinking, but openrouter does not support this? + if (model.vndAntThinkingBudget === null) { + // simply not setting the reasoning field downgrades this to a non-thinking model + // console.warn('OpenRouter does not support disabling thinking of Anthropic models. Using default.'); + } else { + payload.reasoning = { + max_tokens: model.vndAntThinkingBudget || 1024, + }; + } } if (hotFixOpenAIOFamily) diff --git a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts index cbc3bc08f..8f908dd5c 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts @@ -98,9 +98,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_ body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming), }, demuxerFormat: streaming ? 'fast-sse' : null, - chatGenerateParse: streaming - ? createOpenAIChatCompletionsChunkParser() - : createOpenAIChatCompletionsParserNS(), + chatGenerateParse: streaming ? 
createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(), }; } } diff --git a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts index cde4bbea4..5e7f32631 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts @@ -65,8 +65,8 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct // ```Can you extend the Zod chunk response object parsing (all optional) to include the missing data? The following is an exampel of the object I received:``` const chunkData = JSON.parse(eventData); // this is here just for ease of breakpoint, otherwise it could be inlined - // [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid) - if (_isOpenRouterResponse(chunkData) && _forwardOpenRouterDataError(chunkData, pt)) + // [OpenRouter/others] transmits upstream errors pre-parsing (object wouldn't be valid) + if (_forwardOpenRouterDataError(chunkData, pt)) return; const json = OpenAIWire_API_Chat_Completions.ChunkResponse_schema.parse(chunkData); @@ -259,7 +259,15 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction // Throws on malformed event data const completeData = JSON.parse(eventData); - + + // [OpenRouter/others] transmits upstream errors pre-parsing (object wouldn't be valid) + if (_forwardOpenRouterDataError(completeData, pt)) + return; + + // [OpenAI] we don't know yet if warning messages are sent in non-streaming - for now we log + if (completeData.warning) + console.log('AIX: OpenAI-dispatch-NS warning:', completeData.warning); + // Parse the complete response const json = OpenAIWire_API_Chat_Completions.Response_schema.parse(completeData); @@ -288,11 +296,6 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction if (!message) throw new Error(`server response missing content (finish_reason: ${finish_reason})`); - // Handle reasoning field from OpenRouter - if (typeof message.reasoning === 'string') { - pt.appendReasoningText(message.reasoning); - } - // message: Text if (typeof message.content === 'string') { if (message.content) { @@ -302,6 +305,10 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction } else if (message.content !== undefined && message.content !== null) throw new Error(`unexpected message content type: ${typeof message.content}`); + // [OpenRouter, 2025-06-05] Handle reasoning field from OpenRouter + if (typeof message.reasoning === 'string') + pt.appendReasoningText(message.reasoning); + // message: Tool Calls for (const toolCall of (message.tool_calls || [])) { @@ -447,21 +454,6 @@ function _fromOpenAIUsage(usage: OpenAIWire_API_Chat_Completions.Response['usage return metricsUpdate; } -/** - * Check if the response is from OpenRouter based on its structure or provider information - */ -function _isOpenRouterResponse(parsedData: any): boolean { - if (!parsedData) return false; - - // Check for OpenRouter-specific properties - if (parsedData.provider) return true; - - // Check for error metadata which is OpenRouter-specific - if (parsedData.error?.metadata?.provider_name) return true; - - return false; -} - /** * If there's an error in the pre-decoded message, push it down to the particle transmitter. 
*/ diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts index 7e5353ed6..ddfa25534 100644 --- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts @@ -151,10 +151,6 @@ export namespace OpenAIWire_Messages { * [OpenAI, 2024-10-01] The refusal message generated by the model. */ refusal: z.string().nullable().optional(), - /** - * [OpenRouter, 2025-01-24] The reasoning/thinking content generated by the model. - */ - reasoning: z.string().nullable().optional(), /** * [OpenAI, 2024-10-17] Data about a previous audio response from the model. Usage depends on the context: * - request (this schema): has an id, if present @@ -165,6 +161,11 @@ export namespace OpenAIWire_Messages { id: z.string(), }).nullable().optional(), + /** + * [OpenRouter, 2025-06-05] The reasoning text generated by the model (e.g. with Anthropic thinking requests). + */ + reasoning: z.string().nullable().optional(), + // function_call: // ignored, as it's deprecated // name: _optionalParticipantName, // omitted by choice: generally unsupported }); @@ -291,6 +292,9 @@ export namespace OpenAIWire_API_Chat_Completions { }).optional(), reasoning_effort: z.enum(['low', 'medium', 'high']).optional(), // [OpenAI, 2024-12-17] reasoning effort, o1 models only for now include_reasoning: z.boolean().optional(), // [OpenRouter, 2025-01-24] enables reasoning tokens + reasoning: z.object({ // [OpenRouter, 2025-06-05] Reasoning parameter for Claude models + max_tokens: z.number().int().positive(), + }).optional(), prediction: z.object({ // [OpenAI, 2024-11-05] Predicted Outputs - for regenerating a file with only minor changes to most of the content. type: z.literal('content'), content: z.union([z.string(), z.array(OpenAIWire_ContentParts.ContentPart_schema)]), @@ -348,11 +352,6 @@ export namespace OpenAIWire_API_Chat_Completions { }).nullable().optional(), }).optional(), - // [OpenRouter] Reasoning parameter for Claude models - reasoning: z.object({ - max_tokens: z.number().int().positive(), - }).optional(), - seed: z.number().int().optional(), stop: z.array(z.string()).optional(), // Up to 4 sequences where the API will stop generating further tokens. 
user: z.string().optional(), @@ -549,7 +548,8 @@ export namespace OpenAIWire_API_Chat_Completions { content: z.string().nullable().optional(), // delta-reasoning content reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20] - reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24] could be null for Anthropic models + reasoning: z.string().optional() // [OpenRouter, 2025-01-24] + .nullable(), // [OpenRouter, 2025-06-05] null on Anthropic text responses past the reasoning blocks // delta-tool-calls content tool_calls: z.array(ChunkDeltaToolCalls_schema).optional() .nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160 diff --git a/src/modules/llms/server/openai/models/openrouter.models.ts b/src/modules/llms/server/openai/models/openrouter.models.ts index a285a595d..596262968 100644 --- a/src/modules/llms/server/openai/models/openrouter.models.ts +++ b/src/modules/llms/server/openai/models/openrouter.models.ts @@ -1,6 +1,6 @@ import type { ModelDescriptionSchema } from '~/modules/llms/server/llm.server.types'; import { wireOpenrouterModelsListOutputSchema } from '~/modules/llms/server/openai/openrouter.wiretypes'; -import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types'; +import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; import { fromManualMapping } from '~/modules/llms/server/openai/models/models.data'; @@ -92,6 +92,35 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr }); } +export function openRouterInjectVariants(models: ModelDescriptionSchema[], model: ModelDescriptionSchema): ModelDescriptionSchema[] { + // keep the same list of models + models.push(model); + + // inject thinking variants for Anthropic thinking models + const antThinkingModels = ['anthropic/claude-opus-4', 'anthropic/claude-sonnet-4', 'anthropic/claude-3-7-sonnet']; + if (antThinkingModels.includes(model.id)) { + + // create a thinking variant for the model, by setting 'idVariant' and modifying the label/description + const thinkingVariant: ModelDescriptionSchema = { + ...model, + idVariant: 'thinking', + label: `${model.label} (thinking)`, + description: `(extended thinking mode) ${model.description}`, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning], + // this is what makes it a thinking variant + parameterSpecs: [ + ...(model.parameterSpecs || []), + { paramId: 'llmVndAntThinkingBudget', initialValue: 1024 }, + ], + }; + + models.push(thinkingVariant); + } + + // no more variants to inject for now + return models; +} + /* export function openRouterStatTokenizers(openRouterModels: any[]): void { // parse all diff --git a/src/modules/llms/server/openai/openai.router.ts b/src/modules/llms/server/openai/openai.router.ts index 413a42729..f36df1a69 100644 --- a/src/modules/llms/server/openai/openai.router.ts +++ b/src/modules/llms/server/openai/openai.router.ts @@ -25,7 +25,7 @@ import { lmStudioModelToModelDescription, localAIModelSortFn, localAIModelToMode import { mistralModelsSort, mistralModelToModelDescription } from './models/mistral.models'; import { openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models'; import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './models/openpipe.models'; -import { openRouterModelFamilySortFn, openRouterModelToModelDescription } from 
'./models/openrouter.models'; +import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models'; import { perplexityAIModelDescriptions, perplexityAIModelSort } from './models/perplexity.models'; import { togetherAIModelsToModelDescriptions } from './models/together.models'; import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes'; @@ -183,18 +183,6 @@ export const llmOpenAIRouter = createTRPCRouter({ // [OpenAI-dialects]: fetch openAI-style for all but Azure (will be then used in each dialect) const openAIWireModelsResponse = await openaiGETOrThrow(access, '/v1/models'); - // Log raw models from OpenRouter API - if (access.dialect === 'openrouter') { - console.log('[DEBUG] openai.router.ts - Raw OpenRouter Models List (sample):', JSON.stringify(openAIWireModelsResponse?.data?.slice(0, 5), null, 2)); - - // Log all Anthropic Claude models to check the exact IDs - const claudeModels = openAIWireModelsResponse?.data?.filter((model: any) => model.id.startsWith('anthropic/claude')); - console.log('[DEBUG] openai.router.ts - All Claude Models:', JSON.stringify(claudeModels, null, 2)); - - // Count how many models we have total from OpenRouter - console.log('[DEBUG] openai.router.ts - Total models from OpenRouter:', openAIWireModelsResponse?.data?.length); - } - // [Together] missing the .data property if (access.dialect === 'togetherai') return { models: togetherAIModelsToModelDescriptions(openAIWireModelsResponse) }; @@ -286,90 +274,12 @@ export const llmOpenAIRouter = createTRPCRouter({ break; case 'openrouter': - console.log('[DEBUG] openai.router.ts - Processing OpenRouter models'); - - // Look for any Claude 4 models to determine actual IDs - const allClaudeModelIds = openAIModels - .filter((m: any) => m.id.startsWith('anthropic/claude')) - .map((m: any) => m.id); - - console.log('[DEBUG] openai.router.ts - All Claude model IDs:', allClaudeModelIds); - - // Look specifically for Claude Opus 4 and Sonnet 4 models (using exact IDs) - const claudeOpus4Models = openAIModels.filter((m: any) => - m.id === 'anthropic/claude-opus-4'); - const claudeSonnet4Models = openAIModels.filter((m: any) => - m.id === 'anthropic/claude-sonnet-4'); - - console.log('[DEBUG] openai.router.ts - Claude Opus 4 models:', - claudeOpus4Models.map((m: any) => m.id)); - console.log('[DEBUG] openai.router.ts - Claude Sonnet 4 models:', - claudeSonnet4Models.map((m: any) => m.id)); - - // Define the Claude model IDs that should have thinking variants - // Use actual IDs found in the OpenRouter response if possible - const CLAUDE_MODELS_FOR_THINKING_VARIANT = [ - // Try to find the actual model IDs dynamically if they exist - claudeOpus4Models.length > 0 ? claudeOpus4Models[0].id : 'anthropic/claude-opus-4', - claudeSonnet4Models.length > 0 ? 
claudeSonnet4Models[0].id : 'anthropic/claude-sonnet-4', - ]; - - console.log('[DEBUG] openai.router.ts - Using these IDs for thinking variants:', - CLAUDE_MODELS_FOR_THINKING_VARIANT); - // openRouterStatTokenizers(openAIModels); models = openAIModels .sort(openRouterModelFamilySortFn) - .flatMap(rawOpenRouterModel => { - const standardDescription = openRouterModelToModelDescription(rawOpenRouterModel); - if (!standardDescription) { - return []; // Skip if standard model description fails - } - - const modelIdFromOpenRouter = rawOpenRouterModel.id; - - // Log when we find one of our target models - if (CLAUDE_MODELS_FOR_THINKING_VARIANT.includes(modelIdFromOpenRouter)) { - console.log('[DEBUG] openai.router.ts - Found target Claude model:', modelIdFromOpenRouter); - } - - // Check if this model is one of our targets and doesn't already have a thinking indicator - if (CLAUDE_MODELS_FOR_THINKING_VARIANT.includes(modelIdFromOpenRouter) && - !modelIdFromOpenRouter.includes(':thinking')) { - console.log('[DEBUG] openai.router.ts - Creating thinking variant for:', modelIdFromOpenRouter); - - // Create the "thinking" variant based on the standard one - const thinkingDescription: ModelDescriptionSchema = { - ...standardDescription, - id: `${standardDescription.id}:thinking`, // Append suffix for uniqueness - label: `${standardDescription.label} (thinking)`, - // Add parameter spec for the thinking budget - parameterSpecs: [ - ...(standardDescription.parameterSpecs || []), - { - paramId: 'llmVndAntThinkingBudget', - initialValue: 1024, - }, - ], - }; - - console.log('[DEBUG] openai.router.ts - Created standard model:', standardDescription.id, standardDescription.label); - console.log('[DEBUG] openai.router.ts - Created thinking model:', thinkingDescription.id, thinkingDescription.label); - - return [standardDescription, thinkingDescription]; - } else { - return [standardDescription]; // Only the standard version - } - }) - .filter(desc => !!desc); - - // Count how many models we have after processing - console.log('[DEBUG] openai.router.ts - Total final models after processing:', models.length); - - // Log the mapped OpenRouter model descriptions (focusing on Anthropic models) - const claudeModelsAfterProcessing = models.filter(m => m.id && m.id.includes('anthropic/claude')); - console.log('[DEBUG] openai.router.ts - All Claude models after processing:', - JSON.stringify(claudeModelsAfterProcessing.map(m => ({ id: m.id, label: m.label })), null, 2)); + .map(openRouterModelToModelDescription) + .filter(desc => !!desc) + .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]); break; }
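
The net effect of the patch, condensed: OpenRouter's Anthropic models gain an injected "(thinking)" variant that carries a `llmVndAntThinkingBudget` parameter, and the OpenAI-protocol adapter turns that budget into OpenRouter's `reasoning.max_tokens` request field (https://openrouter.ai/docs/use-cases/reasoning-tokens). The sketch below is illustrative only and not part of the patch; the simplified types and the `reasoningFieldFor` helper are stand-ins for the project's real `ModelDescriptionSchema` and `aixToOpenAIChatCompletions`.

// a minimal TypeScript sketch of the two moving parts, under the assumptions above
type ThinkingBudget = number | null | undefined;

interface SimplifiedModelVariant {
  id: string;                          // e.g. 'anthropic/claude-sonnet-4' (the variant keeps the same id)
  idVariant?: 'thinking';              // what marks the injected variant
  label: string;
  parameterSpecs: { paramId: string; initialValue?: number }[];
}

// (1) variant injection: same OpenRouter id, plus a user-tunable thinking budget (default 1024 tokens)
const base: SimplifiedModelVariant = { id: 'anthropic/claude-sonnet-4', label: 'Claude 4 Sonnet', parameterSpecs: [] };
const thinkingVariant: SimplifiedModelVariant = {
  ...base,
  idVariant: 'thinking',
  label: `${base.label} (thinking)`,
  parameterSpecs: [...base.parameterSpecs, { paramId: 'llmVndAntThinkingBudget', initialValue: 1024 }],
};

// (2) request mapping: budget -> OpenRouter 'reasoning' field, mirroring the adapter's 'openrouter' branch
function reasoningFieldFor(budget: ThinkingBudget): { max_tokens: number } | undefined {
  if (budget === undefined) return undefined;   // not a thinking variant: omit the field entirely
  if (budget === null) return undefined;        // user chose to disable thinking: also omitted, as OpenRouter has no explicit off switch
  return { max_tokens: budget || 1024 };        // fall back to 1024 tokens for a zero/unset budget
}

console.log(thinkingVariant.label, reasoningFieldFor(2048)); // 'Claude 4 Sonnet (thinking)' { max_tokens: 2048 }

On the response side, OpenRouter returns the model's thinking as a `reasoning` string alongside `content`, which is why the patch adds `reasoning` to both the message and chunk-delta wire schemas and forwards it via `appendReasoningText` in the streaming and non-streaming parsers.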