From b699a665a1ddbb3dcfb9805a21ff5b60f8cfa837 Mon Sep 17 00:00:00 2001
From: Sam Jones
Date: Thu, 5 Jun 2025 17:29:43 +0100
Subject: [PATCH] Use reasoning instead of thinking for OpenRouter

---
 .../adapters/openai.chatCompletions.ts      | 14 ++++----
 .../chatGenerate/chatGenerate.dispatch.ts   |  4 ++-
 .../chatGenerate/parsers/openai.parser.ts   | 36 ++++++++++++-------
 .../dispatch/wiretypes/openai.wiretypes.ts  | 11 +++++-
 4 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts
index 79d36bf63..ebd73413f 100644
--- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts
+++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts
@@ -132,18 +132,15 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
     thinkingBudget = model.vndAntThinkingBudget;
   }
 
-  // Add thinking parameter if the model supports it
+  // Add reasoning parameter for Claude 4 thinking capability via OpenRouter
   if (openAIDialect === 'openrouter' && (thinkingBudget !== undefined || hasThinkingSuffix)) {
     // Use explicitly configured budget if provided, otherwise fall back to default
-    const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 256;
+    const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 1024;
 
-    payload.thinking = {
-      type: "enabled",
-      budget_tokens: finalThinkingBudget,
+    // OpenRouter expects reasoning.max_tokens for Anthropic's thinking feature
+    payload.reasoning = {
+      max_tokens: finalThinkingBudget,
     };
-
-    // Debug log to show the final request to OpenRouter
-    console.log(`[DEBUG] OpenRouter request for model: ${payload.model}, thinking budget: ${finalThinkingBudget}`);
   }
 
   if (hotFixOpenAIOFamily)
@@ -152,6 +149,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
 
   if (hotFixRemoveStreamOptions)
     payload = _fixRemoveStreamOptions(payload);
 
+  // Preemptive error detection with server-side payload validation before sending it upstream
   const validated = OpenAIWire_API_Chat_Completions.Request_schema.safeParse(payload);
   if (!validated.success) {
diff --git a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts
index 8f908dd5c..cbc3bc08f 100644
--- a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts
+++ b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts
@@ -98,7 +98,9 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
           body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming),
         },
         demuxerFormat: streaming ? 'fast-sse' : null,
-        chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(),
+        chatGenerateParse: streaming
+          ? createOpenAIChatCompletionsChunkParser()
+          : createOpenAIChatCompletionsParserNS(),
       };
     }
 }
diff --git a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts
index 163177b43..6238ca895 100644
--- a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts
+++ b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts
@@ -64,9 +64,9 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
     // Throws on malformed event data
     // ```Can you extend the Zod chunk response object parsing (all optional) to include the missing data? The following is an exampel of the object I received:```
     const chunkData = JSON.parse(eventData); // this is here just for ease of breakpoint, otherwise it could be inlined
-    
+
     // [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid)
-    if (_forwardOpenRouterDataError(chunkData, pt))
+    if (_isOpenRouterResponse(chunkData) && _forwardOpenRouterDataError(chunkData, pt))
       return;
 
     const json = OpenAIWire_API_Chat_Completions.ChunkResponse_schema.parse(chunkData);
@@ -259,15 +259,7 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
 
     // Throws on malformed event data
     const completeData = JSON.parse(eventData);
-    
-    // [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid)
-    if (_forwardOpenRouterDataError(completeData, pt))
-      return;
-
-    // [OpenAI] we don't know yet if warning messages are sent in non-streaming - for now we log
-    if (completeData.warning)
-      console.log('AIX: OpenAI-dispatch-NS warning:', completeData.warning);
-
+    // Parse the complete response
    const json = OpenAIWire_API_Chat_Completions.Response_schema.parse(completeData);
 
 
@@ -296,6 +288,11 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
       if (!message)
         throw new Error(`server response missing content (finish_reason: ${finish_reason})`);
 
+      // Handle reasoning field from OpenRouter
+      if (typeof message.reasoning === 'string') {
+        pt.appendReasoningText(message.reasoning);
+      }
+
       // message: Text
       if (typeof message.content === 'string') {
         if (message.content) {
@@ -450,6 +447,21 @@ function _fromOpenAIUsage(usage: OpenAIWire_API_Chat_Completions.Response['usage
   return metricsUpdate;
 }
 
+/**
+ * Check if the response is from OpenRouter based on its structure or provider information
+ */
+function _isOpenRouterResponse(parsedData: any): boolean {
+  if (!parsedData) return false;
+
+  // Check for OpenRouter-specific properties
+  if (parsedData.provider) return true;
+
+  // Check for error metadata which is OpenRouter-specific
+  if (parsedData.error?.metadata?.provider_name) return true;
+
+  return false;
+}
+
 /**
  * If there's an error in the pre-decoded message, push it down to the particle transmitter.
  */
@@ -480,4 +492,4 @@ function _forwardOpenRouterDataError(parsedData: any, pt: IParticleTransmitter)
   // Transmit the error as text - note: throw if you want to transmit as 'error'
   pt.setDialectTerminatingIssue(errorMessage, IssueSymbols.Generic);
   return true;
-}
\ No newline at end of file
+}
diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
index 378671e9e..e346e2341 100644
--- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
+++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
@@ -151,6 +151,10 @@ export namespace OpenAIWire_Messages {
     /**
      * [OpenAI, 2024-10-01] The refusal message generated by the model.
      */
     refusal: z.string().nullable().optional(),
+    /**
+     * [OpenRouter, 2025-01-24] The reasoning/thinking content generated by the model.
+     */
+    reasoning: z.string().nullable().optional(),
     /**
      * [OpenAI, 2024-10-17] Data about a previous audio response from the model. Usage depends on the context:
      * - request (this schema): has an id, if present
@@ -350,6 +354,11 @@ export namespace OpenAIWire_API_Chat_Completions {
       budget_tokens: z.number().int().positive(),
     }).optional(),
 
+    // [OpenRouter] Reasoning parameter for Claude models
+    reasoning: z.object({
+      max_tokens: z.number().int().positive(),
+    }).optional(),
+
     seed: z.number().int().optional(),
     stop: z.array(z.string()).optional(), // Up to 4 sequences where the API will stop generating further tokens.
     user: z.string().optional(),
@@ -546,7 +555,7 @@ export namespace OpenAIWire_API_Chat_Completions {
       content: z.string().nullable().optional(),
       // delta-reasoning content
       reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20]
-      reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24]
+      reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24] could be null for Anthropic models
       // delta-tool-calls content
       tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
         .nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160
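
Note on the wire shape (illustrative sketch, not part of the patch): assuming a Claude model routed through OpenRouter with thinking enabled and no explicit vndAntThinkingBudget, the adapter above would now emit a request body roughly like the TypeScript sketch below; the model id and message are hypothetical placeholders.

// Sketch of the OpenRouter request body after this patch (placeholders, not actual dispatch output)
const exampleOpenRouterBody = {
  model: 'anthropic/claude-sonnet-4',   // hypothetical model id with thinking capability
  messages: [{ role: 'user', content: 'Why is the sky blue?' }],
  stream: true,
  // previously sent as: thinking: { type: 'enabled', budget_tokens: 256 }
  reasoning: {
    max_tokens: 1024,                   // default budget when none is configured
  },
};

On the response side, per the schema changes above, the reasoning text comes back as a plain string (message.reasoning non-streaming, delta.reasoning per chunk), which the NS parser now forwards via pt.appendReasoningText().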