diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts index 1fec88c87..a8d210443 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts @@ -43,7 +43,6 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: // [OpenRouter] 2025-10-02: do not throw, rather let it fail if upstream has issues // openAIDialect === 'openrouter' || /* OpenRouter FC support is not good (as of 2024-07-15) */ openAIDialect === 'perplexity'; - const hotFixVndORIncludeReasoning = openAIDialect === 'openrouter'; // [OpenRouter, 2025-01-24] has a special `include_reasoning` field to show the chain of thought // Model incompatibilities -> Hotfixes @@ -90,10 +89,6 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: user: undefined, }; - // [OpenRouter, 2025-01-24] - if (hotFixVndORIncludeReasoning) - payload.include_reasoning = true; - // Top-P instead of temperature if (model.topP !== undefined) { delete payload.temperature; @@ -224,20 +219,35 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: } } - // [OpenRouter] -> [Anthropic] via OpenAI API - https://openrouter.ai/docs/use-cases/reasoning-tokens - if (openAIDialect === 'openrouter' && model.vndAntThinkingBudget !== undefined) { + // [OpenRouter, 2025-11-11] Unified reasoning parameter - supports both token-based and effort-based control + if (openAIDialect === 'openrouter') { - // vndAntThinkingBudget's presence indicates a user preference: - // - [x] a number, which is the budget in tokens - // - [ ] null: shall disable thinking, but openrouter does not support this? - if (model.vndAntThinkingBudget === null) { - // simply not setting the reasoning field downgrades this to a non-thinking model - // console.warn('OpenRouter does not support disabling thinking of Anthropic models. Using default.'); - } else { - payload.reasoning = { - max_tokens: model.vndAntThinkingBudget || 1024, - }; + // Anthropic via OpenRouter + if (model.vndAntThinkingBudget !== undefined) { + // vndAntThinkingBudget's presence indicates a user preference: + // - a number: explicit token budget (1024-32000) + // - null: disable thinking (don't set reasoning field) + if (model.vndAntThinkingBudget === null) { + // If null, don't set reasoning field at all (disables thinking) + } else + payload.reasoning = { max_tokens: model.vndAntThinkingBudget || 8192 }; } + // Gemini via OpenRouter + else if (model.vndGeminiThinkingBudget !== undefined) + payload.reasoning = { max_tokens: model.vndGeminiThinkingBudget || 8192 }; + // OpenAI via OpenRouter + else if (model.vndOaiReasoningEffort && model.vndOaiReasoningEffort !== 'minimal') + payload.reasoning = { effort: model.vndOaiReasoningEffort }; + + // FIX double-reasoning request - remove reasoning_effort after transferring it to reasoning (unless already set) + if (payload.reasoning_effort && payload.reasoning_effort !== 'minimal') { + // we don't know which one takes precedence, so we prioritize .reasoning (OpenRouter) even if .reasoning_effort (OpenAI) is present + if (!payload.reasoning) + payload.reasoning = { effort: payload.reasoning_effort }; + // Fix for `Only one of "reasoning" and "reasoning_effort" may be provided` + delete payload.reasoning_effort; + } + } if (hotFixOpenAIOFamily) diff --git a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts index 7ac513e7e..0434e8c5e 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts @@ -196,11 +196,27 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct deltaHasReasoning = true; } - // delta: Reasoning [OpenRouter, 2025-01-24] - else if (typeof delta.reasoning === 'string') { + // delta: Reasoning Details (Structured) [OpenRouter, 2025-11-11] + else if (Array.isArray(delta.reasoning_details)) { - pt.appendReasoningText(delta.reasoning); - deltaHasReasoning = true; + for (const reasoningDetail of delta.reasoning_details) { + // Extract text from reasoning blocks based on type + if (reasoningDetail.type === 'reasoning.text' && typeof reasoningDetail.text === 'string') { + pt.appendReasoningText(reasoningDetail.text); + deltaHasReasoning = true; + } + // Summaries can also be shown as reasoning + else if (reasoningDetail.type === 'reasoning.summary' && typeof reasoningDetail.summary === 'string') { + pt.appendReasoningText(`[Summary] ${reasoningDetail.summary}`); + deltaHasReasoning = true; + } + // 'encrypted' type - reasoning happened but not returned, skip + else if (reasoningDetail.type === 'reasoning.encrypted') { + // NOTE: Anthropic supports this, and we do too, but.. not now + // reasoning happened but not returned, skip + } else + console.log('AIX: OpenAI-dispatch: unexpected reasoning detail type:', reasoningDetail); + } } @@ -425,9 +441,19 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction } else if (message.content !== undefined && message.content !== null) throw new Error(`unexpected message content type: ${typeof message.content}`); - // [OpenRouter, 2025-06-05] Handle reasoning field from OpenRouter - if (typeof message.reasoning === 'string') - pt.appendReasoningText(message.reasoning); + // [OpenRouter, 2025-11-11] Handle structured reasoning_details + if (Array.isArray(message.reasoning_details)) { + for (const reasoningDetail of message.reasoning_details) { + if (reasoningDetail.type === 'reasoning.text' && typeof reasoningDetail.text === 'string') { + pt.appendReasoningText(reasoningDetail.text); + } else if (reasoningDetail.type === 'reasoning.summary' && typeof reasoningDetail.summary === 'string') { + pt.appendReasoningText(`[Summary] ${reasoningDetail.summary}`); + } else if (reasoningDetail.type === 'reasoning.encrypted') { + // reasoning happened but not returned, skip + } else + console.log('AIX: OpenAI-dispatch-NS: unexpected reasoning detail type:', reasoningDetail); + } + } // message: Tool Calls for (const toolCall of (message.tool_calls || [])) { diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts index 0d7ee32c2..2e2978f8e 100644 --- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts @@ -108,6 +108,18 @@ export namespace OpenAIWire_ContentParts { }), }); + // [OpenRouter, 2025-11-11] Reasoning details - structured reasoning output + // https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-detail-types + export const OpenRouter_ReasoningDetail_schema = z.object({ + type: z.union([ + z.enum(['reasoning.summary', 'reasoning.text', 'reasoning.encrypted']), + z.string(), + ]), + text: z.string().optional(), // Actual reasoning text (for 'text' type) + summary: z.string().optional(), // Summary of reasoning (for 'summary' type) + // 'encrypted' type has no additional fields - indicates reasoning happened but not returned + }); + } export namespace OpenAIWire_Messages { @@ -164,10 +176,8 @@ export namespace OpenAIWire_Messages { id: z.string(), }).nullable().optional(), - /** - * [OpenRouter, 2025-06-05] The reasoning text generated by the model (e.g. with Anthropic thinking requests). - */ - reasoning: z.string().nullable().optional(), + /** [OpenRouter, 2025-11-11] Reasoning traces with multiple blocks (summary, text, encrypted). */ + reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).optional(), // function_call: // ignored, as it's deprecated // name: _optionalParticipantName, // omitted by choice: generally unsupported @@ -303,9 +313,12 @@ export namespace OpenAIWire_API_Chat_Completions { include_usage: z.boolean().optional(), // If set, an additional chunk will be streamed with a 'usage' field on the entire request. }).optional(), reasoning_effort: z.enum(['minimal', 'low', 'medium', 'high']).optional(), // [OpenAI, 2024-12-17] [Perplexity, 2025-06-23] reasoning effort - include_reasoning: z.boolean().optional(), // [OpenRouter, 2025-01-24] enables reasoning tokens - reasoning: z.object({ // [OpenRouter, 2025-06-05] Reasoning parameter for Claude models - max_tokens: z.number().int().positive(), + // [OpenRouter, 2025-11-11] Unified reasoning parameter for all models + reasoning: z.object({ + max_tokens: z.number().int().positive().optional(), // Token-based control (Anthropic, Gemini): 1024-32000 + effort: z.enum(['low', 'medium', 'high']).optional(), // Effort-based control (OpenAI o1/o3, DeepSeek): allocates % of max_tokens + enabled: z.boolean().optional(), // Simple enable with medium effort defaults + exclude: z.boolean().optional(), // Use reasoning internally without returning it in response }).optional(), prediction: z.object({ // [OpenAI, 2024-11-05] Predicted Outputs - for regenerating a file with only minor changes to most of the content. type: z.literal('content'), @@ -625,8 +638,8 @@ export namespace OpenAIWire_API_Chat_Completions { }))), // delta-reasoning content reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20] - reasoning: z.string().optional() // [OpenRouter, 2025-01-24] - .nullable(), // [OpenRouter, 2025-06-05] null on Anthropic text responses past the reasoning blocks + // [OpenRouter, 2025-11-11] Reasoning traces + reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).nullish(), // delta-tool-calls content tool_calls: z.array(ChunkDeltaToolCalls_schema).optional() .nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160