AIX: OpenRouter: write/parse the new reasoning request / reasoning_details response

Removes the older OpenRouter mechanisms: the `include_reasoning` request flag and the plain-string `reasoning` response field.
This commit is contained in:
Enrico Ros
2025-11-11 17:46:58 -08:00
parent babb1dd962
commit d6843d7fcf
3 changed files with 82 additions and 33 deletions
@@ -43,7 +43,6 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
// [OpenRouter] 2025-10-02: do not throw, rather let it fail if upstream has issues
// openAIDialect === 'openrouter' || /* OpenRouter FC support is not good (as of 2024-07-15) */
openAIDialect === 'perplexity';
const hotFixVndORIncludeReasoning = openAIDialect === 'openrouter'; // [OpenRouter, 2025-01-24] has a special `include_reasoning` field to show the chain of thought
// Model incompatibilities -> Hotfixes
@@ -90,10 +89,6 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
user: undefined,
};
// [OpenRouter, 2025-01-24]
if (hotFixVndORIncludeReasoning)
payload.include_reasoning = true;
// Top-P instead of temperature
if (model.topP !== undefined) {
delete payload.temperature;
@@ -224,20 +219,35 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
}
}
// [OpenRouter] -> [Anthropic] via OpenAI API - https://openrouter.ai/docs/use-cases/reasoning-tokens
if (openAIDialect === 'openrouter' && model.vndAntThinkingBudget !== undefined) {
// [OpenRouter, 2025-11-11] Unified reasoning parameter - supports both token-based and effort-based control
if (openAIDialect === 'openrouter') {
// vndAntThinkingBudget's presence indicates a user preference:
// - [x] a number, which is the budget in tokens
// - [ ] null: shall disable thinking, but openrouter does not support this?
if (model.vndAntThinkingBudget === null) {
// simply not setting the reasoning field downgrades this to a non-thinking model
// console.warn('OpenRouter does not support disabling thinking of Anthropic models. Using default.');
} else {
payload.reasoning = {
max_tokens: model.vndAntThinkingBudget || 1024,
};
// Anthropic via OpenRouter
if (model.vndAntThinkingBudget !== undefined) {
// vndAntThinkingBudget's presence indicates a user preference:
// - a number: explicit token budget (1024-32000)
// - null: disable thinking (don't set reasoning field)
if (model.vndAntThinkingBudget === null) {
// If null, don't set reasoning field at all (disables thinking)
} else
payload.reasoning = { max_tokens: model.vndAntThinkingBudget || 8192 };
}
// Gemini via OpenRouter
else if (model.vndGeminiThinkingBudget !== undefined)
payload.reasoning = { max_tokens: model.vndGeminiThinkingBudget || 8192 };
// OpenAI via OpenRouter
else if (model.vndOaiReasoningEffort && model.vndOaiReasoningEffort !== 'minimal')
payload.reasoning = { effort: model.vndOaiReasoningEffort };
// FIX double-reasoning request - remove reasoning_effort after transferring it to reasoning (unless already set)
if (payload.reasoning_effort && payload.reasoning_effort !== 'minimal') {
// we don't know which one takes precedence, so we prioritize .reasoning (OpenRouter) even if .reasoning_effort (OpenAI) is present
if (!payload.reasoning)
payload.reasoning = { effort: payload.reasoning_effort };
// Fix for `Only one of "reasoning" and "reasoning_effort" may be provided`
delete payload.reasoning_effort;
}
}
if (hotFixOpenAIOFamily)
@@ -196,11 +196,27 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
deltaHasReasoning = true;
}
// delta: Reasoning [OpenRouter, 2025-01-24]
else if (typeof delta.reasoning === 'string') {
// delta: Reasoning Details (Structured) [OpenRouter, 2025-11-11]
else if (Array.isArray(delta.reasoning_details)) {
pt.appendReasoningText(delta.reasoning);
deltaHasReasoning = true;
for (const reasoningDetail of delta.reasoning_details) {
// Extract text from reasoning blocks based on type
if (reasoningDetail.type === 'reasoning.text' && typeof reasoningDetail.text === 'string') {
pt.appendReasoningText(reasoningDetail.text);
deltaHasReasoning = true;
}
// Summaries can also be shown as reasoning
else if (reasoningDetail.type === 'reasoning.summary' && typeof reasoningDetail.summary === 'string') {
pt.appendReasoningText(`[Summary] ${reasoningDetail.summary}`);
deltaHasReasoning = true;
}
// 'encrypted' type - reasoning happened but not returned, skip
else if (reasoningDetail.type === 'reasoning.encrypted') {
// NOTE: Anthropic supports this, and we do too, but.. not now
// reasoning happened but not returned, skip
} else
console.log('AIX: OpenAI-dispatch: unexpected reasoning detail type:', reasoningDetail);
}
}
@@ -425,9 +441,19 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
} else if (message.content !== undefined && message.content !== null)
throw new Error(`unexpected message content type: ${typeof message.content}`);
// [OpenRouter, 2025-06-05] Handle reasoning field from OpenRouter
if (typeof message.reasoning === 'string')
pt.appendReasoningText(message.reasoning);
// [OpenRouter, 2025-11-11] Handle structured reasoning_details
if (Array.isArray(message.reasoning_details)) {
for (const reasoningDetail of message.reasoning_details) {
if (reasoningDetail.type === 'reasoning.text' && typeof reasoningDetail.text === 'string') {
pt.appendReasoningText(reasoningDetail.text);
} else if (reasoningDetail.type === 'reasoning.summary' && typeof reasoningDetail.summary === 'string') {
pt.appendReasoningText(`[Summary] ${reasoningDetail.summary}`);
} else if (reasoningDetail.type === 'reasoning.encrypted') {
// reasoning happened but not returned, skip
} else
console.log('AIX: OpenAI-dispatch-NS: unexpected reasoning detail type:', reasoningDetail);
}
}
// message: Tool Calls
for (const toolCall of (message.tool_calls || [])) {
@@ -108,6 +108,18 @@ export namespace OpenAIWire_ContentParts {
}),
});
/**
 * [OpenRouter, 2025-11-11] One entry of the structured reasoning output (`reasoning_details`).
 * https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-detail-types
 *
 * The three known detail kinds are enumerated for reference, but any other string is
 * accepted too, so a kind that is not listed here does not fail validation.
 */
export const OpenRouter_ReasoningDetail_schema = z.object({
  // known kinds first, then a catch-all for any other string
  type: z.enum(['reasoning.summary', 'reasoning.text', 'reasoning.encrypted']).or(z.string()),
  // actual reasoning text (for the 'text' kind)
  text: z.optional(z.string()),
  // summary of the reasoning (for the 'summary' kind)
  summary: z.optional(z.string()),
  // no extra fields are modeled for the 'encrypted' kind - it marks reasoning that happened but was not returned
});
}
export namespace OpenAIWire_Messages {
@@ -164,10 +176,8 @@ export namespace OpenAIWire_Messages {
id: z.string(),
}).nullable().optional(),
/**
* [OpenRouter, 2025-06-05] The reasoning text generated by the model (e.g. with Anthropic thinking requests).
*/
reasoning: z.string().nullable().optional(),
/** [OpenRouter, 2025-11-11] Reasoning traces with multiple blocks (summary, text, encrypted). */
reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).optional(),
// function_call: // ignored, as it's deprecated
// name: _optionalParticipantName, // omitted by choice: generally unsupported
@@ -303,9 +313,12 @@ export namespace OpenAIWire_API_Chat_Completions {
include_usage: z.boolean().optional(), // If set, an additional chunk will be streamed with a 'usage' field on the entire request.
}).optional(),
reasoning_effort: z.enum(['minimal', 'low', 'medium', 'high']).optional(), // [OpenAI, 2024-12-17] [Perplexity, 2025-06-23] reasoning effort
include_reasoning: z.boolean().optional(), // [OpenRouter, 2025-01-24] enables reasoning tokens
reasoning: z.object({ // [OpenRouter, 2025-06-05] Reasoning parameter for Claude models
max_tokens: z.number().int().positive(),
// [OpenRouter, 2025-11-11] Unified reasoning parameter for all models
reasoning: z.object({
max_tokens: z.number().int().positive().optional(), // Token-based control (Anthropic, Gemini): 1024-32000
effort: z.enum(['low', 'medium', 'high']).optional(), // Effort-based control (OpenAI o1/o3, DeepSeek): allocates % of max_tokens
enabled: z.boolean().optional(), // Simple enable with medium effort defaults
exclude: z.boolean().optional(), // Use reasoning internally without returning it in response
}).optional(),
prediction: z.object({ // [OpenAI, 2024-11-05] Predicted Outputs - for regenerating a file with only minor changes to most of the content.
type: z.literal('content'),
@@ -625,8 +638,8 @@ export namespace OpenAIWire_API_Chat_Completions {
}))),
// delta-reasoning content
reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20]
reasoning: z.string().optional() // [OpenRouter, 2025-01-24]
.nullable(), // [OpenRouter, 2025-06-05] null on Anthropic text responses past the reasoning blocks
// [OpenRouter, 2025-11-11] Reasoning traces
reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).nullish(),
// delta-tool-calls content
tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
.nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160