mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-11 14:10:15 -07:00
AIX: OpenRouter: write/parse the new reasoning request / reasoning_details response
Removes the older OpenRouter mechanisms: the `include_reasoning` request flag and the plain-string `reasoning` response field.
This commit is contained in:
@@ -43,7 +43,6 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
|
||||
// [OpenRouter] 2025-10-02: do not throw, rather let it fail if upstream has issues
|
||||
// openAIDialect === 'openrouter' || /* OpenRouter FC support is not good (as of 2024-07-15) */
|
||||
openAIDialect === 'perplexity';
|
||||
const hotFixVndORIncludeReasoning = openAIDialect === 'openrouter'; // [OpenRouter, 2025-01-24] has a special `include_reasoning` field to show the chain of thought
|
||||
|
||||
// Model incompatibilities -> Hotfixes
|
||||
|
||||
@@ -90,10 +89,6 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
|
||||
user: undefined,
|
||||
};
|
||||
|
||||
// [OpenRouter, 2025-01-24]
|
||||
if (hotFixVndORIncludeReasoning)
|
||||
payload.include_reasoning = true;
|
||||
|
||||
// Top-P instead of temperature
|
||||
if (model.topP !== undefined) {
|
||||
delete payload.temperature;
|
||||
@@ -224,20 +219,35 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
|
||||
}
|
||||
}
|
||||
|
||||
// [OpenRouter] -> [Anthropic] via OpenAI API - https://openrouter.ai/docs/use-cases/reasoning-tokens
|
||||
if (openAIDialect === 'openrouter' && model.vndAntThinkingBudget !== undefined) {
|
||||
// [OpenRouter, 2025-11-11] Unified reasoning parameter - supports both token-based and effort-based control
|
||||
if (openAIDialect === 'openrouter') {
|
||||
|
||||
// vndAntThinkingBudget's presence indicates a user preference:
|
||||
// - [x] a number, which is the budget in tokens
|
||||
// - [ ] null: shall disable thinking, but openrouter does not support this?
|
||||
if (model.vndAntThinkingBudget === null) {
|
||||
// simply not setting the reasoning field downgrades this to a non-thinking model
|
||||
// console.warn('OpenRouter does not support disabling thinking of Anthropic models. Using default.');
|
||||
} else {
|
||||
payload.reasoning = {
|
||||
max_tokens: model.vndAntThinkingBudget || 1024,
|
||||
};
|
||||
// Anthropic via OpenRouter
|
||||
if (model.vndAntThinkingBudget !== undefined) {
|
||||
// vndAntThinkingBudget's presence indicates a user preference:
|
||||
// - a number: explicit token budget (1024-32000)
|
||||
// - null: disable thinking (don't set reasoning field)
|
||||
if (model.vndAntThinkingBudget === null) {
|
||||
// If null, don't set reasoning field at all (disables thinking)
|
||||
} else
|
||||
payload.reasoning = { max_tokens: model.vndAntThinkingBudget || 8192 };
|
||||
}
|
||||
// Gemini via OpenRouter
|
||||
else if (model.vndGeminiThinkingBudget !== undefined)
|
||||
payload.reasoning = { max_tokens: model.vndGeminiThinkingBudget || 8192 };
|
||||
// OpenAI via OpenRouter
|
||||
else if (model.vndOaiReasoningEffort && model.vndOaiReasoningEffort !== 'minimal')
|
||||
payload.reasoning = { effort: model.vndOaiReasoningEffort };
|
||||
|
||||
// FIX double-reasoning request - remove reasoning_effort after transferring it to reasoning (unless already set)
|
||||
if (payload.reasoning_effort && payload.reasoning_effort !== 'minimal') {
|
||||
// we don't know which one takes precedence, so we prioritize .reasoning (OpenRouter) even if .reasoning_effort (OpenAI) is present
|
||||
if (!payload.reasoning)
|
||||
payload.reasoning = { effort: payload.reasoning_effort };
|
||||
// Fix for `Only one of "reasoning" and "reasoning_effort" may be provided`
|
||||
delete payload.reasoning_effort;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (hotFixOpenAIOFamily)
|
||||
|
||||
@@ -196,11 +196,27 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
|
||||
deltaHasReasoning = true;
|
||||
|
||||
}
|
||||
// delta: Reasoning [OpenRouter, 2025-01-24]
|
||||
else if (typeof delta.reasoning === 'string') {
|
||||
// delta: Reasoning Details (Structured) [OpenRouter, 2025-11-11]
|
||||
else if (Array.isArray(delta.reasoning_details)) {
|
||||
|
||||
pt.appendReasoningText(delta.reasoning);
|
||||
deltaHasReasoning = true;
|
||||
for (const reasoningDetail of delta.reasoning_details) {
|
||||
// Extract text from reasoning blocks based on type
|
||||
if (reasoningDetail.type === 'reasoning.text' && typeof reasoningDetail.text === 'string') {
|
||||
pt.appendReasoningText(reasoningDetail.text);
|
||||
deltaHasReasoning = true;
|
||||
}
|
||||
// Summaries can also be shown as reasoning
|
||||
else if (reasoningDetail.type === 'reasoning.summary' && typeof reasoningDetail.summary === 'string') {
|
||||
pt.appendReasoningText(`[Summary] ${reasoningDetail.summary}`);
|
||||
deltaHasReasoning = true;
|
||||
}
|
||||
// 'encrypted' type - reasoning happened but not returned, skip
|
||||
else if (reasoningDetail.type === 'reasoning.encrypted') {
|
||||
// NOTE: Anthropic supports this, and we do too, but.. not now
|
||||
// reasoning happened but not returned, skip
|
||||
} else
|
||||
console.log('AIX: OpenAI-dispatch: unexpected reasoning detail type:', reasoningDetail);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -425,9 +441,19 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
|
||||
} else if (message.content !== undefined && message.content !== null)
|
||||
throw new Error(`unexpected message content type: ${typeof message.content}`);
|
||||
|
||||
// [OpenRouter, 2025-06-05] Handle reasoning field from OpenRouter
|
||||
if (typeof message.reasoning === 'string')
|
||||
pt.appendReasoningText(message.reasoning);
|
||||
// [OpenRouter, 2025-11-11] Handle structured reasoning_details
|
||||
if (Array.isArray(message.reasoning_details)) {
|
||||
for (const reasoningDetail of message.reasoning_details) {
|
||||
if (reasoningDetail.type === 'reasoning.text' && typeof reasoningDetail.text === 'string') {
|
||||
pt.appendReasoningText(reasoningDetail.text);
|
||||
} else if (reasoningDetail.type === 'reasoning.summary' && typeof reasoningDetail.summary === 'string') {
|
||||
pt.appendReasoningText(`[Summary] ${reasoningDetail.summary}`);
|
||||
} else if (reasoningDetail.type === 'reasoning.encrypted') {
|
||||
// reasoning happened but not returned, skip
|
||||
} else
|
||||
console.log('AIX: OpenAI-dispatch-NS: unexpected reasoning detail type:', reasoningDetail);
|
||||
}
|
||||
}
|
||||
|
||||
// message: Tool Calls
|
||||
for (const toolCall of (message.tool_calls || [])) {
|
||||
|
||||
@@ -108,6 +108,18 @@ export namespace OpenAIWire_ContentParts {
|
||||
}),
|
||||
});
|
||||
|
||||
// [OpenRouter, 2025-11-11] Reasoning details - structured reasoning output
|
||||
// https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-detail-types
|
||||
/**
 * Schema for a single entry of OpenRouter's `reasoning_details` array.
 *
 * - `type`: one of the three known literals, or any other string (see the union below).
 * - `text`: optional string; the parsers in this change read it for 'reasoning.text' entries.
 * - `summary`: optional string; the parsers in this change read it for 'reasoning.summary' entries.
 */
export const OpenRouter_ReasoningDetail_schema = z.object({
|
||||
type: z.union([
|
||||
z.enum(['reasoning.summary', 'reasoning.text', 'reasoning.encrypted']),
|
||||
z.string(), // fallback member: any other string also validates, so an unknown type does not fail schema parsing
|
||||
]),
|
||||
text: z.string().optional(), // Actual reasoning text (for the 'reasoning.text' type)
|
||||
summary: z.string().optional(), // Summary of reasoning (for the 'reasoning.summary' type)
|
||||
// The 'reasoning.encrypted' type declares no additional fields here - it indicates reasoning happened but was not returned
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
export namespace OpenAIWire_Messages {
|
||||
@@ -164,10 +176,8 @@ export namespace OpenAIWire_Messages {
|
||||
id: z.string(),
|
||||
}).nullable().optional(),
|
||||
|
||||
/**
|
||||
* [OpenRouter, 2025-06-05] The reasoning text generated by the model (e.g. with Anthropic thinking requests).
|
||||
*/
|
||||
reasoning: z.string().nullable().optional(),
|
||||
/** [OpenRouter, 2025-11-11] Reasoning traces with multiple blocks (summary, text, encrypted). */
|
||||
reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).optional(),
|
||||
|
||||
// function_call: // ignored, as it's deprecated
|
||||
// name: _optionalParticipantName, // omitted by choice: generally unsupported
|
||||
@@ -303,9 +313,12 @@ export namespace OpenAIWire_API_Chat_Completions {
|
||||
include_usage: z.boolean().optional(), // If set, an additional chunk will be streamed with a 'usage' field on the entire request.
|
||||
}).optional(),
|
||||
reasoning_effort: z.enum(['minimal', 'low', 'medium', 'high']).optional(), // [OpenAI, 2024-12-17] [Perplexity, 2025-06-23] reasoning effort
|
||||
include_reasoning: z.boolean().optional(), // [OpenRouter, 2025-01-24] enables reasoning tokens
|
||||
reasoning: z.object({ // [OpenRouter, 2025-06-05] Reasoning parameter for Claude models
|
||||
max_tokens: z.number().int().positive(),
|
||||
// [OpenRouter, 2025-11-11] Unified reasoning parameter for all models
|
||||
reasoning: z.object({
|
||||
max_tokens: z.number().int().positive().optional(), // Token-based control (Anthropic, Gemini): 1024-32000
|
||||
effort: z.enum(['low', 'medium', 'high']).optional(), // Effort-based control (OpenAI o1/o3, DeepSeek): allocates % of max_tokens
|
||||
enabled: z.boolean().optional(), // Simple enable with medium effort defaults
|
||||
exclude: z.boolean().optional(), // Use reasoning internally without returning it in response
|
||||
}).optional(),
|
||||
prediction: z.object({ // [OpenAI, 2024-11-05] Predicted Outputs - for regenerating a file with only minor changes to most of the content.
|
||||
type: z.literal('content'),
|
||||
@@ -625,8 +638,8 @@ export namespace OpenAIWire_API_Chat_Completions {
|
||||
}))),
|
||||
// delta-reasoning content
|
||||
reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20]
|
||||
reasoning: z.string().optional() // [OpenRouter, 2025-01-24]
|
||||
.nullable(), // [OpenRouter, 2025-06-05] null on Anthropic text responses past the reasoning blocks
|
||||
// [OpenRouter, 2025-11-11] Reasoning traces
|
||||
reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).nullish(),
|
||||
// delta-tool-calls content
|
||||
tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
|
||||
.nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160
|
||||
|
||||
Reference in New Issue
Block a user