Use reasoning instead of thinking for OpenRouter

This commit is contained in:
Sam Jones
2025-06-05 17:29:43 +01:00
parent b28a282aba
commit b699a665a1
4 changed files with 43 additions and 22 deletions
@@ -132,18 +132,15 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
thinkingBudget = model.vndAntThinkingBudget;
}
// Add thinking parameter if the model supports it
// Add reasoning parameter for Claude 4 thinking capability via OpenRouter
if (openAIDialect === 'openrouter' && (thinkingBudget !== undefined || hasThinkingSuffix)) {
// Use explicitly configured budget if provided, otherwise fall back to default
const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 256;
const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 1024;
payload.thinking = {
type: "enabled",
budget_tokens: finalThinkingBudget,
// OpenRouter expects reasoning.max_tokens for Anthropic's thinking feature
payload.reasoning = {
max_tokens: finalThinkingBudget,
};
// Debug log to show the final request to OpenRouter
console.log(`[DEBUG] OpenRouter request for model: ${payload.model}, thinking budget: ${finalThinkingBudget}`);
}
if (hotFixOpenAIOFamily)
@@ -152,6 +149,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
if (hotFixRemoveStreamOptions)
payload = _fixRemoveStreamOptions(payload);
// Preemptive error detection with server-side payload validation before sending it upstream
const validated = OpenAIWire_API_Chat_Completions.Request_schema.safeParse(payload);
if (!validated.success) {
@@ -98,7 +98,9 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming),
},
demuxerFormat: streaming ? 'fast-sse' : null,
chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(),
chatGenerateParse: streaming
? createOpenAIChatCompletionsChunkParser()
: createOpenAIChatCompletionsParserNS(),
};
}
}
@@ -64,9 +64,9 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
// Throws on malformed event data
// ```Can you extend the Zod chunk response object parsing (all optional) to include the missing data? The following is an example of the object I received:```
const chunkData = JSON.parse(eventData); // this is here just for ease of breakpoint, otherwise it could be inlined
// [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid)
if (_forwardOpenRouterDataError(chunkData, pt))
if (_isOpenRouterResponse(chunkData) && _forwardOpenRouterDataError(chunkData, pt))
return;
const json = OpenAIWire_API_Chat_Completions.ChunkResponse_schema.parse(chunkData);
@@ -259,15 +259,7 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
// Throws on malformed event data
const completeData = JSON.parse(eventData);
// [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid)
if (_forwardOpenRouterDataError(completeData, pt))
return;
// [OpenAI] we don't know yet if warning messages are sent in non-streaming - for now we log
if (completeData.warning)
console.log('AIX: OpenAI-dispatch-NS warning:', completeData.warning);
// Parse the complete response
const json = OpenAIWire_API_Chat_Completions.Response_schema.parse(completeData);
@@ -296,6 +288,11 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
if (!message)
throw new Error(`server response missing content (finish_reason: ${finish_reason})`);
// Handle reasoning field from OpenRouter
if (typeof message.reasoning === 'string') {
pt.appendReasoningText(message.reasoning);
}
// message: Text
if (typeof message.content === 'string') {
if (message.content) {
@@ -450,6 +447,21 @@ function _fromOpenAIUsage(usage: OpenAIWire_API_Chat_Completions.Response['usage
return metricsUpdate;
}
/**
 * Heuristic: does this pre-parsed (JSON.parse'd) payload look like an OpenRouter response?
 *
 * Detection relies on two OpenRouter-specific markers: a top-level `provider`
 * field on regular responses, and `error.metadata.provider_name` on error payloads.
 */
function _isOpenRouterResponse(parsedData: any): boolean {
  // nullish / falsy payloads can't carry either marker
  if (!parsedData) return false;
  // either marker is sufficient evidence of an OpenRouter origin
  return Boolean(parsedData.provider || parsedData.error?.metadata?.provider_name);
}
/**
* If there's an error in the pre-decoded message, push it down to the particle transmitter.
*/
@@ -480,4 +492,4 @@ function _forwardOpenRouterDataError(parsedData: any, pt: IParticleTransmitter)
// Transmit the error as text - note: throw if you want to transmit as 'error'
pt.setDialectTerminatingIssue(errorMessage, IssueSymbols.Generic);
return true;
}
}
@@ -151,6 +151,10 @@ export namespace OpenAIWire_Messages {
* [OpenAI, 2024-10-01] The refusal message generated by the model.
*/
refusal: z.string().nullable().optional(),
/**
* [OpenRouter, 2025-01-24] The reasoning/thinking content generated by the model.
*/
reasoning: z.string().nullable().optional(),
/**
* [OpenAI, 2024-10-17] Data about a previous audio response from the model. Usage depends on the context:
* - request (this schema): has an id, if present
@@ -350,6 +354,11 @@ export namespace OpenAIWire_API_Chat_Completions {
budget_tokens: z.number().int().positive(),
}).optional(),
// [OpenRouter] Reasoning parameter for Claude models
reasoning: z.object({
max_tokens: z.number().int().positive(),
}).optional(),
seed: z.number().int().optional(),
stop: z.array(z.string()).optional(), // Up to 4 sequences where the API will stop generating further tokens.
user: z.string().optional(),
@@ -546,7 +555,7 @@ export namespace OpenAIWire_API_Chat_Completions {
content: z.string().nullable().optional(),
// delta-reasoning content
reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20]
reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24]
reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24] could be null for Anthropic models
// delta-tool-calls content
tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
.nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160