OpenRouter: support for Anthropic thinking variants via the OpenAI protocol. #811

Enrico Ros
2025-06-06 11:09:14 -07:00
parent b7bdae00f8
commit 4f63e98e7f
8 changed files with 82 additions and 170 deletions
+6 -15
@@ -474,17 +474,6 @@ export function prettyShortChatModelName(model: string | undefined): string {
 }
 
 function _prettyAnthropicModelName(modelId: string): string | null {
-  // Check for OpenRouter Anthropic models (format: "anthropic/claude-...")
-  if (modelId.includes('anthropic/claude-')) {
-    const subStr = modelId.replace('anthropic/', '');
-    // Handle Claude 4 models from OpenRouter
-    if (subStr.includes('claude-opus-4')) return 'Claude 4 Opus';
-    if (subStr.includes('claude-sonnet-4')) return 'Claude 4 Sonnet';
-    if (subStr.includes('claude-haiku-4')) return 'Claude 4 Haiku';
-  }
   // Handle direct Anthropic models
   if (modelId.indexOf('claude-') === -1) return null; // not a Claude model
   // must match any known prefix
@@ -500,10 +489,12 @@ function _prettyAnthropicModelName(modelId: string): string | null {
   const subStr = modelId.slice(claudeIndex);
   const version =
-    subStr.includes('-4-') ? '4'
-      : subStr.includes('-3-7-') ? '3.7'
-        : subStr.includes('-3-5-') ? '3.5'
-          : '3';
+    subStr.includes('-5') ? '5'
+      : subStr.includes('-4') ? '4'
+        : subStr.includes('-3-7') ? '3.7'
+          : subStr.includes('-3-5') ? '3.5'
+            : subStr.includes('-3') ? '3'
+              : '?';
   if (subStr.includes(`-opus`)) return `Claude ${version} Opus`;
   if (subStr.includes(`-sonnet`)) return `Claude ${version} Sonnet`;
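For reference, the widened version chain resolves both OpenRouter-style IDs (e.g. anthropic/claude-sonnet-4) and dated direct Anthropic IDs, which is what made the dedicated OpenRouter branch above removable. A small illustrative sketch, assuming the IDs pass the prefix checks elided from this hunk:

// Illustrative expectations (example IDs; not an exhaustive test):
_prettyAnthropicModelName('anthropic/claude-sonnet-4');  // 'Claude 4 Sonnet'  (matches '-4')
_prettyAnthropicModelName('claude-opus-4-20250514');     // 'Claude 4 Opus'
_prettyAnthropicModelName('claude-3-7-sonnet-20250219'); // 'Claude 3.7 Sonnet'
_prettyAnthropicModelName('gpt-4o');                     // null ('claude-' not found)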
+1 -3
@@ -92,9 +92,7 @@ export function aixCreateModelFromLLMOptions(
     ...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
     ...(llmTopP !== undefined ? { topP: llmTopP } : {}),
     ...(llmForceNoStream ? { forceNoStream: llmForceNoStream } : {}),
-    ...(llmVndAntThinkingBudget !== undefined ? {
-      vndAntThinkingBudget: llmVndAntThinkingBudget
-    } : {}),
+    ...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget } : {}),
     ...(llmVndGeminiShowThoughts ? { vndGeminiShowThoughts: llmVndGeminiShowThoughts } : {}),
     ...(llmVndGeminiThinkingBudget !== undefined ? { vndGeminiThinkingBudget: llmVndGeminiThinkingBudget } : {}),
     ...(llmVndOaiReasoningEffort ? { vndOaiReasoningEffort: llmVndOaiReasoningEffort } : {}),
@@ -120,27 +120,21 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
     };
   }
 
-  // Handle Anthropic Claude thinking capability via OpenRouter
-  const hasThinkingSuffix = model.id.includes(':thinking');
-  if (hasThinkingSuffix && openAIDialect === 'openrouter') {
-    payload.model = model.id.replace(':thinking', '');
-  }
-
-  // Get thinking budget from the model's vndAntThinkingBudget property
-  let thinkingBudget: number | undefined;
-  if (typeof model.vndAntThinkingBudget === 'number') {
-    thinkingBudget = model.vndAntThinkingBudget;
-  }
-
-  // Add reasoning parameter for Claude 4 thinking capability via OpenRouter
-  if (openAIDialect === 'openrouter' && (thinkingBudget !== undefined || hasThinkingSuffix)) {
-    // Use explicitly configured budget if provided, otherwise fall back to default
-    const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 1024;
-    // OpenRouter expects reasoning.max_tokens for Anthropic's thinking feature
-    payload.reasoning = {
-      max_tokens: finalThinkingBudget,
-    };
-  }
+  // [Anthropic] via OpenAI API (OpenRouter) - https://openrouter.ai/docs/use-cases/reasoning-tokens
+  if (openAIDialect === 'openrouter' && model.vndAntThinkingBudget !== undefined) {
+    // vndAntThinkingBudget's presence indicates a user preference:
+    // - [x] a number: the thinking budget in tokens
+    // - [ ] null: should disable thinking, but OpenRouter does not seem to support this
+    if (model.vndAntThinkingBudget === null) {
+      // simply not setting the reasoning field downgrades this to a non-thinking model
+      // console.warn('OpenRouter does not support disabling thinking of Anthropic models. Using default.');
+    } else {
+      payload.reasoning = {
+        max_tokens: model.vndAntThinkingBudget || 1024,
+      };
+    }
+  }
 
   if (hotFixOpenAIOFamily)
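For context, a minimal sketch of the wire payload this branch produces when a thinking variant is dispatched through OpenRouter with the default budget. The model ID and message are illustrative; reasoning.max_tokens is the field attested by this diff and the linked OpenRouter docs:

// Illustrative OpenRouter request body with vndAntThinkingBudget = 1024:
const payload = {
  model: 'anthropic/claude-sonnet-4',
  messages: [{ role: 'user', content: 'Think step by step: ...' }],
  stream: true,
  reasoning: { max_tokens: 1024 }, // mapped by OpenRouter to Anthropic extended thinking
};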
@@ -98,9 +98,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
       body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming),
     },
     demuxerFormat: streaming ? 'fast-sse' : null,
-    chatGenerateParse: streaming
-      ? createOpenAIChatCompletionsChunkParser()
-      : createOpenAIChatCompletionsParserNS(),
+    chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(),
   };
 }
 }
@@ -65,8 +65,8 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
       // ```Can you extend the Zod chunk response object parsing (all optional) to include the missing data? The following is an example of the object I received:```
       const chunkData = JSON.parse(eventData); // this is here just for ease of breakpoint, otherwise it could be inlined
 
-      // [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid)
-      if (_isOpenRouterResponse(chunkData) && _forwardOpenRouterDataError(chunkData, pt))
+      // [OpenRouter/others] transmits upstream errors pre-parsing (object wouldn't be valid)
+      if (_forwardOpenRouterDataError(chunkData, pt))
         return;
 
       const json = OpenAIWire_API_Chat_Completions.ChunkResponse_schema.parse(chunkData);
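The removed _isOpenRouterResponse pre-check (deleted further down in this diff) keyed on error.metadata.provider_name; the forwarder now runs unconditionally on every pre-parse object. A hedged sketch of the kind of upstream error it forwards — only error.metadata.provider_name is attested in this diff, the other fields are assumptions:

// Hypothetical mid-stream error object from OpenRouter:
const chunkData = {
  error: {
    message: 'Upstream provider error',        // assumed field
    metadata: { provider_name: 'Anthropic' },  // OpenRouter-specific marker
  },
};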
@@ -259,7 +259,15 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
       // Throws on malformed event data
       const completeData = JSON.parse(eventData);
 
+      // [OpenRouter/others] transmits upstream errors pre-parsing (object wouldn't be valid)
+      if (_forwardOpenRouterDataError(completeData, pt))
+        return;
+
+      // [OpenAI] we don't know yet if warning messages are sent in non-streaming - for now we log
+      if (completeData.warning)
+        console.log('AIX: OpenAI-dispatch-NS warning:', completeData.warning);
 
       // Parse the complete response
       const json = OpenAIWire_API_Chat_Completions.Response_schema.parse(completeData);
@@ -288,11 +296,6 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
       if (!message)
        throw new Error(`server response missing content (finish_reason: ${finish_reason})`);
 
-      // Handle reasoning field from OpenRouter
-      if (typeof message.reasoning === 'string') {
-        pt.appendReasoningText(message.reasoning);
-      }
-
       // message: Text
       if (typeof message.content === 'string') {
         if (message.content) {
@@ -302,6 +305,10 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
       } else if (message.content !== undefined && message.content !== null)
         throw new Error(`unexpected message content type: ${typeof message.content}`);
 
+      // [OpenRouter, 2025-06-05] Handle reasoning field from OpenRouter
+      if (typeof message.reasoning === 'string')
+        pt.appendReasoningText(message.reasoning);
 
       // message: Tool Calls
       for (const toolCall of (message.tool_calls || [])) {
@@ -447,21 +454,6 @@ function _fromOpenAIUsage(usage: OpenAIWire_API_Chat_Completions.Response['usage
   return metricsUpdate;
 }
 
-/**
- * Check if the response is from OpenRouter based on its structure or provider information
- */
-function _isOpenRouterResponse(parsedData: any): boolean {
-  if (!parsedData) return false;
-
-  // Check for OpenRouter-specific properties
-  if (parsedData.provider) return true;
-
-  // Check for error metadata which is OpenRouter-specific
-  if (parsedData.error?.metadata?.provider_name) return true;
-
-  return false;
-}
-
 /**
  * If there's an error in the pre-decoded message, push it down to the particle transmitter.
  */
@@ -151,10 +151,6 @@ export namespace OpenAIWire_Messages {
      * [OpenAI, 2024-10-01] The refusal message generated by the model.
      */
     refusal: z.string().nullable().optional(),
-    /**
-     * [OpenRouter, 2025-01-24] The reasoning/thinking content generated by the model.
-     */
-    reasoning: z.string().nullable().optional(),
     /**
      * [OpenAI, 2024-10-17] Data about a previous audio response from the model. Usage depends on the context:
      * - request (this schema): has an id, if present
@@ -165,6 +161,11 @@ export namespace OpenAIWire_Messages {
       id: z.string(),
     }).nullable().optional(),
 
+    /**
+     * [OpenRouter, 2025-06-05] The reasoning text generated by the model (e.g. with Anthropic thinking requests).
+     */
+    reasoning: z.string().nullable().optional(),
 
     // function_call: // ignored, as it's deprecated
     // name: _optionalParticipantName, // omitted by choice: generally unsupported
   });
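With the field relocated here, a non-streaming assistant message can carry reasoning alongside content; a minimal illustrative shape (values invented):

// Illustrative choice.message in a non-streaming OpenRouter response:
const message = {
  role: 'assistant',
  reasoning: 'First, restate the problem...', // appended via pt.appendReasoningText()
  content: 'The answer is 42.',
};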
@@ -291,6 +292,9 @@ export namespace OpenAIWire_API_Chat_Completions {
     }).optional(),
     reasoning_effort: z.enum(['low', 'medium', 'high']).optional(), // [OpenAI, 2024-12-17] reasoning effort, o1 models only for now
     include_reasoning: z.boolean().optional(), // [OpenRouter, 2025-01-24] enables reasoning tokens
+    reasoning: z.object({ // [OpenRouter, 2025-06-05] Reasoning parameter for Claude models
+      max_tokens: z.number().int().positive(),
+    }).optional(),
     prediction: z.object({ // [OpenAI, 2024-11-05] Predicted Outputs - for regenerating a file with only minor changes to most of the content.
       type: z.literal('content'),
       content: z.union([z.string(), z.array(OpenAIWire_ContentParts.ContentPart_schema)]),
@@ -348,11 +352,6 @@ export namespace OpenAIWire_API_Chat_Completions {
       }).nullable().optional(),
     }).optional(),
 
-    // [OpenRouter] Reasoning parameter for Claude models
-    reasoning: z.object({
-      max_tokens: z.number().int().positive(),
-    }).optional(),
 
     seed: z.number().int().optional(),
     stop: z.array(z.string()).optional(), // Up to 4 sequences where the API will stop generating further tokens.
     user: z.string().optional(),
@@ -549,7 +548,8 @@ export namespace OpenAIWire_API_Chat_Completions {
       content: z.string().nullable().optional(),
 
       // delta-reasoning content
       reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20]
-      reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24] could be null for Anthropic models
+      reasoning: z.string().optional() // [OpenRouter, 2025-01-24]
+        .nullable(), // [OpenRouter, 2025-06-05] null on Anthropic text responses past the reasoning blocks
 
       // delta-tool-calls content
       tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
         .nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160
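In practice this means a streaming Anthropic response first carries string reasoning deltas, then reasoning flips to null once regular text begins; an illustrative pair of SSE events (payloads invented):

data: {"choices":[{"delta":{"reasoning":"Let me work through this..."}}]}
data: {"choices":[{"delta":{"reasoning":null,"content":"There are infinitely many primes."}}]}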
@@ -1,6 +1,6 @@
 import type { ModelDescriptionSchema } from '~/modules/llms/server/llm.server.types';
 
 import { wireOpenrouterModelsListOutputSchema } from '~/modules/llms/server/openai/openrouter.wiretypes';
-import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
+import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
 
 import { fromManualMapping } from '~/modules/llms/server/openai/models/models.data';
@@ -92,6 +92,35 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
   });
 }
 
+export function openRouterInjectVariants(models: ModelDescriptionSchema[], model: ModelDescriptionSchema): ModelDescriptionSchema[] {
+
+  // keep the same list of models
+  models.push(model);
+
+  // inject thinking variants for Anthropic thinking models
+  const antThinkingModels = ['anthropic/claude-opus-4', 'anthropic/claude-sonnet-4', 'anthropic/claude-3-7-sonnet'];
+  if (antThinkingModels.includes(model.id)) {
+
+    // create a thinking variant for the model, by setting 'idVariant' and modifying the label/description
+    const thinkingVariant: ModelDescriptionSchema = {
+      ...model,
+      idVariant: 'thinking',
+      label: `${model.label} (thinking)`,
+      description: `(extended thinking mode) ${model.description}`,
+      interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning],
+      // this is what makes it a thinking variant
+      parameterSpecs: [
+        ...(model.parameterSpecs || []),
+        { paramId: 'llmVndAntThinkingBudget', initialValue: 1024 },
+      ],
+    };
+    models.push(thinkingVariant);
+  }
+
+  // no more variants to inject for now
+  return models;
+}
 
 /*
 export function openRouterStatTokenizers(openRouterModels: any[]): void {
   // parse all
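openRouterInjectVariants is shaped as an Array.reduce callback, so the router can fold it over the mapped descriptions (see the pipeline at the end of this diff); a minimal usage sketch:

// Each matching Anthropic model yields two entries: the base description and a
// 'thinking' variant carrying the llmVndAntThinkingBudget parameter spec.
const descriptions = rawModels
  .map(openRouterModelToModelDescription)
  .filter(desc => !!desc)
  .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);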
@@ -25,7 +25,7 @@ import { lmStudioModelToModelDescription, localAIModelSortFn, localAIModelToMode
 import { mistralModelsSort, mistralModelToModelDescription } from './models/mistral.models';
 import { openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
 import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './models/openpipe.models';
-import { openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models';
+import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models';
 import { perplexityAIModelDescriptions, perplexityAIModelSort } from './models/perplexity.models';
 import { togetherAIModelsToModelDescriptions } from './models/together.models';
 import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
@@ -183,18 +183,6 @@ export const llmOpenAIRouter = createTRPCRouter({
       // [OpenAI-dialects]: fetch openAI-style for all but Azure (will be then used in each dialect)
       const openAIWireModelsResponse = await openaiGETOrThrow<OpenAIWire_API_Models_List.Response>(access, '/v1/models');
 
-      // Log raw models from OpenRouter API
-      if (access.dialect === 'openrouter') {
-        console.log('[DEBUG] openai.router.ts - Raw OpenRouter Models List (sample):', JSON.stringify(openAIWireModelsResponse?.data?.slice(0, 5), null, 2));
-
-        // Log all Anthropic Claude models to check the exact IDs
-        const claudeModels = openAIWireModelsResponse?.data?.filter((model: any) => model.id.startsWith('anthropic/claude'));
-        console.log('[DEBUG] openai.router.ts - All Claude Models:', JSON.stringify(claudeModels, null, 2));
-
-        // Count how many models we have total from OpenRouter
-        console.log('[DEBUG] openai.router.ts - Total models from OpenRouter:', openAIWireModelsResponse?.data?.length);
-      }
 
       // [Together] missing the .data property
       if (access.dialect === 'togetherai')
         return { models: togetherAIModelsToModelDescriptions(openAIWireModelsResponse) };
@@ -286,90 +274,12 @@ export const llmOpenAIRouter = createTRPCRouter({
           break;
 
         case 'openrouter':
-          console.log('[DEBUG] openai.router.ts - Processing OpenRouter models');
-
-          // Look for any Claude 4 models to determine actual IDs
-          const allClaudeModelIds = openAIModels
-            .filter((m: any) => m.id.startsWith('anthropic/claude'))
-            .map((m: any) => m.id);
-          console.log('[DEBUG] openai.router.ts - All Claude model IDs:', allClaudeModelIds);
-
-          // Look specifically for Claude Opus 4 and Sonnet 4 models (using exact IDs)
-          const claudeOpus4Models = openAIModels.filter((m: any) =>
-            m.id === 'anthropic/claude-opus-4');
-          const claudeSonnet4Models = openAIModels.filter((m: any) =>
-            m.id === 'anthropic/claude-sonnet-4');
-          console.log('[DEBUG] openai.router.ts - Claude Opus 4 models:',
-            claudeOpus4Models.map((m: any) => m.id));
-          console.log('[DEBUG] openai.router.ts - Claude Sonnet 4 models:',
-            claudeSonnet4Models.map((m: any) => m.id));
-
-          // Define the Claude model IDs that should have thinking variants
-          // Use actual IDs found in the OpenRouter response if possible
-          const CLAUDE_MODELS_FOR_THINKING_VARIANT = [
-            // Try to find the actual model IDs dynamically if they exist
-            claudeOpus4Models.length > 0 ? claudeOpus4Models[0].id : 'anthropic/claude-opus-4',
-            claudeSonnet4Models.length > 0 ? claudeSonnet4Models[0].id : 'anthropic/claude-sonnet-4',
-          ];
-          console.log('[DEBUG] openai.router.ts - Using these IDs for thinking variants:',
-            CLAUDE_MODELS_FOR_THINKING_VARIANT);
 
           // openRouterStatTokenizers(openAIModels);
           models = openAIModels
             .sort(openRouterModelFamilySortFn)
-            .flatMap(rawOpenRouterModel => {
-              const standardDescription = openRouterModelToModelDescription(rawOpenRouterModel);
-              if (!standardDescription) {
-                return []; // Skip if standard model description fails
-              }
-
-              const modelIdFromOpenRouter = rawOpenRouterModel.id;
-
-              // Log when we find one of our target models
-              if (CLAUDE_MODELS_FOR_THINKING_VARIANT.includes(modelIdFromOpenRouter)) {
-                console.log('[DEBUG] openai.router.ts - Found target Claude model:', modelIdFromOpenRouter);
-              }
-
-              // Check if this model is one of our targets and doesn't already have a thinking indicator
-              if (CLAUDE_MODELS_FOR_THINKING_VARIANT.includes(modelIdFromOpenRouter) &&
-                !modelIdFromOpenRouter.includes(':thinking')) {
-                console.log('[DEBUG] openai.router.ts - Creating thinking variant for:', modelIdFromOpenRouter);
-
-                // Create the "thinking" variant based on the standard one
-                const thinkingDescription: ModelDescriptionSchema = {
-                  ...standardDescription,
-                  id: `${standardDescription.id}:thinking`, // Append suffix for uniqueness
-                  label: `${standardDescription.label} (thinking)`,
-                  // Add parameter spec for the thinking budget
-                  parameterSpecs: [
-                    ...(standardDescription.parameterSpecs || []),
-                    {
-                      paramId: 'llmVndAntThinkingBudget',
-                      initialValue: 1024,
-                    },
-                  ],
-                };
-                console.log('[DEBUG] openai.router.ts - Created standard model:', standardDescription.id, standardDescription.label);
-                console.log('[DEBUG] openai.router.ts - Created thinking model:', thinkingDescription.id, thinkingDescription.label);
-                return [standardDescription, thinkingDescription];
-              } else {
-                return [standardDescription]; // Only the standard version
-              }
-            })
-            .filter(desc => !!desc);
-
-          // Count how many models we have after processing
-          console.log('[DEBUG] openai.router.ts - Total final models after processing:', models.length);
-
-          // Log the mapped OpenRouter model descriptions (focusing on Anthropic models)
-          const claudeModelsAfterProcessing = models.filter(m => m.id && m.id.includes('anthropic/claude'));
-          console.log('[DEBUG] openai.router.ts - All Claude models after processing:',
-            JSON.stringify(claudeModelsAfterProcessing.map(m => ({ id: m.id, label: m.label })), null, 2));
+            .map(openRouterModelToModelDescription)
+            .filter(desc => !!desc)
+            .reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);
           break;
       }