mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-11 14:10:15 -07:00
OpenRouter: support for Anthropic thinking variants via the OpenAI protocol. #811
This commit is contained in:
@@ -474,17 +474,6 @@ export function prettyShortChatModelName(model: string | undefined): string {
|
||||
}
|
||||
|
||||
function _prettyAnthropicModelName(modelId: string): string | null {
|
||||
// Check for OpenRouter Anthropic models (format: "anthropic/claude-...")
|
||||
if (modelId.includes('anthropic/claude-')) {
|
||||
const subStr = modelId.replace('anthropic/', '');
|
||||
|
||||
// Handle Claude 4 models from OpenRouter
|
||||
if (subStr.includes('claude-opus-4')) return 'Claude 4 Opus';
|
||||
if (subStr.includes('claude-sonnet-4')) return 'Claude 4 Sonnet';
|
||||
if (subStr.includes('claude-haiku-4')) return 'Claude 4 Haiku';
|
||||
}
|
||||
|
||||
// Handle direct Anthropic models
|
||||
if (modelId.indexOf('claude-') === -1) return null; // not a Claude model
|
||||
|
||||
// must match any known prefix
|
||||
@@ -500,10 +489,12 @@ function _prettyAnthropicModelName(modelId: string): string | null {
|
||||
|
||||
const subStr = modelId.slice(claudeIndex);
|
||||
const version =
|
||||
subStr.includes('-4-') ? '4'
|
||||
: subStr.includes('-3-7-') ? '3.7'
|
||||
: subStr.includes('-3-5-') ? '3.5'
|
||||
: '3';
|
||||
subStr.includes('-5') ? '5'
|
||||
: subStr.includes('-4') ? '4'
|
||||
: subStr.includes('-3-7') ? '3.7'
|
||||
: subStr.includes('-3-5') ? '3.5'
|
||||
: subStr.includes('-3') ? '3'
|
||||
: '?';
|
||||
|
||||
if (subStr.includes(`-opus`)) return `Claude ${version} Opus`;
|
||||
if (subStr.includes(`-sonnet`)) return `Claude ${version} Sonnet`;
|
||||
|
||||
@@ -92,9 +92,7 @@ export function aixCreateModelFromLLMOptions(
|
||||
...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
|
||||
...(llmTopP !== undefined ? { topP: llmTopP } : {}),
|
||||
...(llmForceNoStream ? { forceNoStream: llmForceNoStream } : {}),
|
||||
...(llmVndAntThinkingBudget !== undefined ? {
|
||||
vndAntThinkingBudget: llmVndAntThinkingBudget
|
||||
} : {}),
|
||||
...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget } : {}),
|
||||
...(llmVndGeminiShowThoughts ? { vndGeminiShowThoughts: llmVndGeminiShowThoughts } : {}),
|
||||
...(llmVndGeminiThinkingBudget !== undefined ? { vndGeminiThinkingBudget: llmVndGeminiThinkingBudget } : {}),
|
||||
...(llmVndOaiReasoningEffort ? { vndOaiReasoningEffort: llmVndOaiReasoningEffort } : {}),
|
||||
|
||||
@@ -120,27 +120,21 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
|
||||
};
|
||||
}
|
||||
|
||||
// Handle Anthropic Claude thinking capability via OpenRouter
|
||||
const hasThinkingSuffix = model.id.includes(':thinking');
|
||||
if (hasThinkingSuffix && openAIDialect === 'openrouter') {
|
||||
payload.model = model.id.replace(':thinking', '');
|
||||
}
|
||||
|
||||
// Get thinking budget from the model's vndAntThinkingBudget property
|
||||
let thinkingBudget: number | undefined;
|
||||
if (typeof model.vndAntThinkingBudget === 'number') {
|
||||
thinkingBudget = model.vndAntThinkingBudget;
|
||||
}
|
||||
|
||||
// Add reasoning parameter for Claude 4 thinking capability via OpenRouter
|
||||
if (openAIDialect === 'openrouter' && (thinkingBudget !== undefined || hasThinkingSuffix)) {
|
||||
// Use explicitly configured budget if provided, otherwise fall back to default
|
||||
const finalThinkingBudget = thinkingBudget !== undefined ? thinkingBudget : 1024;
|
||||
|
||||
// OpenRouter expects reasoning.max_tokens for Anthropic's thinking feature
|
||||
payload.reasoning = {
|
||||
max_tokens: finalThinkingBudget,
|
||||
};
|
||||
|
||||
// [Anthropic] via OpenAI API (OpenRouter) - https://openrouter.ai/docs/use-cases/reasoning-tokens
|
||||
if (openAIDialect === 'openrouter' && model.vndAntThinkingBudget !== undefined) {
|
||||
|
||||
// vndAntThinkingBudget's presence indicates a user preference:
|
||||
// - [x] a number, which is the budget in tokens
|
||||
// - [ ] null: shall disable thinking, but openrouter does not support this?
|
||||
if (model.vndAntThinkingBudget === null) {
|
||||
// simply not setting the reasoning field downgrades this to a non-thinking model
|
||||
// console.warn('OpenRouter does not support disabling thinking of Anthropic models. Using default.');
|
||||
} else {
|
||||
payload.reasoning = {
|
||||
max_tokens: model.vndAntThinkingBudget || 1024,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (hotFixOpenAIOFamily)
|
||||
|
||||
@@ -98,9 +98,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
|
||||
body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming),
|
||||
},
|
||||
demuxerFormat: streaming ? 'fast-sse' : null,
|
||||
chatGenerateParse: streaming
|
||||
? createOpenAIChatCompletionsChunkParser()
|
||||
: createOpenAIChatCompletionsParserNS(),
|
||||
chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,8 +65,8 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
|
||||
// ```Can you extend the Zod chunk response object parsing (all optional) to include the missing data? The following is an example of the object I received:```
|
||||
const chunkData = JSON.parse(eventData); // this is here just for ease of breakpoint, otherwise it could be inlined
|
||||
|
||||
// [OpenRouter] transmits upstream errors pre-parsing (object wouldn't be valid)
|
||||
if (_isOpenRouterResponse(chunkData) && _forwardOpenRouterDataError(chunkData, pt))
|
||||
// [OpenRouter/others] transmits upstream errors pre-parsing (object wouldn't be valid)
|
||||
if (_forwardOpenRouterDataError(chunkData, pt))
|
||||
return;
|
||||
|
||||
const json = OpenAIWire_API_Chat_Completions.ChunkResponse_schema.parse(chunkData);
|
||||
@@ -259,7 +259,15 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
|
||||
|
||||
// Throws on malformed event data
|
||||
const completeData = JSON.parse(eventData);
|
||||
|
||||
|
||||
// [OpenRouter/others] transmits upstream errors pre-parsing (object wouldn't be valid)
|
||||
if (_forwardOpenRouterDataError(completeData, pt))
|
||||
return;
|
||||
|
||||
// [OpenAI] we don't know yet if warning messages are sent in non-streaming - for now we log
|
||||
if (completeData.warning)
|
||||
console.log('AIX: OpenAI-dispatch-NS warning:', completeData.warning);
|
||||
|
||||
// Parse the complete response
|
||||
const json = OpenAIWire_API_Chat_Completions.Response_schema.parse(completeData);
|
||||
|
||||
@@ -288,11 +296,6 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
|
||||
if (!message)
|
||||
throw new Error(`server response missing content (finish_reason: ${finish_reason})`);
|
||||
|
||||
// Handle reasoning field from OpenRouter
|
||||
if (typeof message.reasoning === 'string') {
|
||||
pt.appendReasoningText(message.reasoning);
|
||||
}
|
||||
|
||||
// message: Text
|
||||
if (typeof message.content === 'string') {
|
||||
if (message.content) {
|
||||
@@ -302,6 +305,10 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
|
||||
} else if (message.content !== undefined && message.content !== null)
|
||||
throw new Error(`unexpected message content type: ${typeof message.content}`);
|
||||
|
||||
// [OpenRouter, 2025-06-05] Handle reasoning field from OpenRouter
|
||||
if (typeof message.reasoning === 'string')
|
||||
pt.appendReasoningText(message.reasoning);
|
||||
|
||||
// message: Tool Calls
|
||||
for (const toolCall of (message.tool_calls || [])) {
|
||||
|
||||
@@ -447,21 +454,6 @@ function _fromOpenAIUsage(usage: OpenAIWire_API_Chat_Completions.Response['usage
|
||||
return metricsUpdate;
|
||||
}
|
||||
|
||||
/**
 * Heuristic check for whether an already-JSON-parsed payload looks like it came from OpenRouter.
 *
 * Two markers are looked at, and either one is sufficient:
 *  - a truthy top-level `provider` property
 *  - a truthy `error.metadata.provider_name` property
 *
 * @param parsedData the decoded payload; may be null/undefined or any other JSON value
 * @returns true when one of the markers above is present, false otherwise (including for nullish input)
 */
function _isOpenRouterResponse(parsedData: any): boolean {
  // nothing to inspect on null/undefined/empty payloads
  if (!parsedData)
    return false;

  // optional chaining keeps the nested lookup safe when `error` or `metadata` are missing
  const hasProviderField = !!parsedData.provider;
  const hasErrorProviderName = !!parsedData.error?.metadata?.provider_name;
  return hasProviderField || hasErrorProviderName;
}
|
||||
|
||||
/**
|
||||
* If there's an error in the pre-decoded message, push it down to the particle transmitter.
|
||||
*/
|
||||
|
||||
@@ -151,10 +151,6 @@ export namespace OpenAIWire_Messages {
|
||||
* [OpenAI, 2024-10-01] The refusal message generated by the model.
|
||||
*/
|
||||
refusal: z.string().nullable().optional(),
|
||||
/**
|
||||
* [OpenRouter, 2025-01-24] The reasoning/thinking content generated by the model.
|
||||
*/
|
||||
reasoning: z.string().nullable().optional(),
|
||||
/**
|
||||
* [OpenAI, 2024-10-17] Data about a previous audio response from the model. Usage depends on the context:
|
||||
* - request (this schema): has an id, if present
|
||||
@@ -165,6 +161,11 @@ export namespace OpenAIWire_Messages {
|
||||
id: z.string(),
|
||||
}).nullable().optional(),
|
||||
|
||||
/**
|
||||
* [OpenRouter, 2025-06-05] The reasoning text generated by the model (e.g. with Anthropic thinking requests).
|
||||
*/
|
||||
reasoning: z.string().nullable().optional(),
|
||||
|
||||
// function_call: // ignored, as it's deprecated
|
||||
// name: _optionalParticipantName, // omitted by choice: generally unsupported
|
||||
});
|
||||
@@ -291,6 +292,9 @@ export namespace OpenAIWire_API_Chat_Completions {
|
||||
}).optional(),
|
||||
reasoning_effort: z.enum(['low', 'medium', 'high']).optional(), // [OpenAI, 2024-12-17] reasoning effort, o1 models only for now
|
||||
include_reasoning: z.boolean().optional(), // [OpenRouter, 2025-01-24] enables reasoning tokens
|
||||
reasoning: z.object({ // [OpenRouter, 2025-06-05] Reasoning parameter for Claude models
|
||||
max_tokens: z.number().int().positive(),
|
||||
}).optional(),
|
||||
prediction: z.object({ // [OpenAI, 2024-11-05] Predicted Outputs - for regenerating a file with only minor changes to most of the content.
|
||||
type: z.literal('content'),
|
||||
content: z.union([z.string(), z.array(OpenAIWire_ContentParts.ContentPart_schema)]),
|
||||
@@ -348,11 +352,6 @@ export namespace OpenAIWire_API_Chat_Completions {
|
||||
}).nullable().optional(),
|
||||
}).optional(),
|
||||
|
||||
// [OpenRouter] Reasoning parameter for Claude models
|
||||
reasoning: z.object({
|
||||
max_tokens: z.number().int().positive(),
|
||||
}).optional(),
|
||||
|
||||
seed: z.number().int().optional(),
|
||||
stop: z.array(z.string()).optional(), // Up to 4 sequences where the API will stop generating further tokens.
|
||||
user: z.string().optional(),
|
||||
@@ -549,7 +548,8 @@ export namespace OpenAIWire_API_Chat_Completions {
|
||||
content: z.string().nullable().optional(),
|
||||
// delta-reasoning content
|
||||
reasoning_content: z.string().nullable().optional(), // [Deepseek, 2025-01-20]
|
||||
reasoning: z.string().nullable().optional(), // [OpenRouter, 2025-01-24] could be null for Anthropic models
|
||||
reasoning: z.string().optional() // [OpenRouter, 2025-01-24]
|
||||
.nullable(), // [OpenRouter, 2025-06-05] null on Anthropic text responses past the reasoning blocks
|
||||
// delta-tool-calls content
|
||||
tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
|
||||
.nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { ModelDescriptionSchema } from '~/modules/llms/server/llm.server.types';
|
||||
import { wireOpenrouterModelsListOutputSchema } from '~/modules/llms/server/openai/openrouter.wiretypes';
|
||||
import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
import { fromManualMapping } from '~/modules/llms/server/openai/models/models.data';
|
||||
|
||||
|
||||
@@ -92,6 +92,35 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Accumulator step that appends a model to the list and, for selected Anthropic models,
 * also appends a derived "thinking" variant right after it.
 *
 * The signature matches an `Array.prototype.reduce` callback, with `models` as the accumulator.
 * The accumulator is mutated in place and returned.
 *
 * @param models the list built so far; the base model (and possibly its variant) is pushed onto it
 * @param model the model description to add
 * @returns the same `models` array that was passed in
 */
export function openRouterInjectVariants(models: ModelDescriptionSchema[], model: ModelDescriptionSchema): ModelDescriptionSchema[] {
  // the base model is always kept, and precedes any variant derived from it
  models.push(model);

  // only these exact ids get a thinking variant (exact match, not prefix match)
  const thinkingCapableIds = ['anthropic/claude-opus-4', 'anthropic/claude-sonnet-4', 'anthropic/claude-3-7-sonnet'];
  if (!thinkingCapableIds.includes(model.id))
    return models;

  // the variant is a copy of the base model with 'idVariant' set and label/description adjusted
  const inheritedParameterSpecs = model.parameterSpecs || [];
  const thinkingVariant: ModelDescriptionSchema = {
    ...model,
    idVariant: 'thinking',
    label: `${model.label} (thinking)`,
    description: `(extended thinking mode) ${model.description}`,
    // note: replaces (does not merge with) the interfaces of the base model
    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning],
    // the thinking-budget parameter is what makes this a thinking variant
    parameterSpecs: [
      ...inheritedParameterSpecs,
      { paramId: 'llmVndAntThinkingBudget', initialValue: 1024 },
    ],
  };
  models.push(thinkingVariant);

  // no more variants to inject for now
  return models;
}
|
||||
|
||||
/*
|
||||
export function openRouterStatTokenizers(openRouterModels: any[]): void {
|
||||
// parse all
|
||||
|
||||
@@ -25,7 +25,7 @@ import { lmStudioModelToModelDescription, localAIModelSortFn, localAIModelToMode
|
||||
import { mistralModelsSort, mistralModelToModelDescription } from './models/mistral.models';
|
||||
import { openAIModelFilter, openAIModelToModelDescription, openAISortModels } from './models/openai.models';
|
||||
import { openPipeModelDescriptions, openPipeModelSort, openPipeModelToModelDescriptions } from './models/openpipe.models';
|
||||
import { openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models';
|
||||
import { openRouterInjectVariants, openRouterModelFamilySortFn, openRouterModelToModelDescription } from './models/openrouter.models';
|
||||
import { perplexityAIModelDescriptions, perplexityAIModelSort } from './models/perplexity.models';
|
||||
import { togetherAIModelsToModelDescriptions } from './models/together.models';
|
||||
import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
|
||||
@@ -183,18 +183,6 @@ export const llmOpenAIRouter = createTRPCRouter({
|
||||
// [OpenAI-dialects]: fetch openAI-style for all but Azure (will be then used in each dialect)
|
||||
const openAIWireModelsResponse = await openaiGETOrThrow<OpenAIWire_API_Models_List.Response>(access, '/v1/models');
|
||||
|
||||
// Log raw models from OpenRouter API
|
||||
if (access.dialect === 'openrouter') {
|
||||
console.log('[DEBUG] openai.router.ts - Raw OpenRouter Models List (sample):', JSON.stringify(openAIWireModelsResponse?.data?.slice(0, 5), null, 2));
|
||||
|
||||
// Log all Anthropic Claude models to check the exact IDs
|
||||
const claudeModels = openAIWireModelsResponse?.data?.filter((model: any) => model.id.startsWith('anthropic/claude'));
|
||||
console.log('[DEBUG] openai.router.ts - All Claude Models:', JSON.stringify(claudeModels, null, 2));
|
||||
|
||||
// Count how many models we have total from OpenRouter
|
||||
console.log('[DEBUG] openai.router.ts - Total models from OpenRouter:', openAIWireModelsResponse?.data?.length);
|
||||
}
|
||||
|
||||
// [Together] missing the .data property
|
||||
if (access.dialect === 'togetherai')
|
||||
return { models: togetherAIModelsToModelDescriptions(openAIWireModelsResponse) };
|
||||
@@ -286,90 +274,12 @@ export const llmOpenAIRouter = createTRPCRouter({
|
||||
break;
|
||||
|
||||
case 'openrouter':
|
||||
console.log('[DEBUG] openai.router.ts - Processing OpenRouter models');
|
||||
|
||||
// Look for any Claude 4 models to determine actual IDs
|
||||
const allClaudeModelIds = openAIModels
|
||||
.filter((m: any) => m.id.startsWith('anthropic/claude'))
|
||||
.map((m: any) => m.id);
|
||||
|
||||
console.log('[DEBUG] openai.router.ts - All Claude model IDs:', allClaudeModelIds);
|
||||
|
||||
// Look specifically for Claude Opus 4 and Sonnet 4 models (using exact IDs)
|
||||
const claudeOpus4Models = openAIModels.filter((m: any) =>
|
||||
m.id === 'anthropic/claude-opus-4');
|
||||
const claudeSonnet4Models = openAIModels.filter((m: any) =>
|
||||
m.id === 'anthropic/claude-sonnet-4');
|
||||
|
||||
console.log('[DEBUG] openai.router.ts - Claude Opus 4 models:',
|
||||
claudeOpus4Models.map((m: any) => m.id));
|
||||
console.log('[DEBUG] openai.router.ts - Claude Sonnet 4 models:',
|
||||
claudeSonnet4Models.map((m: any) => m.id));
|
||||
|
||||
// Define the Claude model IDs that should have thinking variants
|
||||
// Use actual IDs found in the OpenRouter response if possible
|
||||
const CLAUDE_MODELS_FOR_THINKING_VARIANT = [
|
||||
// Try to find the actual model IDs dynamically if they exist
|
||||
claudeOpus4Models.length > 0 ? claudeOpus4Models[0].id : 'anthropic/claude-opus-4',
|
||||
claudeSonnet4Models.length > 0 ? claudeSonnet4Models[0].id : 'anthropic/claude-sonnet-4',
|
||||
];
|
||||
|
||||
console.log('[DEBUG] openai.router.ts - Using these IDs for thinking variants:',
|
||||
CLAUDE_MODELS_FOR_THINKING_VARIANT);
|
||||
|
||||
// openRouterStatTokenizers(openAIModels);
|
||||
models = openAIModels
|
||||
.sort(openRouterModelFamilySortFn)
|
||||
.flatMap(rawOpenRouterModel => {
|
||||
const standardDescription = openRouterModelToModelDescription(rawOpenRouterModel);
|
||||
if (!standardDescription) {
|
||||
return []; // Skip if standard model description fails
|
||||
}
|
||||
|
||||
const modelIdFromOpenRouter = rawOpenRouterModel.id;
|
||||
|
||||
// Log when we find one of our target models
|
||||
if (CLAUDE_MODELS_FOR_THINKING_VARIANT.includes(modelIdFromOpenRouter)) {
|
||||
console.log('[DEBUG] openai.router.ts - Found target Claude model:', modelIdFromOpenRouter);
|
||||
}
|
||||
|
||||
// Check if this model is one of our targets and doesn't already have a thinking indicator
|
||||
if (CLAUDE_MODELS_FOR_THINKING_VARIANT.includes(modelIdFromOpenRouter) &&
|
||||
!modelIdFromOpenRouter.includes(':thinking')) {
|
||||
console.log('[DEBUG] openai.router.ts - Creating thinking variant for:', modelIdFromOpenRouter);
|
||||
|
||||
// Create the "thinking" variant based on the standard one
|
||||
const thinkingDescription: ModelDescriptionSchema = {
|
||||
...standardDescription,
|
||||
id: `${standardDescription.id}:thinking`, // Append suffix for uniqueness
|
||||
label: `${standardDescription.label} (thinking)`,
|
||||
// Add parameter spec for the thinking budget
|
||||
parameterSpecs: [
|
||||
...(standardDescription.parameterSpecs || []),
|
||||
{
|
||||
paramId: 'llmVndAntThinkingBudget',
|
||||
initialValue: 1024,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
console.log('[DEBUG] openai.router.ts - Created standard model:', standardDescription.id, standardDescription.label);
|
||||
console.log('[DEBUG] openai.router.ts - Created thinking model:', thinkingDescription.id, thinkingDescription.label);
|
||||
|
||||
return [standardDescription, thinkingDescription];
|
||||
} else {
|
||||
return [standardDescription]; // Only the standard version
|
||||
}
|
||||
})
|
||||
.filter(desc => !!desc);
|
||||
|
||||
// Count how many models we have after processing
|
||||
console.log('[DEBUG] openai.router.ts - Total final models after processing:', models.length);
|
||||
|
||||
// Log the mapped OpenRouter model descriptions (focusing on Anthropic models)
|
||||
const claudeModelsAfterProcessing = models.filter(m => m.id && m.id.includes('anthropic/claude'));
|
||||
console.log('[DEBUG] openai.router.ts - All Claude models after processing:',
|
||||
JSON.stringify(claudeModelsAfterProcessing.map(m => ({ id: m.id, label: m.label })), null, 2));
|
||||
.map(openRouterModelToModelDescription)
|
||||
.filter(desc => !!desc)
|
||||
.reduce(openRouterInjectVariants, [] as ModelDescriptionSchema[]);
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user