diff --git a/src/proxy/anthropic.ts b/src/proxy/anthropic.ts index 2d8eecc..f89249a 100644 --- a/src/proxy/anthropic.ts +++ b/src/proxy/anthropic.ts @@ -10,6 +10,7 @@ import { ProxyResHandlerWithBody } from "./middleware/response"; import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory"; import { ProxyReqManager } from "./middleware/request/proxy-req-manager"; import { claudeModels } from "../shared/claude-models"; +import { validateClaude41OpusParameters } from "../shared/claude-4-1-validation"; let modelsCache: any = null; let modelsCacheTime = 0; @@ -172,8 +173,12 @@ function maybeReassignModel(req: Request) { * https://docs.anthropic.com/en/release-notes/api#july-15th-2024 * * Also adds the required beta header for 1-hour cache duration if requested. + * Also validates Claude 4.1 Opus parameters (temperature/top_p). */ function setAnthropicBetaHeader(req: Request) { + // Validate Claude 4.1 Opus parameters before processing + validateClaude41OpusParameters(req); + const { max_tokens_to_sample } = req.body; // Initialize beta headers array diff --git a/src/proxy/aws-claude.ts b/src/proxy/aws-claude.ts index f80f27b..a34ac5d 100644 --- a/src/proxy/aws-claude.ts +++ b/src/proxy/aws-claude.ts @@ -12,6 +12,8 @@ import { } from "./middleware/request"; import { ProxyResHandlerWithBody } from "./middleware/response"; import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory"; +import { ProxyReqManager } from "./middleware/request/proxy-req-manager"; +import { validateClaude41OpusParameters } from "../shared/claude-4-1-validation"; const awsBlockingResponseHandler: ProxyResHandlerWithBody = async ( _proxyRes, @@ -167,6 +169,9 @@ awsClaudeRouter.post( * strategies are used to try to map a non-AWS model name to AWS model ID. */ function maybeReassignModel(req: Request) { + // Validate Claude 4.1 Opus parameters before processing + validateClaude41OpusParameters(req); + const model = req.body.model; // If it looks like an AWS model, use it as-is @@ -202,7 +207,7 @@ function maybeReassignModel(req: Request) { // New format: claude-sonnet-4-20250514 // match[9] = sonnet-/opus-/haiku- // match[10] = 4 (major version) - // match[12] = minor version (if any) + // match[12] = minor version (if any, from [.-](\d) pattern) // match[14] = revision (latest or date) const modelType = match[9]?.match(/([a-z]+)/)?.[1] || ""; name = modelType; @@ -210,6 +215,13 @@ function maybeReassignModel(req: Request) { minor = match[12]; rev = match[14]; + // Special case: if revision is a single digit and no minor version, + // treat revision as minor version (e.g., claude-opus-4-1 -> version 4.1) + if (!minor && rev && /^\d$/.test(rev)) { + minor = rev; + rev = undefined; + } + // Handle instant case for completeness const instant = match[1]; if (instant) { @@ -311,6 +323,16 @@ function maybeReassignModel(req: Request) { // in claude-models.ts. It will fall through and throw an error. } break; + case "4.1": + // Mapping "claude-4.1-..." variants to their actual AWS Bedrock IDs + // as defined in src/shared/claude-models.ts. + switch (name) { + case "opus": + req.body.model = "anthropic.claude-opus-4-1-20250805-v1:0"; + return; + // No sonnet or haiku variants for 4.1 yet + } + break; } throw new Error(`Provided model name (${model}) could not be mapped to a known AWS Claude model ID.`); diff --git a/src/proxy/gcp.ts b/src/proxy/gcp.ts index 45388c4..6543141 100644 --- a/src/proxy/gcp.ts +++ b/src/proxy/gcp.ts @@ -9,6 +9,7 @@ import { } from "./middleware/request"; import { ProxyResHandlerWithBody } from "./middleware/response"; import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory"; +import { validateClaude41OpusParameters } from "../shared/claude-4-1-validation"; const LATEST_GCP_SONNET_MINOR_VERSION = "20240229"; @@ -26,11 +27,12 @@ const getModelsResponse = () => { const variants = [ "claude-3-haiku@20240307", "claude-3-5-haiku@20241022", - "claude-3-sonnet@20240229", "claude-3-5-sonnet@20240620", "claude-3-5-sonnet-v2@20241022", "claude-3-7-sonnet@20250219", - "claude-3-opus@20240229", + "claude-sonnet-4@20250514", + "claude-opus-4@20250514", + "claude-opus-4-1@20250805", ]; const models = variants.map((id) => ({ @@ -129,6 +131,9 @@ gcpRouter.post( * strategies are used to try to map a non-GCP model name to GCP model ID. */ function maybeReassignModel(req: Request) { + // Validate Claude 4.1 Opus parameters before processing + validateClaude41OpusParameters(req); + const model = req.body.model; const DEFAULT_MODEL = "claude-3-5-sonnet-v2@20241022"; @@ -143,14 +148,33 @@ function maybeReassignModel(req: Request) { // - claude-3-5-haiku // - claude-3-5-haiku-latest // - claude-3-5-sonnet-20240620 - const pattern = /^claude-(\d+)[.-]?(\d)?-(sonnet|opus|haiku)(?:-(latest|\d+))?/i; + // - claude-opus-4-1 (new format) + // - claude-4.1-opus (alternative format) + const pattern = /^claude-(?:(\d+)[.-]?(\d)?-(sonnet|opus|haiku)(?:-(latest|\d+))?|(opus|sonnet|haiku)-(\d+)[.-]?(\d)?(?:-(latest|\d+))?)/i; const match = model.match(pattern); if (!match) { req.body.model = DEFAULT_MODEL; return; } - const [_, major, minor, flavor, rev] = match; + // Handle both formats: claude-3-5-sonnet and claude-opus-4-1 + const [_, major1, minor1, flavor1, rev1, flavor2, major2, minor2, rev2] = match; + + let major, minor, flavor, rev; + if (major1) { + // Old format: claude-3-5-sonnet + major = major1; + minor = minor1; + flavor = flavor1; + rev = rev1; + } else { + // New format: claude-opus-4-1 + major = major2; + minor = minor2; + flavor = flavor2; + rev = rev2; + } + const ver = minor ? `${major}.${minor}` : major; switch (ver) { @@ -201,6 +225,30 @@ function maybeReassignModel(req: Request) { } break; + case "4": + case "4.0": + switch (flavor) { + case "opus": + req.body.model = "claude-opus-4@20250514"; + return; + case "sonnet": + req.body.model = "claude-sonnet-4@20250514"; + return; + default: + req.body.model = DEFAULT_MODEL; + } + break; + + case "4.1": + switch (flavor) { + case "opus": + req.body.model = "claude-opus-4-1@20250805"; + return; + default: + req.body.model = DEFAULT_MODEL; + } + break; + default: req.body.model = DEFAULT_MODEL; } diff --git a/src/service-info.ts b/src/service-info.ts index 78e9817..19f1dfa 100644 --- a/src/service-info.ts +++ b/src/service-info.ts @@ -445,8 +445,10 @@ function addKeyToAggregates(k: KeyPoolKey) { addToFamily(`aws-claude__awsSonnet4`, 1); } else if (id.includes("claude-3-opus")) { addToFamily(`aws-claude__awsOpus3`, 1); + addToFamily(`aws-claude-opus__awsOpus3`, 1); } else if (id.includes("opus-4")) { addToFamily(`aws-claude__awsOpus4`, 1); + addToFamily(`aws-claude-opus__awsOpus4`, 1); } else if (id.includes("claude-v2")) { addToFamily(`aws-claude__awsClaude2`, 1); } diff --git a/src/shared/claude-4-1-validation.ts b/src/shared/claude-4-1-validation.ts new file mode 100644 index 0000000..d9f1516 --- /dev/null +++ b/src/shared/claude-4-1-validation.ts @@ -0,0 +1,82 @@ +import { Request } from "express"; + +/** + * Claude Opus 4.1 has stricter API validation that doesn't allow both temperature + * and top_p parameters to be specified simultaneously. This function validates and + * adjusts the request parameters for Claude Opus 4.1 models ONLY. + * + * Rules: + * - If both parameters are at default values (1.0), omit top_p + * - If only one parameter is at default, omit the default one + * - If both are non-default, throw an error + */ +export function validateClaude41OpusParameters(req: Request): void { + const model = req.body.model; + + // Only apply this validation to Claude Opus 4.1 models + if (!isClaude41OpusModel(model)) { + return; + } + + const temperature = req.body.temperature; + const topP = req.body.top_p; + + // If neither parameter is specified, no validation needed + if (temperature === undefined && topP === undefined) { + return; + } + + // Default values for Claude API + const DEFAULT_TEMPERATURE = 1.0; + const DEFAULT_TOP_P = 1.0; + + const tempIsDefault = temperature === undefined || temperature === DEFAULT_TEMPERATURE; + const topPIsDefault = topP === undefined || topP === DEFAULT_TOP_P; + + // If both are at default values, omit top_p (keep temperature) + if (tempIsDefault && topPIsDefault) { + delete req.body.top_p; + req.log?.info("Claude Opus 4.1: Both temperature and top_p at default, omitting top_p"); + return; + } + + // If only one is at default, omit the default one + if (tempIsDefault && !topPIsDefault) { + delete req.body.temperature; + req.log?.info("Claude Opus 4.1: Temperature at default, omitting temperature"); + return; + } + + if (!tempIsDefault && topPIsDefault) { + delete req.body.top_p; + req.log?.info("Claude Opus 4.1: top_p at default, omitting top_p"); + return; + } + + // If both are non-default, throw an error + if (!tempIsDefault && !topPIsDefault) { + throw new Error( + "Claude Opus 4.1 does not support both temperature and top_p parameters being set to non-default values simultaneously. " + + "Please specify only one of these parameters or set one to its default value (1.0)." + ); + } +} + +/** + * Checks if the given model is a Claude Opus 4.1 model. + * This includes all provider formats for Claude Opus 4.1 ONLY. + */ +function isClaude41OpusModel(model: string): boolean { + if (!model) return false; + + // Anthropic API format + if (model.includes("claude-opus-4-1")) return true; + + // AWS Bedrock format + if (model.includes("anthropic.claude-opus-4-1")) return true; + + // GCP Vertex AI format + if (model.includes("claude-opus-4-1@")) return true; + + return false; +} diff --git a/src/shared/claude-models.ts b/src/shared/claude-models.ts index 050a1ab..e445f93 100644 --- a/src/shared/claude-models.ts +++ b/src/shared/claude-models.ts @@ -19,16 +19,22 @@ export const claudeModels: ClaudeModelMapping[] = [ { awsId: "anthropic.claude-3-opus-20240229-v1:0", anthropicId: "claude-3-opus-latest", displayName: "Claude 3 Opus (Latest)" }, { awsId: "anthropic.claude-sonnet-4-20250514-v1:0", anthropicId: "claude-sonnet-4-20250514", displayName: "Claude 4 Sonnet" }, { awsId: "anthropic.claude-sonnet-4-20250514-v1:0", anthropicId: "claude-sonnet-4-latest", displayName: "Claude 4 Sonnet (Latest)" }, - { awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-20250514", displayName: "Claude 4 Opus" }, - { awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-latest", displayName: "Claude 4 Opus (Latest)" }, + { awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-20250514", displayName: "Claude 4.0 Opus" }, + { awsId: "anthropic.claude-opus-4-1-20250805-v1:0", anthropicId: "claude-opus-4-1-20250805", displayName: "Claude 4.1 Opus" }, + { awsId: "anthropic.claude-opus-4-1-20250805-v1:0", anthropicId: "claude-opus-4-latest", displayName: "Claude 4 Opus (Latest)" }, + { awsId: "anthropic.claude-opus-4-1-20250805-v1:0", anthropicId: "claude-opus-4-1", displayName: "Claude 4.1 Opus" }, { awsId: "anthropic.claude-sonnet-4-20250514-v1:0", anthropicId: "claude-sonnet-4-0", displayName: "Claude 4 Sonnet" }, - { awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-0", displayName: "Claude 4 Opus" }, + { awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-0", displayName: "Claude 4.0 Opus" }, ]; export function findByAwsId(awsId: string): ClaudeModelMapping | undefined { return claudeModels.find(model => model.awsId === awsId); } +export function findByAnthropicId(anthropicId: string): ClaudeModelMapping | undefined { + return claudeModels.find(model => model.anthropicId === anthropicId); +} + export function getAllClaudeModels(): ClaudeModelMapping[] { return claudeModels; } \ No newline at end of file diff --git a/src/shared/key-management/aws/checker.ts b/src/shared/key-management/aws/checker.ts index 34c3850..23b5b25 100644 --- a/src/shared/key-management/aws/checker.ts +++ b/src/shared/key-management/aws/checker.ts @@ -27,6 +27,7 @@ const KNOWN_MODEL_IDS: ModuleAliasTuple[] = [ ["anthropic.claude-3-7-sonnet-20250219-v1:0"], ["anthropic.claude-sonnet-4-20250514-v1:0"], ["anthropic.claude-opus-4-20250514-v1:0"], + ["anthropic.claude-opus-4-1-20250805-v1:0"], ["mistral.mistral-7b-instruct-v0:2"], ["mistral.mixtral-8x7b-instruct-v0:1"], ["mistral.mistral-large-2402-v1:0"], diff --git a/src/shared/key-management/aws/provider.ts b/src/shared/key-management/aws/provider.ts index 408c710..80ba47e 100644 --- a/src/shared/key-management/aws/provider.ts +++ b/src/shared/key-management/aws/provider.ts @@ -3,6 +3,7 @@ import { config } from "../../../config"; import { logger } from "../../../logger"; import { PaymentRequiredError } from "../../errors"; import { AwsBedrockModelFamily, getAwsBedrockModelFamily } from "../../models"; +import { findByAnthropicId } from "../../claude-models"; import { createGenericGetLockoutPeriod, Key, KeyProvider } from ".."; import { prioritizeKeys } from "../prioritize-keys"; import { AwsKeyChecker } from "./checker"; @@ -96,6 +97,15 @@ export class AwsBedrockKeyProvider implements KeyProvider { // Claude 2 is the only model that breaks this convention; Anthropic calls // it claude-2 but AWS calls it claude-v2. if (model.includes("claude-2")) neededVariantId = "claude-v2"; + + // For Claude models, try to resolve aliases to AWS model IDs + if (model.includes("claude") && !model.includes("anthropic.")) { + const claudeMapping = findByAnthropicId(model); + if (claudeMapping) { + neededVariantId = claudeMapping.awsId; + } + } + const neededFamily = getAwsBedrockModelFamily(model); const availableKeys = this.keys.filter((k) => { diff --git a/src/shared/key-management/gcp/checker.ts b/src/shared/key-management/gcp/checker.ts index 308773d..1184690 100644 --- a/src/shared/key-management/gcp/checker.ts +++ b/src/shared/key-management/gcp/checker.ts @@ -42,19 +42,20 @@ export class GcpKeyChecker extends KeyCheckerBase { this.invokeModel("claude-3-haiku@20240307", key, true), this.invokeModel("claude-3-sonnet@20240229", key, true), this.invokeModel("claude-3-opus@20240229", key, true), + this.invokeModel("claude-opus-4-1@20250805", key, true), this.invokeModel("claude-3-5-sonnet-v2@20241022", key, true), ]; - const [sonnet, haiku, opus, sonnet35] = await Promise.all(checks); + const [sonnet, haiku, opus3, opus41, sonnet35] = await Promise.all(checks); this.log.debug( - { key: key.hash, sonnet, haiku, opus, sonnet35 }, + { key: key.hash, sonnet, haiku, opus3, opus41, sonnet35 }, "GCP model initial tests complete." ); const families: GcpModelFamily[] = []; if (sonnet || sonnet35 || haiku) families.push("gcp-claude"); - if (opus) families.push("gcp-claude-opus"); + if (opus3 || opus41) families.push("gcp-claude-opus"); if (families.length === 0) { this.log.warn( @@ -81,6 +82,7 @@ export class GcpKeyChecker extends KeyCheckerBase { await this.invokeModel("claude-3-5-sonnet-v2@20241022", key, false); } else { await this.invokeModel("claude-3-opus@20240229", key, false); + await this.invokeModel("claude-opus-4-1@20250805", key, false); } this.updateKey(key.hash, { lastChecked: Date.now() });