opus 4.1
This commit is contained in:
@@ -10,6 +10,7 @@ import { ProxyResHandlerWithBody } from "./middleware/response";
|
||||
import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory";
|
||||
import { ProxyReqManager } from "./middleware/request/proxy-req-manager";
|
||||
import { claudeModels } from "../shared/claude-models";
|
||||
import { validateClaude41OpusParameters } from "../shared/claude-4-1-validation";
|
||||
|
||||
let modelsCache: any = null;
|
||||
let modelsCacheTime = 0;
|
||||
@@ -172,8 +173,12 @@ function maybeReassignModel(req: Request) {
|
||||
* https://docs.anthropic.com/en/release-notes/api#july-15th-2024
|
||||
*
|
||||
* Also adds the required beta header for 1-hour cache duration if requested.
|
||||
* Also validates Claude 4.1 Opus parameters (temperature/top_p).
|
||||
*/
|
||||
function setAnthropicBetaHeader(req: Request) {
|
||||
// Validate Claude 4.1 Opus parameters before processing
|
||||
validateClaude41OpusParameters(req);
|
||||
|
||||
const { max_tokens_to_sample } = req.body;
|
||||
|
||||
// Initialize beta headers array
|
||||
|
||||
+23
-1
@@ -12,6 +12,8 @@ import {
|
||||
} from "./middleware/request";
|
||||
import { ProxyResHandlerWithBody } from "./middleware/response";
|
||||
import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory";
|
||||
import { ProxyReqManager } from "./middleware/request/proxy-req-manager";
|
||||
import { validateClaude41OpusParameters } from "../shared/claude-4-1-validation";
|
||||
|
||||
const awsBlockingResponseHandler: ProxyResHandlerWithBody = async (
|
||||
_proxyRes,
|
||||
@@ -167,6 +169,9 @@ awsClaudeRouter.post(
|
||||
* strategies are used to try to map a non-AWS model name to AWS model ID.
|
||||
*/
|
||||
function maybeReassignModel(req: Request) {
|
||||
// Validate Claude 4.1 Opus parameters before processing
|
||||
validateClaude41OpusParameters(req);
|
||||
|
||||
const model = req.body.model;
|
||||
|
||||
// If it looks like an AWS model, use it as-is
|
||||
@@ -202,7 +207,7 @@ function maybeReassignModel(req: Request) {
|
||||
// New format: claude-sonnet-4-20250514
|
||||
// match[9] = sonnet-/opus-/haiku-
|
||||
// match[10] = 4 (major version)
|
||||
// match[12] = minor version (if any)
|
||||
// match[12] = minor version (if any, from [.-](\d) pattern)
|
||||
// match[14] = revision (latest or date)
|
||||
const modelType = match[9]?.match(/([a-z]+)/)?.[1] || "";
|
||||
name = modelType;
|
||||
@@ -210,6 +215,13 @@ function maybeReassignModel(req: Request) {
|
||||
minor = match[12];
|
||||
rev = match[14];
|
||||
|
||||
// Special case: if revision is a single digit and no minor version,
|
||||
// treat revision as minor version (e.g., claude-opus-4-1 -> version 4.1)
|
||||
if (!minor && rev && /^\d$/.test(rev)) {
|
||||
minor = rev;
|
||||
rev = undefined;
|
||||
}
|
||||
|
||||
// Handle instant case for completeness
|
||||
const instant = match[1];
|
||||
if (instant) {
|
||||
@@ -311,6 +323,16 @@ function maybeReassignModel(req: Request) {
|
||||
// in claude-models.ts. It will fall through and throw an error.
|
||||
}
|
||||
break;
|
||||
case "4.1":
|
||||
// Mapping "claude-4.1-..." variants to their actual AWS Bedrock IDs
|
||||
// as defined in src/shared/claude-models.ts.
|
||||
switch (name) {
|
||||
case "opus":
|
||||
req.body.model = "anthropic.claude-opus-4-1-20250805-v1:0";
|
||||
return;
|
||||
// No sonnet or haiku variants for 4.1 yet
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
throw new Error(`Provided model name (${model}) could not be mapped to a known AWS Claude model ID.`);
|
||||
|
||||
+52
-4
@@ -9,6 +9,7 @@ import {
|
||||
} from "./middleware/request";
|
||||
import { ProxyResHandlerWithBody } from "./middleware/response";
|
||||
import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory";
|
||||
import { validateClaude41OpusParameters } from "../shared/claude-4-1-validation";
|
||||
|
||||
const LATEST_GCP_SONNET_MINOR_VERSION = "20240229";
|
||||
|
||||
@@ -26,11 +27,12 @@ const getModelsResponse = () => {
|
||||
const variants = [
|
||||
"claude-3-haiku@20240307",
|
||||
"claude-3-5-haiku@20241022",
|
||||
"claude-3-sonnet@20240229",
|
||||
"claude-3-5-sonnet@20240620",
|
||||
"claude-3-5-sonnet-v2@20241022",
|
||||
"claude-3-7-sonnet@20250219",
|
||||
"claude-3-opus@20240229",
|
||||
"claude-sonnet-4@20250514",
|
||||
"claude-opus-4@20250514",
|
||||
"claude-opus-4-1@20250805",
|
||||
];
|
||||
|
||||
const models = variants.map((id) => ({
|
||||
@@ -129,6 +131,9 @@ gcpRouter.post(
|
||||
* strategies are used to try to map a non-GCP model name to GCP model ID.
|
||||
*/
|
||||
function maybeReassignModel(req: Request) {
|
||||
// Validate Claude 4.1 Opus parameters before processing
|
||||
validateClaude41OpusParameters(req);
|
||||
|
||||
const model = req.body.model;
|
||||
const DEFAULT_MODEL = "claude-3-5-sonnet-v2@20241022";
|
||||
|
||||
@@ -143,14 +148,33 @@ function maybeReassignModel(req: Request) {
|
||||
// - claude-3-5-haiku
|
||||
// - claude-3-5-haiku-latest
|
||||
// - claude-3-5-sonnet-20240620
|
||||
const pattern = /^claude-(\d+)[.-]?(\d)?-(sonnet|opus|haiku)(?:-(latest|\d+))?/i;
|
||||
// - claude-opus-4-1 (new format)
|
||||
// - claude-4.1-opus (alternative format)
|
||||
const pattern = /^claude-(?:(\d+)[.-]?(\d)?-(sonnet|opus|haiku)(?:-(latest|\d+))?|(opus|sonnet|haiku)-(\d+)[.-]?(\d)?(?:-(latest|\d+))?)/i;
|
||||
const match = model.match(pattern);
|
||||
if (!match) {
|
||||
req.body.model = DEFAULT_MODEL;
|
||||
return;
|
||||
}
|
||||
|
||||
const [_, major, minor, flavor, rev] = match;
|
||||
// Handle both formats: claude-3-5-sonnet and claude-opus-4-1
|
||||
const [_, major1, minor1, flavor1, rev1, flavor2, major2, minor2, rev2] = match;
|
||||
|
||||
let major, minor, flavor, rev;
|
||||
if (major1) {
|
||||
// Old format: claude-3-5-sonnet
|
||||
major = major1;
|
||||
minor = minor1;
|
||||
flavor = flavor1;
|
||||
rev = rev1;
|
||||
} else {
|
||||
// New format: claude-opus-4-1
|
||||
major = major2;
|
||||
minor = minor2;
|
||||
flavor = flavor2;
|
||||
rev = rev2;
|
||||
}
|
||||
|
||||
const ver = minor ? `${major}.${minor}` : major;
|
||||
|
||||
switch (ver) {
|
||||
@@ -201,6 +225,30 @@ function maybeReassignModel(req: Request) {
|
||||
}
|
||||
break;
|
||||
|
||||
case "4":
|
||||
case "4.0":
|
||||
switch (flavor) {
|
||||
case "opus":
|
||||
req.body.model = "claude-opus-4@20250514";
|
||||
return;
|
||||
case "sonnet":
|
||||
req.body.model = "claude-sonnet-4@20250514";
|
||||
return;
|
||||
default:
|
||||
req.body.model = DEFAULT_MODEL;
|
||||
}
|
||||
break;
|
||||
|
||||
case "4.1":
|
||||
switch (flavor) {
|
||||
case "opus":
|
||||
req.body.model = "claude-opus-4-1@20250805";
|
||||
return;
|
||||
default:
|
||||
req.body.model = DEFAULT_MODEL;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
req.body.model = DEFAULT_MODEL;
|
||||
}
|
||||
|
||||
@@ -445,8 +445,10 @@ function addKeyToAggregates(k: KeyPoolKey) {
|
||||
addToFamily(`aws-claude__awsSonnet4`, 1);
|
||||
} else if (id.includes("claude-3-opus")) {
|
||||
addToFamily(`aws-claude__awsOpus3`, 1);
|
||||
addToFamily(`aws-claude-opus__awsOpus3`, 1);
|
||||
} else if (id.includes("opus-4")) {
|
||||
addToFamily(`aws-claude__awsOpus4`, 1);
|
||||
addToFamily(`aws-claude-opus__awsOpus4`, 1);
|
||||
} else if (id.includes("claude-v2")) {
|
||||
addToFamily(`aws-claude__awsClaude2`, 1);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
import { Request } from "express";
|
||||
|
||||
/**
 * Enforces the Claude Opus 4.1 sampling-parameter restriction on a request.
 *
 * Claude Opus 4.1 rejects requests that specify both `temperature` and `top_p`.
 * For Opus 4.1 models ONLY, this function mutates `req.body` so that at most
 * one of the two is forwarded:
 *
 * - both unset or at their default (1.0): `top_p` is removed, `temperature` kept
 * - only `top_p` customised: `temperature` is removed
 * - only `temperature` customised: `top_p` is removed
 * - both customised: an error is thrown
 *
 * A parameter that is missing or explicitly `null` counts as "at default".
 * Requests for any other model, or without a parsed body, are left untouched.
 *
 * @param req Incoming request; `req.body` is modified in place.
 * @throws Error when both `temperature` and `top_p` carry non-default values.
 */
export function validateClaude41OpusParameters(req: Request): void {
  const body = req.body;

  // Nothing to validate without a parsed body, or for any non-Opus-4.1 model.
  if (!body || !isClaude41OpusModel(body.model)) {
    return;
  }

  // Default values for Claude API
  const DEFAULT_TEMPERATURE = 1.0;
  const DEFAULT_TOP_P = 1.0;

  const { temperature, top_p: topP } = body;

  // `== null` matches both undefined and an explicit JSON null, so a client
  // sending `"top_p": null` is not mistaken for one customising top_p.
  const tempIsDefault =
    temperature == null || temperature === DEFAULT_TEMPERATURE;
  const topPIsDefault = topP == null || topP === DEFAULT_TOP_P;

  // Both customised: there is no safe way to pick one on the client's behalf.
  if (!tempIsDefault && !topPIsDefault) {
    throw new Error(
      "Claude Opus 4.1 does not support both temperature and top_p parameters being set to non-default values simultaneously. " +
        "Please specify only one of these parameters or set one to its default value (1.0)."
    );
  }

  if (topPIsDefault) {
    // Covers "both at default" and "only temperature customised": in either
    // case top_p is the one to drop. Only log when a key was really removed.
    if ("top_p" in body) {
      delete body.top_p;
      req.log?.info(
        tempIsDefault
          ? "Claude Opus 4.1: Both temperature and top_p at default, omitting top_p"
          : "Claude Opus 4.1: top_p at default, omitting top_p"
      );
    }
    return;
  }

  // Remaining case: top_p is customised and temperature is at its default.
  if ("temperature" in body) {
    delete body.temperature;
    req.log?.info("Claude Opus 4.1: Temperature at default, omitting temperature");
  }
}
|
||||
|
||||
/**
 * Checks if the given model is a Claude Opus 4.1 model (and only 4.1).
 *
 * Recognised spellings, matched case-insensitively anywhere in the string:
 * - Anthropic API:   `claude-opus-4-1`, `claude-opus-4-1-20250805`
 * - AWS Bedrock:     `anthropic.claude-opus-4-1-20250805-v1:0`
 * - GCP Vertex AI:   `claude-opus-4-1@20250805`
 * - Alternative forms accepted by the routers' model-mapping patterns:
 *   `claude-opus-4.1`, `claude-4.1-opus`, `claude-4-1-opus`
 *
 * @param model Model identifier taken from the request body.
 * @returns `true` only for Opus 4.1 identifiers; `false` for everything else,
 *          including empty, missing or non-string values.
 */
function isClaude41OpusModel(model: string): boolean {
  // The value comes straight from `req.body.model`, so at runtime it may be
  // absent or not a string despite the declared type.
  if (typeof model !== "string" || model.length === 0) return false;

  // Either "opus-4<sep>1" or "4<sep>1-opus" after "claude-", where <sep> is
  // "-" or ".". The negative lookahead keeps a trailing digit run (e.g. a
  // date directly following "opus-4-") from being read as minor version 1.
  return /claude-(?:opus-4[.-]1|4[.-]1-opus)(?!\d)/i.test(model);
}
|
||||
@@ -19,16 +19,22 @@ export const claudeModels: ClaudeModelMapping[] = [
|
||||
{ awsId: "anthropic.claude-3-opus-20240229-v1:0", anthropicId: "claude-3-opus-latest", displayName: "Claude 3 Opus (Latest)" },
|
||||
{ awsId: "anthropic.claude-sonnet-4-20250514-v1:0", anthropicId: "claude-sonnet-4-20250514", displayName: "Claude 4 Sonnet" },
|
||||
{ awsId: "anthropic.claude-sonnet-4-20250514-v1:0", anthropicId: "claude-sonnet-4-latest", displayName: "Claude 4 Sonnet (Latest)" },
|
||||
{ awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-20250514", displayName: "Claude 4 Opus" },
|
||||
{ awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-latest", displayName: "Claude 4 Opus (Latest)" },
|
||||
{ awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-20250514", displayName: "Claude 4.0 Opus" },
|
||||
{ awsId: "anthropic.claude-opus-4-1-20250805-v1:0", anthropicId: "claude-opus-4-1-20250805", displayName: "Claude 4.1 Opus" },
|
||||
{ awsId: "anthropic.claude-opus-4-1-20250805-v1:0", anthropicId: "claude-opus-4-latest", displayName: "Claude 4 Opus (Latest)" },
|
||||
{ awsId: "anthropic.claude-opus-4-1-20250805-v1:0", anthropicId: "claude-opus-4-1", displayName: "Claude 4.1 Opus" },
|
||||
{ awsId: "anthropic.claude-sonnet-4-20250514-v1:0", anthropicId: "claude-sonnet-4-0", displayName: "Claude 4 Sonnet" },
|
||||
{ awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-0", displayName: "Claude 4 Opus" },
|
||||
{ awsId: "anthropic.claude-opus-4-20250514-v1:0", anthropicId: "claude-opus-4-0", displayName: "Claude 4.0 Opus" },
|
||||
];
|
||||
|
||||
/**
 * Looks up a model mapping by its AWS model ID.
 *
 * @param awsId ID to compare (strict equality) against each entry's `awsId`.
 * @returns The first entry of `claudeModels` that matches, or `undefined`
 *          when no entry does.
 */
export function findByAwsId(awsId: string): ClaudeModelMapping | undefined {
  for (const entry of claudeModels) {
    if (entry.awsId === awsId) {
      return entry;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Looks up a model mapping by its Anthropic model ID.
 *
 * @param anthropicId ID to compare (strict equality) against each entry's
 *                    `anthropicId`.
 * @returns The first entry of `claudeModels` that matches, or `undefined`
 *          when no entry does.
 */
export function findByAnthropicId(anthropicId: string): ClaudeModelMapping | undefined {
  for (const entry of claudeModels) {
    if (entry.anthropicId === anthropicId) {
      return entry;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Exposes the full list of Claude model mappings.
 *
 * @returns The module-level `claudeModels` array itself (not a copy).
 */
export function getAllClaudeModels(): ClaudeModelMapping[] {
  const allMappings: ClaudeModelMapping[] = claudeModels;
  return allMappings;
}
|
||||
@@ -27,6 +27,7 @@ const KNOWN_MODEL_IDS: ModuleAliasTuple[] = [
|
||||
["anthropic.claude-3-7-sonnet-20250219-v1:0"],
|
||||
["anthropic.claude-sonnet-4-20250514-v1:0"],
|
||||
["anthropic.claude-opus-4-20250514-v1:0"],
|
||||
["anthropic.claude-opus-4-1-20250805-v1:0"],
|
||||
["mistral.mistral-7b-instruct-v0:2"],
|
||||
["mistral.mixtral-8x7b-instruct-v0:1"],
|
||||
["mistral.mistral-large-2402-v1:0"],
|
||||
|
||||
@@ -3,6 +3,7 @@ import { config } from "../../../config";
|
||||
import { logger } from "../../../logger";
|
||||
import { PaymentRequiredError } from "../../errors";
|
||||
import { AwsBedrockModelFamily, getAwsBedrockModelFamily } from "../../models";
|
||||
import { findByAnthropicId } from "../../claude-models";
|
||||
import { createGenericGetLockoutPeriod, Key, KeyProvider } from "..";
|
||||
import { prioritizeKeys } from "../prioritize-keys";
|
||||
import { AwsKeyChecker } from "./checker";
|
||||
@@ -96,6 +97,15 @@ export class AwsBedrockKeyProvider implements KeyProvider<AwsBedrockKey> {
|
||||
// Claude 2 is the only model that breaks this convention; Anthropic calls
|
||||
// it claude-2 but AWS calls it claude-v2.
|
||||
if (model.includes("claude-2")) neededVariantId = "claude-v2";
|
||||
|
||||
// For Claude models, try to resolve aliases to AWS model IDs
|
||||
if (model.includes("claude") && !model.includes("anthropic.")) {
|
||||
const claudeMapping = findByAnthropicId(model);
|
||||
if (claudeMapping) {
|
||||
neededVariantId = claudeMapping.awsId;
|
||||
}
|
||||
}
|
||||
|
||||
const neededFamily = getAwsBedrockModelFamily(model);
|
||||
|
||||
const availableKeys = this.keys.filter((k) => {
|
||||
|
||||
@@ -42,19 +42,20 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
|
||||
this.invokeModel("claude-3-haiku@20240307", key, true),
|
||||
this.invokeModel("claude-3-sonnet@20240229", key, true),
|
||||
this.invokeModel("claude-3-opus@20240229", key, true),
|
||||
this.invokeModel("claude-opus-4-1@20250805", key, true),
|
||||
this.invokeModel("claude-3-5-sonnet-v2@20241022", key, true),
|
||||
];
|
||||
|
||||
const [sonnet, haiku, opus, sonnet35] = await Promise.all(checks);
|
||||
const [sonnet, haiku, opus3, opus41, sonnet35] = await Promise.all(checks);
|
||||
|
||||
this.log.debug(
|
||||
{ key: key.hash, sonnet, haiku, opus, sonnet35 },
|
||||
{ key: key.hash, sonnet, haiku, opus3, opus41, sonnet35 },
|
||||
"GCP model initial tests complete."
|
||||
);
|
||||
|
||||
const families: GcpModelFamily[] = [];
|
||||
if (sonnet || sonnet35 || haiku) families.push("gcp-claude");
|
||||
if (opus) families.push("gcp-claude-opus");
|
||||
if (opus3 || opus41) families.push("gcp-claude-opus");
|
||||
|
||||
if (families.length === 0) {
|
||||
this.log.warn(
|
||||
@@ -81,6 +82,7 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
|
||||
await this.invokeModel("claude-3-5-sonnet-v2@20241022", key, false);
|
||||
} else {
|
||||
await this.invokeModel("claude-3-opus@20240229", key, false);
|
||||
await this.invokeModel("claude-opus-4-1@20250805", key, false);
|
||||
}
|
||||
|
||||
this.updateKey(key.hash, { lastChecked: Date.now() });
|
||||
|
||||
Reference in New Issue
Block a user