diff --git a/src/proxy/anthropic.ts b/src/proxy/anthropic.ts index c1e037d..7f9228c 100644 --- a/src/proxy/anthropic.ts +++ b/src/proxy/anthropic.ts @@ -196,10 +196,11 @@ function setAnthropicBetaHeader(req: Request) { betaHeaders.push("extended-cache-ttl-2025-04-11"); } - // Add 1M context beta header for Claude Sonnet 4/Opus 4 if context > 200k tokens + // Add 1M context beta header for Claude Sonnet 4/Opus 4/Haiku 4.5 if context > 200k tokens const supportsBigContext = model?.includes("claude-sonnet-4") || - model?.includes("claude-opus-4"); + model?.includes("claude-opus-4") || + model?.includes("claude-haiku-4"); if (supportsBigContext && req.promptTokens && req.outputTokens) { const contextTokens = req.promptTokens + req.outputTokens; if (contextTokens > 200000) { @@ -226,7 +227,7 @@ function addWebSearchTool(req: Request) { // Check if this is a Claude model that supports web search and if web search is enabled const isClaude35 = req.body.model?.includes("claude-3-5") || req.body.model?.includes("claude-3.5"); const isClaude37 = req.body.model?.includes("claude-3-7") || req.body.model?.includes("claude-3.7"); - const isClaude4 = req.body.model?.includes("claude-sonnet-4") || req.body.model?.includes("claude-opus-4"); + const isClaude4 = req.body.model?.includes("claude-sonnet-4") || req.body.model?.includes("claude-opus-4") || req.body.model?.includes("claude-haiku-4"); const useWebSearch = (isClaude35 || isClaude37 || isClaude4) && Boolean(req.body.enable_web_search); if (useWebSearch) { @@ -328,7 +329,8 @@ const preprocessAnthropicTextRequest: RequestHandler = (req, res, next) => { const model = req.body.model; const isClaude4Model = model?.includes("claude-sonnet-4") || - model?.includes("claude-opus-4"); + model?.includes("claude-opus-4") || + model?.includes("claude-haiku-4"); if (model?.startsWith("claude-3") || isClaude4Model) { textToChatPreprocessor(req, res, next); } else { @@ -363,7 +365,8 @@ const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => { const model = req.body.model; const isClaude4 = model?.includes("claude-sonnet-4") || - model?.includes("claude-opus-4"); + model?.includes("claude-opus-4") || + model?.includes("claude-haiku-4"); if (model?.includes("claude-3") || isClaude4) { oaiToChatPreprocessor(req, res, next); } else { diff --git a/src/proxy/aws-claude.ts b/src/proxy/aws-claude.ts index f2e5c12..91a434f 100644 --- a/src/proxy/aws-claude.ts +++ b/src/proxy/aws-claude.ts @@ -104,7 +104,8 @@ const preprocessAwsTextRequest: RequestHandler = (req, res, next) => { const model = req.body.model; const isClaude4Model = model?.includes("claude-sonnet-4") || - model?.includes("claude-opus-4"); + model?.includes("claude-opus-4") || + model?.includes("claude-haiku-4"); if (model?.includes("claude-3") || isClaude4Model) { textToChatPreprocessor(req, res, next); } else { @@ -130,7 +131,8 @@ const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => { const model = req.body.model; const isClaude4Model = model?.includes("claude-sonnet-4") || - model?.includes("claude-opus-4"); + model?.includes("claude-opus-4") || + model?.includes("claude-haiku-4"); if (model?.includes("claude-3") || isClaude4Model) { oaiToAwsChatPreprocessor(req, res, next); } else { @@ -345,10 +347,13 @@ function maybeReassignModel(req: Request) { // Mapping "claude-4.5-..." variants to their actual AWS Bedrock IDs // as defined in src/shared/claude-models.ts. switch (name) { + case "haiku": + req.body.model = "anthropic.claude-haiku-4-5-20251001-v1:0"; + return; case "sonnet": req.body.model = "anthropic.claude-sonnet-4-5-20250929-v1:0"; return; - // No opus or haiku variants for 4.5 yet + // No opus variant for 4.5 yet } break; } diff --git a/src/proxy/gcp.ts b/src/proxy/gcp.ts index 1366dd2..f392e85 100644 --- a/src/proxy/gcp.ts +++ b/src/proxy/gcp.ts @@ -34,6 +34,7 @@ const getModelsResponse = () => { "claude-opus-4@20250514", "claude-opus-4-1@20250805", "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", ]; const models = variants.map((id) => ({ @@ -252,6 +253,9 @@ function maybeReassignModel(req: Request) { case "4.5": switch (flavor) { + case "haiku": + req.body.model = "claude-haiku-4-5@20251001"; + return; case "sonnet": req.body.model = "claude-sonnet-4-5@20250929"; return; diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts index 5f3fece..e1f83ce 100644 --- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts +++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts @@ -123,13 +123,13 @@ export const validateContextSize: RequestPreprocessor = async (req) => { modelMax = 200000; } else if (model.match(/^claude-3/)) { modelMax = 200000; - } else if (model.match(/^claude-(?:sonnet|opus)-4(?:-5)?/)) { + } else if (model.match(/^claude-(?:sonnet|opus|haiku)-4(?:-5)?/)) { modelMax = 1000000; } else if (model.match(/^gemini-/)) { modelMax = 1024000; } else if (model.match(/^anthropic\.claude-3/)) { modelMax = 200000; - } else if (model.match(/^anthropic\.claude-(?:sonnet|opus)-4(?:-5)?/)) { + } else if (model.match(/^anthropic\.claude-(?:sonnet|opus|haiku)-4(?:-5)?/)) { modelMax = 1000000; } else if (model.match(/^anthropic\.claude-v2:\d/)) { modelMax = 200000; diff --git a/src/service-info.ts b/src/service-info.ts index 5b63dc9..91d0caf 100644 --- a/src/service-info.ts +++ b/src/service-info.ts @@ -534,7 +534,9 @@ function addKeyToAggregates(k: KeyPoolKey) { addToFamily(`aws-claude__awsSonnet3_5`, 1); } else if (id.includes("claude-3-7-sonnet")) { addToFamily(`aws-claude__awsSonnet3_7`, 1); - } else if (id.includes("claude-3-haiku")) { + } else if (id.includes("claude-3-haiku") || id.includes("claude-3-5-haiku")) { + addToFamily(`aws-claude__awsHaiku`, 1); + } else if (id.includes("haiku-4-5")) { addToFamily(`aws-claude__awsHaiku`, 1); } else if (id.includes("sonnet-4-5")) { addToFamily(`aws-claude__awsSonnet4_5`, 1); diff --git a/src/shared/claude-4-1-validation.ts b/src/shared/claude-4-1-validation.ts index 7616f66..0746bb7 100644 --- a/src/shared/claude-4-1-validation.ts +++ b/src/shared/claude-4-1-validation.ts @@ -73,7 +73,10 @@ function isClaude41OpusModel(model: string): boolean { if (model.includes("claude-opus-4-1")) return true; // sonnet45 - if (model.includes("claude-sonnet-4-5")) return true + if (model.includes("claude-sonnet-4-5")) return true; + + // haiku45 + if (model.includes("claude-haiku-4-5")) return true; return false; } diff --git a/src/shared/claude-models.ts b/src/shared/claude-models.ts index e482769..6c98b2f 100644 --- a/src/shared/claude-models.ts +++ b/src/shared/claude-models.ts @@ -28,6 +28,9 @@ export const claudeModels: ClaudeModelMapping[] = [ { awsId: "anthropic.claude-sonnet-4-5-20250929-v1:0", anthropicId: "claude-sonnet-4-5-20250929", displayName: "Claude 4.5 Sonnet" }, { awsId: "anthropic.claude-sonnet-4-5-20250929-v1:0", anthropicId: "claude-sonnet-4-5", displayName: "Claude 4.5 Sonnet" }, { awsId: "anthropic.claude-sonnet-4-5-20250929-v1:0", anthropicId: "claude-sonnet-4-5-latest", displayName: "Claude 4.5 Sonnet (Latest)" }, + { awsId: "anthropic.claude-haiku-4-5-20251001-v1:0", anthropicId: "claude-haiku-4-5-20251001", displayName: "Claude 4.5 Haiku" }, + { awsId: "anthropic.claude-haiku-4-5-20251001-v1:0", anthropicId: "claude-haiku-4-5", displayName: "Claude 4.5 Haiku" }, + { awsId: "anthropic.claude-haiku-4-5-20251001-v1:0", anthropicId: "claude-haiku-4-5-latest", displayName: "Claude 4.5 Haiku (Latest)" }, ]; export function findByAwsId(awsId: string): ClaudeModelMapping | undefined { diff --git a/src/shared/key-management/aws/checker.ts b/src/shared/key-management/aws/checker.ts index d29685e..e76e69b 100644 --- a/src/shared/key-management/aws/checker.ts +++ b/src/shared/key-management/aws/checker.ts @@ -29,6 +29,7 @@ const KNOWN_MODEL_IDS: ModuleAliasTuple[] = [ ["anthropic.claude-opus-4-20250514-v1:0"], ["anthropic.claude-opus-4-1-20250805-v1:0"], ["anthropic.claude-sonnet-4-5-20250929-v1:0"], + ["anthropic.claude-haiku-4-5-20251001-v1:0"], ["mistral.mistral-7b-instruct-v0:2"], ["mistral.mixtral-8x7b-instruct-v0:1"], ["mistral.mistral-large-2402-v1:0"], diff --git a/src/shared/key-management/gcp/checker.ts b/src/shared/key-management/gcp/checker.ts index adeb105..50716e0 100644 --- a/src/shared/key-management/gcp/checker.ts +++ b/src/shared/key-management/gcp/checker.ts @@ -45,17 +45,18 @@ export class GcpKeyChecker extends KeyCheckerBase { this.invokeModel("claude-opus-4-1@20250805", key, true), this.invokeModel("claude-3-5-sonnet-v2@20241022", key, true), this.invokeModel("claude-sonnet-4-5@20250929", key, true), + this.invokeModel("claude-haiku-4-5@20251001", key, true), ]; - const [sonnet, haiku, opus3, opus41, sonnet35, sonnet45] = await Promise.all(checks); + const [sonnet, haiku, opus3, opus41, sonnet35, sonnet45, haiku45] = await Promise.all(checks); this.log.debug( - { key: key.hash, sonnet, haiku, opus3, opus41, sonnet35, sonnet45 }, + { key: key.hash, sonnet, haiku, opus3, opus41, sonnet35, sonnet45, haiku45 }, "GCP model initial tests complete." ); const families: GcpModelFamily[] = []; - if (sonnet || sonnet35 || sonnet45 || haiku) families.push("gcp-claude"); + if (sonnet || sonnet35 || sonnet45 || haiku || haiku45) families.push("gcp-claude"); if (opus3 || opus41) families.push("gcp-claude-opus"); if (families.length === 0) {