From bace589984330203c30085716d766f859ea7d366 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 6 Apr 2026 04:26:43 -0700 Subject: [PATCH] Fix provider model inventories --- src/proxy/anthropic.ts | 100 ++++++++++++------ src/proxy/aws.ts | 63 ++++++++--- src/proxy/gcp.ts | 6 +- src/proxy/openai.ts | 68 ++++++++++-- .../key-management/anthropic/checker.ts | 67 +++++++++++- .../key-management/anthropic/provider.ts | 14 ++- src/shared/key-management/aws/checker.ts | 73 ++++++++++--- src/shared/key-management/aws/provider.ts | 11 +- src/shared/key-management/openai/checker.ts | 9 +- src/shared/models.ts | 4 +- 10 files changed, 324 insertions(+), 91 deletions(-) diff --git a/src/proxy/anthropic.ts b/src/proxy/anthropic.ts index b24b455..ef33530 100644 --- a/src/proxy/anthropic.ts +++ b/src/proxy/anthropic.ts @@ -1,7 +1,13 @@ import { Request, Response, RequestHandler, Router } from "express"; import { createProxyMiddleware } from "http-proxy-middleware"; import { config } from "../config"; +import { keyPool, AnthropicKey } from "../shared/key-management"; import { logger } from "../logger"; +import { + AnthropicModelFamily, + getClaudeModelFamily, + ModelFamily, +} from "../shared/models"; import { createQueueMiddleware } from "./queue"; import { ipLimiter } from "./rate-limit"; import { handleProxyError } from "./middleware/common"; @@ -21,46 +27,70 @@ import { sendErrorToClient } from "./middleware/response/error-generator"; let modelsCache: any = null; let modelsCacheTime = 0; -const getModelsResponse = () => { - if (new Date().getTime() - modelsCacheTime < 1000 * 60) { - return modelsCache; +export const KNOWN_ANTHROPIC_MODELS = [ + "claude-3-haiku-20240307", + "claude-haiku-4-5-20251001", + "claude-opus-4-1-20250805", + "claude-opus-4-20250514", + "claude-opus-4-5-20251101", + "claude-opus-4-6", + "claude-sonnet-4-20250514", + "claude-sonnet-4-5-20250929", + "claude-sonnet-4-6", +]; + +export function generateModelList(models = KNOWN_ANTHROPIC_MODELS) { + let availableFamilies = new Set(); + const availableModelIds = new Set(); + for (const key of keyPool.list()) { + if (key.isDisabled || key.service !== "anthropic") continue; + const anthropicKey = key as AnthropicKey; + anthropicKey.modelFamilies.forEach((family) => + availableFamilies.add(family) + ); + anthropicKey.modelIds.forEach((id) => availableModelIds.add(id)); } - if (!config.anthropicKey) return { object: "list", data: [] }; + const allowed = new Set(config.allowedModelFamilies); + availableFamilies = new Set( + [...availableFamilies].filter((family) => allowed.has(family)) + ); - const claudeVariants = [ - "claude-2.0", - "claude-2.1", - "claude-sonnet-4-5", - "claude-sonnet-4-5-20250929", - "claude-haiku-4-5", - "claude-haiku-4-5-20251001", - "claude-opus-4-1", - "claude-opus-4-1-20250805", - "claude-opus-4-20250514", - "claude-sonnet-4-20250514", - "claude-3-5-haiku-20241022", - "claude-3-5-haiku-latest", - ]; + const usingExactModelIds = availableModelIds.size > 0; + const sourceModels = usingExactModelIds + ? [...availableModelIds].sort() + : models; - const models = claudeVariants.map((id) => ({ - id, - object: "model", - created: new Date().getTime(), - owned_by: "anthropic", - permission: [], - root: "claude", - parent: null, - })); - - modelsCache = { object: "list", data: models }; - modelsCacheTime = new Date().getTime(); - - return modelsCache; -}; + return sourceModels + .map((id) => ({ + id, + object: "model", + created: new Date().getTime(), + owned_by: "anthropic", + permission: [], + root: "claude", + parent: null, + })) + .filter((model) => { + if (usingExactModelIds) { + return ( + allowed.has(getClaudeModelFamily(model.id)) && + availableModelIds.has(model.id) + ); + } + return availableFamilies.has(getClaudeModelFamily(model.id)); + }); +} const handleModelRequest: RequestHandler = (_req, res) => { - res.status(200).json(getModelsResponse()); + if (new Date().getTime() - modelsCacheTime < 1000 * 60) { + return res.status(200).json(modelsCache); + } + + const result = config.anthropicKey ? generateModelList() : []; + modelsCache = { object: "list", data: result }; + modelsCacheTime = new Date().getTime(); + res.status(200).json(modelsCache); }; /** Only used for non-streaming requests. */ @@ -350,7 +380,7 @@ function maybeReassignModel(req: Request) { lower.startsWith("o1") || lower.startsWith("o3") || lower.startsWith("o4") || - lower === "computer-use-preview" + lower.startsWith("computer-use-preview") ) { req.body.model = "claude-sonnet-4-5-20250929"; } diff --git a/src/proxy/aws.ts b/src/proxy/aws.ts index 93d1eb8..da14ea4 100644 --- a/src/proxy/aws.ts +++ b/src/proxy/aws.ts @@ -2,7 +2,13 @@ import { Request, RequestHandler, Response, Router } from "express"; import { createProxyMiddleware } from "http-proxy-middleware"; import { v4 } from "uuid"; import { config } from "../config"; +import { keyPool, AwsBedrockKey } from "../shared/key-management"; import { logger } from "../logger"; +import { + AwsBedrockModelFamily, + getAwsBedrockModelFamily, + ModelFamily, +} from "../shared/models"; import { createQueueMiddleware } from "./queue"; import { ipLimiter } from "./rate-limit"; import { handleProxyError } from "./middleware/common"; @@ -16,7 +22,10 @@ import { ProxyResHandlerWithBody, createOnProxyResHandler, } from "./middleware/response"; -import { transformAnthropicChatResponseToAnthropicText, transformAnthropicChatResponseToOpenAI } from "./anthropic"; +import { + transformAnthropicChatResponseToAnthropicText, + transformAnthropicChatResponseToOpenAI, +} from "./anthropic"; import { sendErrorToClient } from "./middleware/response/error-generator"; const LATEST_AWS_V2_MINOR_VERSION = "1"; @@ -37,6 +46,21 @@ const getModelsResponse = () => { if (!config.awsCredentials) return { object: "list", data: [] }; + let availableFamilies = new Set(); + const availableModelIds = new Set(); + for (const key of keyPool.list()) { + if (key.isDisabled || key.service !== "aws") continue; + const awsKey = key as AwsBedrockKey; + awsKey.modelFamilies.forEach((family) => availableFamilies.add(family)); + awsKey.modelIds.forEach((id) => availableModelIds.add(id)); + } + + const allowed = new Set(config.allowedModelFamilies); + availableFamilies = new Set( + [...availableFamilies].filter((family) => allowed.has(family)) + ); + const usingExactModelIds = availableModelIds.size > 0; + // https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html const variants = [ "anthropic.claude-v2", @@ -48,16 +72,29 @@ const getModelsResponse = () => { AWS_CLAUDE_SONNET_4, AWS_CLAUDE_OPUS_4, ]; + const sourceModels = usingExactModelIds + ? [...availableModelIds].sort() + : variants; - const models = variants.map((id) => ({ - id, - object: "model", - created: new Date().getTime(), - owned_by: "anthropic", - permission: [], - root: "claude", - parent: null, - })); + const models = sourceModels + .map((id) => ({ + id, + object: "model", + created: new Date().getTime(), + owned_by: "anthropic", + permission: [], + root: "claude", + parent: null, + })) + .filter((model) => { + if (usingExactModelIds) { + return ( + allowed.has(getAwsBedrockModelFamily(model.id)) && + availableModelIds.has(model.id) + ); + } + return availableFamilies.has(getAwsBedrockModelFamily(model.id)); + }); modelsCache = { object: "list", data: models }; modelsCacheTime = new Date().getTime(); @@ -243,7 +280,7 @@ awsRouter.post( * - frontends sending Anthropic model names that AWS doesn't recognize * - frontends sending OpenAI model names because they expect the proxy to * translate them - * + * * If client sends AWS model ID it will be used verbatim. Otherwise, various * strategies are used to try to map a non-AWS model name to AWS model ID. */ @@ -302,7 +339,7 @@ function maybeReassignModel(req: Request) { lower.startsWith("o1") || lower.startsWith("o3") || lower.startsWith("o4") || - lower === "computer-use-preview" + lower.startsWith("computer-use-preview") ) { req.body.model = AWS_CLAUDE_SONNET_45; return; @@ -329,7 +366,7 @@ function maybeReassignModel(req: Request) { req.body.model = "anthropic.claude-instant-v1"; return; } - + const ver = minor ? `${major}.${minor}` : major; switch (ver) { case "1": diff --git a/src/proxy/gcp.ts b/src/proxy/gcp.ts index 03b37b2..2f875ba 100644 --- a/src/proxy/gcp.ts +++ b/src/proxy/gcp.ts @@ -148,7 +148,7 @@ gcpRouter.post( * - frontends sending Anthropic model names that GCP doesn't recognize * - frontends sending OpenAI model names because they expect the proxy to * translate them - * + * * If client sends GCP model ID it will be used verbatim. Otherwise, various * strategies are used to try to map a non-GCP model name to GCP model ID. */ @@ -208,7 +208,7 @@ function maybeReassignModel(req: Request) { lower.startsWith("o1") || lower.startsWith("o3") || lower.startsWith("o4") || - lower === "computer-use-preview" + lower.startsWith("computer-use-preview") ) { req.body.model = GCP_CLAUDE_SONNET_45; return; @@ -230,7 +230,7 @@ function maybeReassignModel(req: Request) { } const [_, _cl, instant, _v, major, _sep, minor, _ctx, name, _rev] = match; - + const ver = minor ? `${major}.${minor}` : major; switch (ver) { case "3": diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts index 59822ad..562b0b7 100644 --- a/src/proxy/openai.ts +++ b/src/proxy/openai.ts @@ -33,6 +33,12 @@ export const KNOWN_OPENAI_MODELS = [ "gpt-5.2-chat-latest", "gpt-5.2-pro", "gpt-5.2-codex", + "gpt-5.4", + "gpt-5.4-pro", + "gpt-5.4-mini", + "gpt-5.4-nano", + "gpt-5.3-chat-latest", + "gpt-5.3-codex", "gpt-5.1", "gpt-5.1-chat", "gpt-5.1-codex", @@ -48,9 +54,12 @@ export const KNOWN_OPENAI_MODELS = [ "gpt-4.1-2025-04-14", "gpt-4.1-mini", "gpt-4.1-nano", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", "o3-pro", "o3-deep-research", "computer-use-preview", + "computer-use-preview-2025-03-11", "o4-mini", "o4-mini-deep-research", "o3", @@ -59,8 +68,13 @@ export const KNOWN_OPENAI_MODELS = [ "o1-pro", "gpt-4o", "gpt-4o-2024-08-06", + "gpt-4o-2024-11-20", "gpt-4o-mini", "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-mini-audio-preview", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", "gpt-4-turbo", // alias for latest gpt4-turbo stable "gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision "gpt-4", @@ -74,13 +88,45 @@ export const KNOWN_OPENAI_MODELS = [ "text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002", - "gpt-image-1.5", - "gpt-image-1", - "gpt-image-1-mini", - "dall-e-3", - "dall-e-2", ]; +const UNSUPPORTED_OPENAI_MODEL_PATTERNS = [ + /^babbage-002$/, + /^chatgpt-image-latest$/, + /^davinci-002$/, + /^gpt-(?:4o(?:-mini)?-)?realtime(?:-|$)/, + /^gpt-(?:4o(?:-mini)?-)?transcribe(?:-|$)/, + /^gpt-.*-tts(?:-|$)/, + /^omni-moderation(?:-|$)/, + /^sora-/, + /^tts-1(?:-|$)/, + /^whisper-1$/, +]; + +const SUPPORTED_OPENAI_MODEL_PATTERNS = [ + /^gpt-5(?:\.\d+)?(?:[-.].+)?$/, + /^o\d(?:[-.].+)?$/, + /^computer-use-preview(?:-\d{4}-\d{2}-\d{2})?$/, + /^gpt-4\.1(?:[-.].+)?$/, + /^gpt-4o(?:[-.].+)?$/, + /^gpt-4-turbo(?:-\d{4}-\d{2}-\d{2})?$/, + /^gpt-4-32k(?:-\d{4})?$/, + /^gpt-4(?:-\d{4})?$/, + /^gpt-3\.5-turbo(?:[-.].+)?$/, + /^text-embedding-(ada-002|3-small|3-large)$/, + /^gpt-image-1(?:[-.].+)?$/, + /^dall-e-\d$/, +]; + +function isSupportedOpenAIModelId(modelId: string) { + return ( + !UNSUPPORTED_OPENAI_MODEL_PATTERNS.some((pattern) => + pattern.test(modelId) + ) && + SUPPORTED_OPENAI_MODEL_PATTERNS.some((pattern) => pattern.test(modelId)) + ); +} + let modelsCache: any = null; let modelsCacheTime = 0; @@ -103,9 +149,18 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) { [...availableFamilies].filter((x) => allowed.has(x)) ); const usingExactModelIds = availableModelIds.size > 0; + const supportedFamilies = new Set( + models.map((model) => getOpenAIModelFamily(model)) + ); const sourceModels = usingExactModelIds - ? [...new Set([...models, ...availableModelIds])] + ? [...availableModelIds] + .filter( + (model) => + isSupportedOpenAIModelId(model) && + supportedFamilies.has(getOpenAIModelFamily(model)) + ) + .sort() : models; return sourceModels @@ -130,6 +185,7 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) { .filter((model) => { if (usingExactModelIds) { return ( + isSupportedOpenAIModelId(model.id) && allowed.has(getOpenAIModelFamily(model.id)) && availableModelIds.has(model.id) ); diff --git a/src/shared/key-management/anthropic/checker.ts b/src/shared/key-management/anthropic/checker.ts index 1727178..86628dd 100644 --- a/src/shared/key-management/anthropic/checker.ts +++ b/src/shared/key-management/anthropic/checker.ts @@ -1,11 +1,13 @@ import axios, { AxiosError, AxiosResponse } from "axios"; import { KeyCheckerBase } from "../key-checker-base"; import type { AnthropicKey, AnthropicKeyProvider } from "./provider"; +import { AnthropicModelFamily, getClaudeModelFamily } from "../../models"; const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds const KEY_CHECK_PERIOD = 1000 * 60 * 60 * 6; // 6 hours +const GET_MODELS_URL = "https://api.anthropic.com/v1/models"; const POST_MESSAGES_URL = "https://api.anthropic.com/v1/messages"; -const TEST_MODEL = "claude-3-sonnet-20240229"; +const DEFAULT_TEST_MODEL = "claude-3-haiku-20240307"; const SYSTEM = "Obey all instructions from the user."; const DETECTION_PROMPT = [ { @@ -35,6 +37,10 @@ type MessageResponse = { content: { type: "text"; text: string }[]; }; +type GetModelsResponse = { + data: { id: string }[]; +}; + type AnthropicAPIError = { error: { type: string; message: string }; }; @@ -52,11 +58,21 @@ export class AnthropicKeyChecker extends KeyCheckerBase { } protected async testKeyOrFail(key: AnthropicKey) { - const [{ pozzed, tier }] = await Promise.all([this.testLiveness(key)]); - const updates = { isPozzed: pozzed, tier }; + const isInitialCheck = !key.lastChecked; + const provisionedModels = isInitialCheck + ? await this.getProvisionedModels(key) + : key.modelFamilies; + const keyFromPool = this.keys.find((k) => k.hash === key.hash)!; + const liveness = await this.testLiveness(keyFromPool); + + const updates = { + isPozzed: liveness.pozzed, + tier: liveness.tier, + modelFamilies: provisionedModels, + }; this.updateKey(key.hash, updates); this.log.info( - { key: key.hash, tier, models: key.modelFamilies }, + { key: key.hash, tier: liveness.tier, models: keyFromPool.modelIds }, "Checked key." ); } @@ -131,7 +147,7 @@ export class AnthropicKeyChecker extends KeyCheckerBase { key: AnthropicKey ): Promise<{ pozzed: boolean; tier: AnthropicKey["tier"] }> { const payload = { - model: TEST_MODEL, + model: this.getLivenessModel(key), max_tokens: 40, temperature: 0, stream: false, @@ -162,6 +178,47 @@ export class AnthropicKeyChecker extends KeyCheckerBase { } } + private async getProvisionedModels( + key: AnthropicKey + ): Promise { + const { data } = await axios.get(GET_MODELS_URL, { + headers: AnthropicKeyChecker.getRequestHeaders(key), + }); + + const modelIds = data.data.map(({ id }) => id); + const families = new Set(); + modelIds.forEach((id) => families.add(getClaudeModelFamily(id))); + + const familiesArray = [...families]; + const keyFromPool = this.keys.find((k) => k.hash === key.hash)!; + this.updateKey(key.hash, { + modelIds, + modelFamilies: familiesArray, + lastChecked: keyFromPool.lastChecked, + }); + return familiesArray; + } + + private getLivenessModel(key: AnthropicKey) { + const preferredModels = [ + "claude-3-haiku-20240307", + "claude-haiku-4-5-20251001", + "claude-sonnet-4-20250514", + "claude-sonnet-4-5-20250929", + "claude-sonnet-4-6", + "claude-opus-4-1-20250805", + "claude-opus-4-20250514", + "claude-opus-4-5-20251101", + "claude-opus-4-6", + ]; + + return ( + preferredModels.find((model) => key.modelIds.includes(model)) || + key.modelIds[0] || + DEFAULT_TEST_MODEL + ); + } + static errorIsAnthropicAPIError( error: AxiosError ): error is AxiosError { diff --git a/src/shared/key-management/anthropic/provider.ts b/src/shared/key-management/anthropic/provider.ts index 057686e..4cc8b51 100644 --- a/src/shared/key-management/anthropic/provider.ts +++ b/src/shared/key-management/anthropic/provider.ts @@ -23,6 +23,8 @@ type AnthropicKeyUsage = { export interface AnthropicKey extends Key, AnthropicKeyUsage { readonly service: "anthropic"; readonly modelFamilies: AnthropicModelFamily[]; + /** Exact model IDs reported by the models API for this key. */ + modelIds: string[]; /** The time at which this key was last rate limited. */ rateLimitedAt: number; /** The time until which this key is rate limited. */ @@ -108,6 +110,7 @@ export class AnthropicKeyProvider implements KeyProvider { key, service: this.service, modelFamilies: ["claude", "claude-opus"], + modelIds: [], isDisabled: false, isOverQuota: false, isRevoked: false, @@ -145,11 +148,18 @@ export class AnthropicKeyProvider implements KeyProvider { } public get(rawModel: string) { - this.log.debug({ model: rawModel }, "Selecting key"); const needsMultimodal = rawModel.endsWith("-multimodal"); + const model = needsMultimodal + ? rawModel.replace(/-multimodal$/, "") + : rawModel; + this.log.debug({ model: rawModel, exactModel: model }, "Selecting key"); const availableKeys = this.keys.filter((k) => { - return !k.isDisabled && (!needsMultimodal || k.allowsMultimodality); + return ( + !k.isDisabled && + (!needsMultimodal || k.allowsMultimodality) && + (!k.modelIds.length || k.modelIds.includes(model)) + ); }); if (availableKeys.length === 0) { diff --git a/src/shared/key-management/aws/checker.ts b/src/shared/key-management/aws/checker.ts index bb8fb87..d1a07bb 100644 --- a/src/shared/key-management/aws/checker.ts +++ b/src/shared/key-management/aws/checker.ts @@ -17,6 +17,13 @@ const GET_INVOCATION_LOGGING_CONFIG_URL = (region: string) => `https://bedrock.${region}.amazonaws.com/logging/modelinvocations`; const POST_INVOKE_MODEL_URL = (region: string, model: string) => `https://${AMZ_HOST.replace("%REGION%", region)}/model/${model}/invoke`; +const AWS_CLAUDE_V2 = "anthropic.claude-v2"; +const AWS_CLAUDE_SONNET_45 = "anthropic.claude-sonnet-4-5-20250929-v1:0"; +const AWS_CLAUDE_HAIKU_45 = "anthropic.claude-haiku-4-5-20251001-v1:0"; +const AWS_CLAUDE_OPUS_41 = "anthropic.claude-opus-4-1-20250805-v1:0"; +const AWS_CLAUDE_35_HAIKU = "anthropic.claude-3-5-haiku-20241022-v1:0"; +const AWS_CLAUDE_SONNET_4 = "anthropic.claude-sonnet-4-20250514-v1:0"; +const AWS_CLAUDE_OPUS_4 = "anthropic.claude-opus-4-20250514-v1:0"; const TEST_MESSAGES = [ { role: "user", content: "Hi!" }, { role: "assistant", content: "Hello!" }, @@ -53,28 +60,60 @@ export class AwsKeyChecker extends KeyCheckerBase { const isInitialCheck = !key.lastChecked; if (isInitialCheck) { checks = [ - this.invokeModel("anthropic.claude-v2", key), - this.invokeModel("anthropic.claude-sonnet-4-5-20250929-v1:0", key), - this.invokeModel("anthropic.claude-haiku-4-5-20251001-v1:0", key), - this.invokeModel("anthropic.claude-opus-4-1-20250805-v1:0", key), - this.invokeModel("anthropic.claude-3-5-haiku-20241022-v1:0", key), + this.invokeModel(AWS_CLAUDE_V2, key), + this.invokeModel(AWS_CLAUDE_SONNET_45, key), + this.invokeModel(AWS_CLAUDE_HAIKU_45, key), + this.invokeModel(AWS_CLAUDE_OPUS_41, key), + this.invokeModel(AWS_CLAUDE_35_HAIKU, key), + this.invokeModel(AWS_CLAUDE_SONNET_4, key), + this.invokeModel(AWS_CLAUDE_OPUS_4, key), ]; } checks.unshift(this.checkLoggingConfiguration(key)); - const [_logging, claudeV2, sonnet, haiku, opus, sonnet35] = - await Promise.all(checks); + const [ + _logging, + claudeV2, + sonnet45, + haiku45, + opus41, + haiku35, + sonnet4, + opus4, + ] = await Promise.all(checks); this.log.debug( - { key: key.hash, _logging, claudeV2, sonnet, haiku, opus, sonnet35 }, + { + key: key.hash, + _logging, + claudeV2, + sonnet45, + haiku45, + opus41, + haiku35, + sonnet4, + opus4, + }, "AWS model tests complete." ); if (isInitialCheck) { + const modelIds = [ + claudeV2 && AWS_CLAUDE_V2, + sonnet45 && AWS_CLAUDE_SONNET_45, + haiku45 && AWS_CLAUDE_HAIKU_45, + opus41 && AWS_CLAUDE_OPUS_41, + haiku35 && AWS_CLAUDE_35_HAIKU, + sonnet4 && AWS_CLAUDE_SONNET_4, + opus4 && AWS_CLAUDE_OPUS_4, + ].filter(Boolean) as string[]; + const families: AwsBedrockModelFamily[] = []; - if (claudeV2 || sonnet || sonnet35 || haiku) families.push("aws-claude"); - if (opus) families.push("aws-claude-opus"); + if (claudeV2 || sonnet45 || haiku35 || haiku45 || sonnet4) { + families.push("aws-claude"); + } + if (opus41 || opus4) families.push("aws-claude-opus"); if (families.length === 0) { this.log.warn( @@ -85,9 +124,10 @@ export class AwsKeyChecker extends KeyCheckerBase { } this.updateKey(key.hash, { - sonnetEnabled: sonnet, - haikuEnabled: haiku, - sonnet35Enabled: sonnet35, + sonnetEnabled: sonnet45 || sonnet4, + haikuEnabled: haiku45 || haiku35, + sonnet35Enabled: haiku35, + modelIds, modelFamilies: families, }); } @@ -95,9 +135,10 @@ export class AwsKeyChecker extends KeyCheckerBase { this.log.info( { key: key.hash, - sonnet, - haiku, + sonnet45, + haiku45, families: key.modelFamilies, + models: key.modelIds, logged: key.awsLoggingStatus, }, "Checked key." @@ -203,7 +244,7 @@ export class AwsKeyChecker extends KeyCheckerBase { ) { return false; } - + // ResourceNotFound typically indicates that the tested model cannot be used // on the configured region for this set of credentials. if (status === 404) { diff --git a/src/shared/key-management/aws/provider.ts b/src/shared/key-management/aws/provider.ts index fe23809..3579141 100644 --- a/src/shared/key-management/aws/provider.ts +++ b/src/shared/key-management/aws/provider.ts @@ -13,6 +13,8 @@ type AwsBedrockKeyUsage = { export interface AwsBedrockKey extends Key, AwsBedrockKeyUsage { readonly service: "aws"; readonly modelFamilies: AwsBedrockModelFamily[]; + /** Exact Bedrock model IDs that have been verified for this key. */ + modelIds: string[]; /** The time at which this key was last rate limited. */ rateLimitedAt: number; /** The time until which this key is rate limited. */ @@ -63,6 +65,7 @@ export class AwsBedrockKeyProvider implements KeyProvider { key, service: this.service, modelFamilies: ["aws-claude"], + modelIds: [], isDisabled: false, isRevoked: false, promptCount: 0, @@ -115,12 +118,14 @@ export class AwsBedrockKeyProvider implements KeyProvider { const availableKeys = this.keys.filter((k) => { const isNotLogged = k.awsLoggingStatus !== "enabled"; + const hasExactInventory = k.modelIds.length > 0; return ( !k.isDisabled && (isNotLogged || config.allowAwsLogging) && - (k.sonnetEnabled || !needsSonnet) && // sonnet and haiku are both under aws-claude, while opus is not - (k.haikuEnabled || !needsHaiku) && - (k.sonnet35Enabled || !needsSonnet35) && + (!hasExactInventory || k.modelIds.includes(model)) && + (hasExactInventory || k.sonnetEnabled || !needsSonnet) && // sonnet and haiku are both under aws-claude, while opus is not + (hasExactInventory || k.haikuEnabled || !needsHaiku) && + (hasExactInventory || k.sonnet35Enabled || !needsSonnet35) && k.modelFamilies.includes(neededFamily) ); }); diff --git a/src/shared/key-management/openai/checker.ts b/src/shared/key-management/openai/checker.ts index b5cef57..406e7a5 100644 --- a/src/shared/key-management/openai/checker.ts +++ b/src/shared/key-management/openai/checker.ts @@ -7,6 +7,7 @@ import { getOpenAIModelFamily } from "../../models"; const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour const POST_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions"; +const POST_COMPLETIONS_URL = "https://api.openai.com/v1/completions"; const GET_MODELS_URL = "https://api.openai.com/v1/models"; const GET_ORGANIZATIONS_URL = "https://api.openai.com/v1/organizations"; @@ -302,13 +303,9 @@ export class OpenAIKeyChecker extends KeyCheckerBase { // the same trial key and even the text completion quota is exhausted, but // it should work better than the alternative. - const payload = { - model: "babbage-002", - max_tokens: -1, - messages: [{ role: "user", content: "" }], - }; + const payload = { model: "babbage-002", max_tokens: -1, prompt: "" }; const { headers, data } = await axios.post( - POST_CHAT_COMPLETIONS_URL, + POST_COMPLETIONS_URL, payload, { headers: OpenAIKeyChecker.getHeaders(key), diff --git a/src/shared/models.ts b/src/shared/models.ts index 3587e07..2f234d8 100644 --- a/src/shared/models.ts +++ b/src/shared/models.ts @@ -92,7 +92,7 @@ export const LLM_SERVICES = (( export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = { "^gpt-5(\\.\\d+)?([-.].+)?$": "gpt5", "^o\\d([-.].+)?$": "o-series", - "^computer-use-preview$": "o-series", + "^computer-use-preview(?:-\\d{4}-\\d{2}-\\d{2})?$": "o-series", "^gpt-4\\.1([-.].+)?$": "gpt4o", "^gpt-4o": "gpt4o", "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo", @@ -116,7 +116,7 @@ export const MODEL_FAMILY_SERVICE: { gpt4: "openai", "gpt4-turbo": "openai", "gpt4-32k": "openai", - "gpt4o": "openai", + gpt4o: "openai", gpt5: "openai", "o-series": "openai", "dall-e": "openai",