diff --git a/.env.example b/.env.example index 3ca2fe6..66fe59a 100644 --- a/.env.example +++ b/.env.example @@ -40,11 +40,11 @@ NODE_ENV=production # Which model types users are allowed to access. # The following model families are recognized: -# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-dall-e +# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | gpt5 | o-series | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-gpt5 | azure-o-series | azure-dall-e # By default, all models are allowed except for 'dall-e' / 'azure-dall-e'. # To allow DALL-E image generation, uncomment the line below and add 'dall-e' or # 'azure-dall-e' to the list of allowed model families. -# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o +# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,gpt5,o-series,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o,azure-gpt5,azure-o-series # Which services can be used to process prompts containing images via multimodal # models. The following services are recognized: @@ -115,10 +115,14 @@ NODE_ENV=production # TOKEN_QUOTA_GPT4=0 # TOKEN_QUOTA_GPT4_32K=0 # TOKEN_QUOTA_GPT4_TURBO=0 +# TOKEN_QUOTA_GPT5=0 +# TOKEN_QUOTA_O_SERIES=0 # TOKEN_QUOTA_CLAUDE=0 # TOKEN_QUOTA_GEMINI_PRO=0 # TOKEN_QUOTA_AWS_CLAUDE=0 # TOKEN_QUOTA_GCP_CLAUDE=0 +# TOKEN_QUOTA_AZURE_GPT5=0 +# TOKEN_QUOTA_AZURE_O_SERIES=0 # "Tokens" for image-generation models are counted at a rate of 100000 tokens # per US$1.00 generated, which is similar to the cost of GPT-4 Turbo. # DALL-E 3 costs around US$0.10 per image (10000 tokens). diff --git a/docs/aws-configuration.md b/docs/aws-configuration.md index 7f3c38c..a732c7b 100644 --- a/docs/aws-configuration.md +++ b/docs/aws-configuration.md @@ -45,11 +45,16 @@ You can also request Claude Instant, but support for this isn't fully implemente ### Supported model IDs Users can send these model IDs to the proxy to invoke the corresponding models. - **Claude** - - `anthropic.claude-v1` (~18k context, claude 1.3 -- EOL 2024-02-28) - `anthropic.claude-v2` (~100k context, claude 2.0) - `anthropic.claude-v2:1` (~200k context, claude 2.1) -- **Claude Instant** - - `anthropic.claude-instant-v1` (~100k context, claude instant 1.2) + - `anthropic.claude-haiku-4-5-20251001-v1:0` + - `anthropic.claude-sonnet-4-5-20250929-v1:0` + - `anthropic.claude-opus-4-1-20250805-v1:0` + - `anthropic.claude-3-5-haiku-20241022-v1:0` + - `anthropic.claude-sonnet-4-20250514-v1:0` + - `anthropic.claude-opus-4-20250514-v1:0` + +For OpenAI-compatible callers, the proxy will also remap newer Claude-style names such as `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, `claude-opus-4-1-20250805`, and `claude-3-5-haiku-20241022` to the corresponding Bedrock model IDs. 
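As a rough sketch of the substring-based remapping described above (illustrative only: the actual logic lives in `maybeReassignModel` in `src/proxy/aws.ts`, and the helper name and abbreviated alias table below are hypothetical):

```ts
// Hypothetical helper illustrating the alias remap described above; the
// real implementation is maybeReassignModel() in src/proxy/aws.ts.
const BEDROCK_ALIASES: Array<[needle: string, bedrockId: string]> = [
  ["sonnet-4-5", "anthropic.claude-sonnet-4-5-20250929-v1:0"],
  ["haiku-4-5", "anthropic.claude-haiku-4-5-20251001-v1:0"],
  ["opus-4-1", "anthropic.claude-opus-4-1-20250805-v1:0"],
  ["3-5-haiku", "anthropic.claude-3-5-haiku-20241022-v1:0"],
];

function toBedrockModelId(requested: string): string {
  // Anything that already looks like a Bedrock ID is passed through as-is.
  if (requested.includes("anthropic.claude")) return requested;
  // Normalize "sonnet-4.5"-style aliases to the dashed form before matching.
  const lower = requested.toLowerCase().replace(/\./g, "-");
  const hit = BEDROCK_ALIASES.find(([needle]) => lower.includes(needle));
  return hit ? hit[1] : requested;
}

// toBedrockModelId("claude-sonnet-4-5-20250929")
// => "anthropic.claude-sonnet-4-5-20250929-v1:0"
```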
## Note regarding logging diff --git a/docs/azure-configuration.md b/docs/azure-configuration.md index 5f94581..505dfcc 100644 --- a/docs/azure-configuration.md +++ b/docs/azure-configuration.md @@ -20,7 +20,9 @@ AZURE_CREDENTIALS=contoso-ml:gpt4-8k:0123456789abcdef0123456789abcdef,northwind- Note that each Azure deployment is assigned a model when you create it in the Azure OpenAI Service portal. If you want to use a different model, you'll need to create a new deployment, and therefore a new key to be added to the AZURE_CREDENTIALS environment variable. Each credential only grants access to one model. ### Supported model IDs -Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. For the most part they work the same with Azure. GPT-3.5 Turbo has an ID of "gpt-35-turbo" because Azure doesn't allow periods in model names, but the proxy should automatically convert this to the correct ID. +Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. The proxy now understands newer Azure-backed OpenAI model families such as GPT-4o, GPT-4.1, GPT-5 / GPT-5.2, o-series reasoning models, and GPT Image deployments including `gpt-image-1.5`, plus the newer Responses API route at `/proxy/azure/openai/v1/responses`. + +GPT-3.5 Turbo still has an Azure-specific ID of `gpt-35-turbo` because Azure doesn't allow periods in model names, but the proxy will automatically normalize that for you. As noted above, you can only use model IDs for which a deployment has been created and added to the proxy. diff --git a/docs/gcp-configuration.md b/docs/gcp-configuration.md index bf5ceb2..e90fce2 100644 --- a/docs/gcp-configuration.md +++ b/docs/gcp-configuration.md @@ -29,7 +29,11 @@ GCP_CREDENTIALS=my-first-project:xxx@yyy.com:us-east5:-----BEGIN PRIVATE KEY---- ## Supported model IDs Users can send these model IDs to the proxy to invoke the corresponding models. - **Claude** - - `claude-3-haiku@20240307` - - `claude-3-sonnet@20240229` - - `claude-3-opus@20240229` - - `claude-3-5-sonnet@20240620` \ No newline at end of file + - `claude-haiku-4-5@20251001` + - `claude-sonnet-4-5@20250929` + - `claude-opus-4-1@20250805` + - `claude-3-5-haiku@20241022` + - `claude-sonnet-4@20250514` + - `claude-opus-4@20250514` + +For OpenAI-compatible callers, the proxy will also remap Claude-style aliases like `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, and `claude-opus-4-1-20250805` to the corresponding Vertex AI model IDs. diff --git a/scripts/oai-reverse-proxy.http b/scripts/oai-reverse-proxy.http index 18b0942..90fdb60 100644 --- a/scripts/oai-reverse-proxy.http +++ b/scripts/oai-reverse-proxy.http @@ -7,8 +7,8 @@ Authorization: Bearer {{oai-key-1}} Content-Type: application/json { - "model": "gpt-3.5-turbo", - "max_tokens": 30, + "model": "gpt-4.1-mini", + "max_completion_tokens": 30, "stream": false, "messages": [ { @@ -18,6 +18,19 @@ Content-Type: application/json ] } +### +# @name OpenAI -- Responses API +POST https://api.openai.com/v1/responses +Authorization: Bearer {{oai-key-1}} +Content-Type: application/json + +{ + "model": "gpt-5.2", + "reasoning": { "effort": "medium" }, + "max_output_tokens": 80, + "input": "This is a test prompt." +} + ### # @name OpenAI -- Text Completions POST https://api.openai.com/v1/completions @@ -38,7 +51,7 @@ Authorization: Bearer {{oai-key-1}} Content-Type: application/json { - "model": "text-embedding-ada-002", + "model": "text-embedding-3-small", "input": "This is a test embedding input." 
} @@ -81,8 +94,8 @@ Authorization: Bearer {{proxy-key}} Content-Type: application/json { - "model": "gpt-4-1106-preview", - "max_tokens": 20, + "model": "gpt-4.1", + "max_completion_tokens": 20, "stream": true, "temperature": 1, "seed": 123, @@ -94,6 +107,20 @@ Content-Type: application/json ] } +### +# @name Proxy / OpenAI -- Native Responses API +POST {{proxy-host}}/proxy/openai/v1/responses +Authorization: Bearer {{proxy-key}} +Content-Type: application/json + +{ + "model": "gpt-5.2", + "reasoning": { "effort": "medium" }, + "max_output_tokens": 64, + "stream": false, + "input": "Summarize the purpose of this reverse proxy in one sentence." +} + ### # @name Proxy / OpenAI -- Native Text Completions POST {{proxy-host}}/proxy/openai/v1/turbo-instruct/chat/completions @@ -142,7 +169,7 @@ Authorization: Bearer {{proxy-key}} Content-Type: application/json { - "model": "text-embedding-ada-002", + "model": "text-embedding-3-small", "input": "This is a test embedding input." } @@ -185,7 +212,7 @@ Authorization: Bearer {{proxy-key}} Content-Type: application/json { - "model": "gpt-3.5-turbo", + "model": "gpt-5.2", "max_tokens": 20, "stream": false, "temperature": 0, @@ -197,6 +224,23 @@ Content-Type: application/json ] } +### +# @name Proxy / Google AI -- OpenAI-Compat Image Generation +POST {{proxy-host}}/proxy/google-ai/v1/chat/completions +Authorization: Bearer {{proxy-key}} +Content-Type: application/json + +{ + "model": "gemini-2.5-flash-image", + "stream": false, + "messages": [ + { + "role": "user", + "content": "Generate a flat vector-style illustration of a red fox reading a newspaper at a cafe table." + } + ] +} + ### # @name Proxy / AWS Claude -- Native Completion POST {{proxy-host}}/proxy/aws/claude/v1/complete diff --git a/src/config.ts b/src/config.ts index be7bb1d..bff00fd 100644 --- a/src/config.ts +++ b/src/config.ts @@ -434,6 +434,8 @@ export const config: Config = { "gpt4-32k", "gpt4-turbo", "gpt4o", + "gpt5", + "o-series", "claude", "claude-opus", "gemini-pro", @@ -450,6 +452,8 @@ export const config: Config = { "azure-gpt4-32k", "azure-gpt4-turbo", "azure-gpt4o", + "azure-gpt5", + "azure-o-series", ]), rejectPhrases: parseCsv(getEnvWithDefault("REJECT_PHRASES", "")), rejectMessage: getEnvWithDefault( diff --git a/src/info-page.ts b/src/info-page.ts index 788e845..1da4235 100644 --- a/src/info-page.ts +++ b/src/info-page.ts @@ -17,6 +17,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = { "gpt4-32k": "GPT-4 32k", "gpt4-turbo": "GPT-4 Turbo", gpt4o: "GPT-4o", + gpt5: "GPT-5", + "o-series": "o-Series", "dall-e": "DALL-E", claude: "Claude (Sonnet)", "claude-opus": "Claude (Opus)", @@ -34,6 +36,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = { "azure-gpt4-32k": "Azure GPT-4 32k", "azure-gpt4-turbo": "Azure GPT-4 Turbo", "azure-gpt4o": "Azure GPT-4o", + "azure-gpt5": "Azure GPT-5", + "azure-o-series": "Azure o-Series", "azure-dall-e": "Azure DALL-E", }; diff --git a/src/proxy/anthropic.ts b/src/proxy/anthropic.ts index 643efef..b24b455 100644 --- a/src/proxy/anthropic.ts +++ b/src/proxy/anthropic.ts @@ -29,24 +29,18 @@ const getModelsResponse = () => { if (!config.anthropicKey) return { object: "list", data: [] }; const claudeVariants = [ - "claude-v1", - "claude-v1-100k", - "claude-instant-v1", - "claude-instant-v1-100k", - "claude-v1.3", - "claude-v1.3-100k", - "claude-v1.2", - "claude-v1.0", - "claude-instant-v1.1", - "claude-instant-v1.1-100k", - "claude-instant-v1.0", - "claude-2", "claude-2.0", "claude-2.1", - 
"claude-3-haiku-20240307", - "claude-3-opus-20240229", - "claude-3-sonnet-20240229", - "claude-3-5-sonnet-20240620" + "claude-sonnet-4-5", + "claude-sonnet-4-5-20250929", + "claude-haiku-4-5", + "claude-haiku-4-5-20251001", + "claude-opus-4-1", + "claude-opus-4-1-20250805", + "claude-opus-4-20250514", + "claude-sonnet-4-20250514", + "claude-3-5-haiku-20241022", + "claude-3-5-haiku-latest", ]; const models = claudeVariants.map((id) => ({ @@ -230,7 +224,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware({ * (claude-3 based models do not support the old text completion endpoint). */ const preprocessAnthropicTextRequest: RequestHandler = (req, res, next) => { - if (req.body.model?.startsWith("claude-3")) { + if (requiresAnthropicMessagesApi(req.body.model)) { textToChatPreprocessor(req, res, next); } else { nativeTextPreprocessor(req, res, next); @@ -255,7 +249,7 @@ const oaiToChatPreprocessor = createPreprocessorMiddleware({ */ const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => { maybeReassignModel(req); - if (req.body.model?.includes("claude-3")) { + if (requiresAnthropicMessagesApi(req.body.model)) { oaiToChatPreprocessor(req, res, next); } else { oaiToTextPreprocessor(req, res, next); @@ -315,7 +309,8 @@ function handleAnthropicTextCompatRequest( const type = req.params.type; const action = req.params.action; const alreadyInChatFormat = Boolean(req.body.messages); - const compatModel = `claude-3-${type}-20240229`; + const compatModel = + type === "opus" ? "claude-opus-4-1-20250805" : "claude-sonnet-4-5-20250929"; req.log.info( { type, inputModel: req.body.model, compatModel, alreadyInChatFormat }, "Handling Anthropic compatibility request" @@ -349,8 +344,20 @@ function handleAnthropicTextCompatRequest( */ function maybeReassignModel(req: Request) { const model = req.body.model; - if (!model.startsWith("gpt-")) return; - req.body.model = "claude-3-sonnet-20240229"; + const lower = String(model).toLowerCase(); + if ( + lower.startsWith("gpt-") || + lower.startsWith("o1") || + lower.startsWith("o3") || + lower.startsWith("o4") || + lower === "computer-use-preview" + ) { + req.body.model = "claude-sonnet-4-5-20250929"; + } +} + +function requiresAnthropicMessagesApi(model?: string) { + return /^claude-(?:3|sonnet|opus)/.test(model ?? 
""); } export const anthropic = anthropicRouter; diff --git a/src/proxy/aws.ts b/src/proxy/aws.ts index c02ff0e..93d1eb8 100644 --- a/src/proxy/aws.ts +++ b/src/proxy/aws.ts @@ -20,6 +20,12 @@ import { transformAnthropicChatResponseToAnthropicText, transformAnthropicChatRe import { sendErrorToClient } from "./middleware/response/error-generator"; const LATEST_AWS_V2_MINOR_VERSION = "1"; +const AWS_CLAUDE_SONNET_45 = "anthropic.claude-sonnet-4-5-20250929-v1:0"; +const AWS_CLAUDE_HAIKU_45 = "anthropic.claude-haiku-4-5-20251001-v1:0"; +const AWS_CLAUDE_OPUS_41 = "anthropic.claude-opus-4-1-20250805-v1:0"; +const AWS_CLAUDE_SONNET_4 = "anthropic.claude-sonnet-4-20250514-v1:0"; +const AWS_CLAUDE_OPUS_4 = "anthropic.claude-opus-4-20250514-v1:0"; +const AWS_CLAUDE_35_HAIKU = "anthropic.claude-3-5-haiku-20241022-v1:0"; let modelsCache: any = null; let modelsCacheTime = 0; @@ -35,10 +41,12 @@ const getModelsResponse = () => { const variants = [ "anthropic.claude-v2", "anthropic.claude-v2:1", - "anthropic.claude-3-haiku-20240307-v1:0", - "anthropic.claude-3-sonnet-20240229-v1:0", - "anthropic.claude-3-5-sonnet-20240620-v1:0", - "anthropic.claude-3-opus-20240229-v1:0", + AWS_CLAUDE_HAIKU_45, + AWS_CLAUDE_SONNET_45, + AWS_CLAUDE_OPUS_41, + AWS_CLAUDE_35_HAIKU, + AWS_CLAUDE_SONNET_4, + AWS_CLAUDE_OPUS_4, ]; const models = variants.map((id) => ({ @@ -164,7 +172,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware( * (claude-3 based models do not support the old text completion endpoint). */ const preprocessAwsTextRequest: RequestHandler = (req, res, next) => { - if (req.body.model?.includes("claude-3")) { + if (requiresAnthropicMessagesApi(req.body.model)) { textToChatPreprocessor(req, res, next); } else { nativeTextPreprocessor(req, res, next); @@ -186,7 +194,7 @@ const oaiToAwsChatPreprocessor = createPreprocessorMiddleware( * or the new Claude chat completion endpoint, based on the requested model. 
*/ const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => { - if (req.body.model?.includes("claude-3")) { + if (requiresAnthropicMessagesApi(req.body.model)) { oaiToAwsChatPreprocessor(req, res, next); } else { oaiToAwsTextPreprocessor(req, res, next); } @@ -241,12 +249,65 @@ awsRouter.post( */ function maybeReassignModel(req: Request) { const model = req.body.model; + const lower = String(model).toLowerCase(); // If it looks like an AWS model, use it as-is if (model.includes("anthropic.claude")) { return; } + if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) { + req.body.model = AWS_CLAUDE_OPUS_41; + return; + } + if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) { + req.body.model = AWS_CLAUDE_OPUS_41; + return; + } + if (lower.includes("opus-4")) { + req.body.model = AWS_CLAUDE_OPUS_4; + return; + } + if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) { + req.body.model = AWS_CLAUDE_HAIKU_45; + return; + } + if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) { + req.body.model = AWS_CLAUDE_SONNET_45; + return; + } + if (lower.includes("sonnet-4")) { + req.body.model = AWS_CLAUDE_SONNET_4; + return; + } + if (lower.includes("3-5") && lower.includes("haiku")) { + req.body.model = AWS_CLAUDE_35_HAIKU; + return; + } + if (lower.includes("opus")) { + req.body.model = AWS_CLAUDE_OPUS_41; + return; + } + if (lower.includes("haiku")) { + req.body.model = AWS_CLAUDE_HAIKU_45; + return; + } + if (lower.includes("sonnet")) { + req.body.model = AWS_CLAUDE_SONNET_45; + return; + } + + if ( + lower.startsWith("gpt-") || + lower.startsWith("o1") || + lower.startsWith("o3") || + lower.startsWith("o4") || + lower === "computer-use-preview" + ) { + req.body.model = AWS_CLAUDE_SONNET_45; + return; + } + // Anthropic model names can look like: // - claude-v1 // - claude-2.1 @@ -282,20 +343,22 @@ function maybeReassignModel(req: Request) { case "3": case "3.0": if (name.includes("opus")) { - req.body.model = "anthropic.claude-3-opus-20240229-v1:0"; + req.body.model = AWS_CLAUDE_OPUS_41; } else if (name.includes("haiku")) { - req.body.model = "anthropic.claude-3-haiku-20240307-v1:0"; + req.body.model = AWS_CLAUDE_HAIKU_45; } else { - req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0"; + req.body.model = AWS_CLAUDE_SONNET_45; } return; case "3.5": - req.body.model = "anthropic.claude-3-5-sonnet-20240620-v1:0"; + req.body.model = name.includes("haiku") + ? AWS_CLAUDE_35_HAIKU + : AWS_CLAUDE_SONNET_45; return; } - // Fallback to Claude 2.1 - req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`; + // Fallback to Claude Sonnet 4.5 + req.body.model = AWS_CLAUDE_SONNET_45; return; } @@ -306,7 +369,7 @@ export function handleCompatibilityRequest( ) { const action = req.params.action; const alreadyInChatFormat = Boolean(req.body.messages); - const compatModel = "anthropic.claude-3-5-sonnet-20240620-v1:0"; + const compatModel = AWS_CLAUDE_SONNET_4; req.log.info( { inputModel: req.body.model, compatModel, alreadyInChatFormat }, "Handling AWS compatibility request" ); @@ -335,3 +398,7 @@ } export const aws = awsRouter; + +function requiresAnthropicMessagesApi(model?: string) { + return /claude-(?:3|sonnet|opus|haiku)/.test(model ??
""); +} diff --git a/src/proxy/azure.ts b/src/proxy/azure.ts index e8a6155..d540828 100644 --- a/src/proxy/azure.ts +++ b/src/proxy/azure.ts @@ -32,20 +32,29 @@ function getModelsResponse() { } let available = new Set(); + const availableModelIds = new Set(); for (const key of keyPool.list()) { if (key.isDisabled || key.service !== "azure") continue; + const azureKey = key as any; key.modelFamilies.forEach((family) => available.add(family as AzureOpenAIModelFamily) ); + azureKey.modelIds?.forEach((id: string) => availableModelIds.add(id)); } const allowed = new Set(config.allowedModelFamilies); available = new Set([...available].filter((x) => allowed.has(x))); - const models = KNOWN_OPENAI_MODELS.map((id) => ({ - id, - object: "model", - created: new Date().getTime(), - owned_by: "azure", + const usingExactModelIds = availableModelIds.size > 0; + + const sourceModels = usingExactModelIds + ? [...new Set([...KNOWN_OPENAI_MODELS, ...availableModelIds])] + : KNOWN_OPENAI_MODELS; + + const models = sourceModels.map((id) => ({ + id, + object: "model", + created: new Date().getTime(), + owned_by: "azure", permission: [ { id: "modelperm-" + id, @@ -58,7 +67,12 @@ function getModelsResponse() { ], root: id, parent: null, - })).filter((model) => available.has(getAzureOpenAIModelFamily(model.id))); + })).filter((model) => { + if (usingExactModelIds) { + return availableModelIds.has(model.id); + } + return available.has(getAzureOpenAIModelFamily(model.id)); + }); modelsCache = { object: "list", data: models }; modelsCacheTime = new Date().getTime(); @@ -115,6 +129,16 @@ azureOpenAIRouter.post( }), azureOpenAIProxy ); +azureOpenAIRouter.post( + "/v1/responses", + ipLimiter, + createPreprocessorMiddleware({ + inApi: "openai-responses", + outApi: "openai-responses", + service: "azure", + }), + azureOpenAIProxy +); azureOpenAIRouter.post( "/v1/images/generations", ipLimiter, diff --git a/src/proxy/gcp.ts b/src/proxy/gcp.ts index a9b622b..03b37b2 100644 --- a/src/proxy/gcp.ts +++ b/src/proxy/gcp.ts @@ -19,7 +19,12 @@ import { import { transformAnthropicChatResponseToOpenAI } from "./anthropic"; import { sendErrorToClient } from "./middleware/response/error-generator"; -const LATEST_GCP_SONNET_MINOR_VERSION = "20240229"; +const GCP_CLAUDE_SONNET_45 = "claude-sonnet-4-5@20250929"; +const GCP_CLAUDE_HAIKU_45 = "claude-haiku-4-5@20251001"; +const GCP_CLAUDE_OPUS_41 = "claude-opus-4-1@20250805"; +const GCP_CLAUDE_SONNET_4 = "claude-sonnet-4@20250514"; +const GCP_CLAUDE_OPUS_4 = "claude-opus-4@20250514"; +const GCP_CLAUDE_35_HAIKU = "claude-3-5-haiku@20241022"; let modelsCache: any = null; let modelsCacheTime = 0; @@ -33,10 +38,12 @@ const getModelsResponse = () => { // https://docs.anthropic.com/en/docs/about-claude/models const variants = [ - "claude-3-haiku@20240307", - "claude-3-sonnet@20240229", - "claude-3-opus@20240229", - "claude-3-5-sonnet@20240620", + GCP_CLAUDE_HAIKU_45, + GCP_CLAUDE_SONNET_45, + GCP_CLAUDE_OPUS_41, + GCP_CLAUDE_35_HAIKU, + GCP_CLAUDE_SONNET_4, + GCP_CLAUDE_OPUS_4, ]; const models = variants.map((id) => ({ @@ -147,6 +154,7 @@ gcpRouter.post( */ function maybeReassignModel(req: Request) { const model = req.body.model; + const lower = String(model).toLowerCase(); // If it looks like an GCP model, use it as-is // if (model.includes("anthropic.claude")) { @@ -154,6 +162,58 @@ function maybeReassignModel(req: Request) { return; } + if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) { + req.body.model = GCP_CLAUDE_OPUS_41; + return; + } + if (lower.includes("opus-4.1") || 
lower.includes("opus-4-1")) { + req.body.model = GCP_CLAUDE_OPUS_41; + return; + } + if (lower.includes("opus-4")) { + req.body.model = GCP_CLAUDE_OPUS_4; + return; + } + if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) { + req.body.model = GCP_CLAUDE_HAIKU_45; + return; + } + if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) { + req.body.model = GCP_CLAUDE_SONNET_45; + return; + } + if (lower.includes("sonnet-4")) { + req.body.model = GCP_CLAUDE_SONNET_4; + return; + } + if (lower.includes("3-5") && lower.includes("haiku")) { + req.body.model = GCP_CLAUDE_35_HAIKU; + return; + } + if (lower.includes("opus")) { + req.body.model = GCP_CLAUDE_OPUS_41; + return; + } + if (lower.includes("haiku")) { + req.body.model = GCP_CLAUDE_HAIKU_45; + return; + } + if (lower.includes("sonnet")) { + req.body.model = GCP_CLAUDE_SONNET_45; + return; + } + + if ( + lower.startsWith("gpt-") || + lower.startsWith("o1") || + lower.startsWith("o3") || + lower.startsWith("o4") || + lower === "computer-use-preview" + ) { + req.body.model = GCP_CLAUDE_SONNET_45; + return; + } + // Anthropic model names can look like: // - claude-v1 // - claude-2.1 @@ -165,7 +225,7 @@ function maybeReassignModel(req: Request) { // If there's no match, fallback to Claude3 Sonnet as it is most likely to be // available on GCP. if (!match) { - req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`; + req.body.model = GCP_CLAUDE_SONNET_4; return; } @@ -176,20 +236,22 @@ function maybeReassignModel(req: Request) { case "3": case "3.0": if (name.includes("opus")) { - req.body.model = "claude-3-opus@20240229"; + req.body.model = GCP_CLAUDE_OPUS_41; } else if (name.includes("haiku")) { - req.body.model = "claude-3-haiku@20240307"; + req.body.model = GCP_CLAUDE_HAIKU_45; } else { - req.body.model = "claude-3-sonnet@20240229"; + req.body.model = GCP_CLAUDE_SONNET_45; } return; case "3.5": - req.body.model = "claude-3-5-sonnet@20240620"; + req.body.model = name.includes("haiku") + ? GCP_CLAUDE_35_HAIKU + : GCP_CLAUDE_SONNET_45; return; } - // Fallback to Claude3 Sonnet - req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`; + // Fallback to Claude Sonnet 4 + req.body.model = GCP_CLAUDE_SONNET_45; return; } diff --git a/src/proxy/google-ai.ts b/src/proxy/google-ai.ts index 62783e6..b396e11 100644 --- a/src/proxy/google-ai.ts +++ b/src/proxy/google-ai.ts @@ -16,6 +16,11 @@ import { ProxyResHandlerWithBody, } from "./middleware/response"; import { addGoogleAIKey } from "./middleware/request/preprocessors/add-google-ai-key"; +import { BadRequestError } from "../shared/errors"; +import { + flattenGoogleAIContentParts, + isGoogleAIImageModel, +} from "../shared/api-schemas"; let modelsCache: any = null; let modelsCacheTime = 0; @@ -31,10 +36,15 @@ const getModelsResponse = () => { if (!config.googleAIKey) return { object: "list", data: [] }; const googleAIVariants = [ - "gemini-pro", - "gemini-1.0-pro", + "gemini-2.5-pro", + "gemini-2.5-flash", + "gemini-2.5-flash-lite", + "gemini-2.5-flash-image", + "gemini-3-pro-image-preview", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash", "gemini-1.5-pro", - "gemini-1.5-pro-latest", + "gemini-1.5-flash", ]; const models = googleAIVariants.map((id) => ({ @@ -83,7 +93,8 @@ function transformGoogleAIResponse( ): Record { const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0); const parts = resBody.candidates[0].content?.parts ?? 
[{ text: "" }]; - const content = parts[0].text.replace(/^(.{0,50}?): /, () => ""); + const content = flattenGoogleAIContentParts(parts) + .replace(/^(.{0,50}?): /, () => ""); return { id: "goo-" + v4(), object: "chat.completion", @@ -136,14 +147,19 @@ googleAIRouter.post( googleAIProxy ); -/** Replaces requests for non-Google AI models with gemini-pro-1.5-latest. */ +/** Replaces requests for non-Google AI models with Gemini 2.5 Flash. */ function maybeReassignModel(req: Request) { const requested = req.body.model; if (requested.includes("gemini")) { + if (req.body.stream && isGoogleAIImageModel(requested)) { + throw new BadRequestError( + "Streaming Gemini image-generation models is not currently supported by this proxy. Retry without `stream: true`." + ); + } return; } - req.log.info({ requested }, "Reassigning model to gemini-pro-1.5-latest"); - req.body.model = "gemini-pro-1.5-latest"; + req.log.info({ requested }, "Reassigning model to gemini-2.5-flash"); + req.body.model = "gemini-2.5-flash"; } export const googleAI = googleAIRouter; diff --git a/src/proxy/middleware/common.ts b/src/proxy/middleware/common.ts index 0274522..f58aba0 100644 --- a/src/proxy/middleware/common.ts +++ b/src/proxy/middleware/common.ts @@ -5,10 +5,15 @@ import { ZodError } from "zod"; import { generateErrorMessage } from "zod-error"; import { HttpError } from "../../shared/errors"; import { assertNever } from "../../shared/utils"; +import { + flattenGoogleAIContentParts, + flattenOpenAIResponsesOutput, +} from "../../shared/api-schemas"; import { QuotaExceededError } from "./request/preprocessors/apply-quota-limits"; import { sendErrorToClient } from "./response/error-generator"; const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions"; +const OPENAI_RESPONSES_ENDPOINT = "/v1/responses"; const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions"; const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings"; const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations"; @@ -22,6 +27,7 @@ export function isTextGenerationRequest(req: Request) { req.method === "POST" && [ OPENAI_CHAT_COMPLETION_ENDPOINT, + OPENAI_RESPONSES_ENDPOINT, OPENAI_TEXT_COMPLETION_ENDPOINT, ANTHROPIC_COMPLETION_ENDPOINT, ANTHROPIC_MESSAGES_ENDPOINT, @@ -224,6 +230,8 @@ export function getCompletionFromBody(req: Request, body: Record) { // Can be null if the model wants to invoke tools rather than return a // completion. 
return body.choices[0].message.content || ""; + case "openai-responses": + return flattenOpenAIResponsesOutput(body); case "openai-text": return body.choices[0].text; case "anthropic-chat": @@ -252,7 +260,7 @@ export function getCompletionFromBody(req: Request, body: Record) { if ("choices" in body) { return body.choices[0].message.content; } - return body.candidates[0].content.parts[0].text; + return flattenGoogleAIContentParts(body.candidates?.[0]?.content?.parts); case "openai-image": return body.data?.map((item: any) => item.url).join("\n"); default: @@ -267,6 +275,8 @@ export function getModelFromBody(req: Request, body: Record) { case "openai-text": case "mistral-ai": return body.model; + case "openai-responses": + return body.model || req.body.model; case "openai-image": return req.body.model; case "anthropic-chat": diff --git a/src/proxy/middleware/request/onproxyreq/add-key.ts b/src/proxy/middleware/request/onproxyreq/add-key.ts index 27b2dc3..4369264 100644 --- a/src/proxy/middleware/request/onproxyreq/add-key.ts +++ b/src/proxy/middleware/request/onproxyreq/add-key.ts @@ -47,6 +47,7 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => { assignedKey = keyPool.get("dall-e-3", service); break; case "openai": + case "openai-responses": case "google-ai": case "mistral-ai": throw new Error( @@ -109,9 +110,10 @@ export const addKeyForEmbeddingsRequest: HPMRequestCallback = ( throw new Error("Embeddings requests must be from OpenAI"); } - req.body = { input: req.body.input, model: "text-embedding-ada-002" }; + const model = req.body.model || "text-embedding-3-small"; + req.body = { input: req.body.input, model }; - const key = keyPool.get("text-embedding-ada-002", "openai") as OpenAIKey; + const key = keyPool.get(model, "openai") as OpenAIKey; req.key = key; req.log.info( diff --git a/src/proxy/middleware/request/preprocessors/add-azure-key.ts b/src/proxy/middleware/request/preprocessors/add-azure-key.ts index a0a0c19..6009b2f 100644 --- a/src/proxy/middleware/request/preprocessors/add-azure-key.ts +++ b/src/proxy/middleware/request/preprocessors/add-azure-key.ts @@ -6,7 +6,7 @@ import { import { RequestPreprocessor } from "../index"; export const addAzureKey: RequestPreprocessor = (req) => { - const validAPIs: APIFormat[] = ["openai", "openai-image"]; + const validAPIs: APIFormat[] = ["openai", "openai-responses", "openai-image"]; const apisValid = [req.outboundApi, req.inboundApi].every((api) => validAPIs.includes(api) ); @@ -50,6 +50,23 @@ export const addAzureKey: RequestPreprocessor = (req) => { const cred = req.key as AzureOpenAIKey; const { resourceName, deploymentId, apiKey } = getCredentialsFromKey(cred); + if (req.outboundApi === "openai-responses") { + req.body.model = deploymentId; + req.signedRequest = { + method: "POST", + protocol: "https:", + hostname: `${resourceName}.openai.azure.com`, + path: `/openai/v1/responses?api-version=preview`, + headers: { + ["host"]: `${resourceName}.openai.azure.com`, + ["content-type"]: "application/json", + ["api-key"]: apiKey, + }, + body: JSON.stringify(req.body), + }; + return; + } + const operation = req.outboundApi === "openai" ? 
"/chat/completions" : "/images/generations"; const apiVersion = diff --git a/src/proxy/middleware/request/preprocessors/count-prompt-tokens.ts b/src/proxy/middleware/request/preprocessors/count-prompt-tokens.ts index 130bedf..1ea31f2 100644 --- a/src/proxy/middleware/request/preprocessors/count-prompt-tokens.ts +++ b/src/proxy/middleware/request/preprocessors/count-prompt-tokens.ts @@ -6,6 +6,7 @@ import { GoogleAIChatMessage, MistralAIChatMessage, OpenAIChatMessage, + flattenOpenAIResponsesInput, } from "../../../../shared/api-schemas"; /** @@ -18,11 +19,23 @@ export const countPromptTokens: RequestPreprocessor = async (req) => { switch (service) { case "openai": { - req.outputTokens = req.body.max_tokens; + req.outputTokens = + req.body.max_completion_tokens ?? req.body.max_tokens ?? 0; const prompt: OpenAIChatMessage[] = req.body.messages; result = await countTokens({ req, prompt, service }); break; } + case "openai-responses": { + req.outputTokens = req.body.max_output_tokens ?? 0; + const prompt = [ + flattenOpenAIResponsesInput(req.body.instructions), + flattenOpenAIResponsesInput(req.body.input), + ] + .filter(Boolean) + .join("\n\n"); + result = await countTokens({ req, prompt, service }); + break; + } case "openai-text": { req.outputTokens = req.body.max_tokens; const prompt: string = req.body.prompt; diff --git a/src/proxy/middleware/request/preprocessors/language-filter.ts b/src/proxy/middleware/request/preprocessors/language-filter.ts index 9610cb4..6aae3b0 100644 --- a/src/proxy/middleware/request/preprocessors/language-filter.ts +++ b/src/proxy/middleware/request/preprocessors/language-filter.ts @@ -4,8 +4,10 @@ import { assertNever } from "../../../../shared/utils"; import { RequestPreprocessor } from "../index"; import { BadRequestError } from "../../../../shared/errors"; import { + GoogleAIChatMessage, MistralAIChatMessage, OpenAIChatMessage, + flattenOpenAIResponsesInput, flattenAnthropicMessages, } from "../../../../shared/api-schemas"; @@ -72,11 +74,27 @@ function getPromptFromRequest(req: Request) { return `${msg.role}: ${text}`; }) .join("\n\n"); + case "openai-responses": + return [ + flattenOpenAIResponsesInput(body.instructions), + flattenOpenAIResponsesInput(body.input), + ] + .filter(Boolean) + .join("\n\n"); case "openai-text": case "openai-image": return body.prompt; case "google-ai": - return body.prompt.text; + return body.contents + .map(({ parts, role }: GoogleAIChatMessage) => { + const text = parts + .map((part: any) => + "text" in part ? 
part.text : "[image omitted]" + ) + .join("\n"); + return `${role}: ${text}`; + }) + .join("\n\n"); default: assertNever(service); } diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts index ec87f46..b589200 100644 --- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts +++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts @@ -6,8 +6,8 @@ import { RequestPreprocessor } from "../index"; const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic; const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI; -const GOOGLE_AI_MAX_CONTEXT = 32000; -const MISTRAL_AI_MAX_CONTENT = 32768; +const GOOGLE_AI_MAX_CONTEXT = 1048576; +const MISTRAL_AI_MAX_CONTENT = 256000; /** * Assigns `req.promptTokens` and `req.outputTokens` based on the request body @@ -26,6 +26,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => { let proxyMax: number; switch (req.outboundApi) { case "openai": + case "openai-responses": case "openai-text": proxyMax = OPENAI_MAX_CONTEXT; break; @@ -54,6 +55,12 @@ export const validateContextSize: RequestPreprocessor = async (req) => { let modelMax: number; if (model.match(/gpt-3.5-turbo-16k/)) { modelMax = 16384; + } else if (model.match(/^gpt-5(\.|-|\b)/)) { + modelMax = 1050000; + } else if (model.match(/^o\d/)) { + modelMax = 200000; + } else if (model.match(/^gpt-4\.1/)) { + modelMax = 1047576; } else if (model.match(/^gpt-4o/)) { modelMax = 128000; } else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) { @@ -80,12 +87,27 @@ export const validateContextSize: RequestPreprocessor = async (req) => { modelMax = 200000; } else if (model.match(/^claude-3/)) { modelMax = 200000; + } else if (model.match(/^claude-(opus|sonnet|haiku)-4/)) { + modelMax = 200000; } else if (model.match(/^gemini-\d{3}$/)) { modelMax = GOOGLE_AI_MAX_CONTEXT; + } else if (model.match(/^gemini-(2\.5|2\.0)/)) { + modelMax = 1048576; } else if (model.match(/^mistral-(tiny|small|medium)$/)) { modelMax = MISTRAL_AI_MAX_CONTENT; + } else if ( + model.match( + /^(mistral|ministral|magistral|pixtral|codestral|devstral|voxtral)-/ + ) + ) { + modelMax = MISTRAL_AI_MAX_CONTENT; } else if (model.match(/^anthropic\.claude-3/)) { modelMax = 200000; + } else if ( + model.match(/^anthropic\.claude-(opus|sonnet|haiku)-4/) || + model.match(/^claude-(opus|sonnet|haiku)-4@/) + ) { + modelMax = 200000; } else if (model.match(/^anthropic\.claude-v2:\d/)) { modelMax = 200000; } else if (model.match(/^anthropic\.claude/)) { @@ -121,8 +143,8 @@ function assertRequestHasTokenCounts( req: Request ): asserts req is Request & { promptTokens: number; outputTokens: number } { z.object({ - promptTokens: z.number().int().min(1), - outputTokens: z.number().int().min(1), + promptTokens: z.number().int().min(0), + outputTokens: z.number().int().min(0), }) .nonstrict() .parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens }); diff --git a/src/proxy/middleware/request/preprocessors/validate-vision.ts b/src/proxy/middleware/request/preprocessors/validate-vision.ts index b72f6fd..e90d5e9 100644 --- a/src/proxy/middleware/request/preprocessors/validate-vision.ts +++ b/src/proxy/middleware/request/preprocessors/validate-vision.ts @@ -3,6 +3,7 @@ import { assertNever } from "../../../../shared/utils"; import { RequestPreprocessor } from "../index"; import { containsImageContent as containsImageContentOpenAI } from "../../../../shared/api-schemas/openai"; import { 
containsImageContent as containsImageContentAnthropic } from "../../../../shared/api-schemas/anthropic"; +import { containsOpenAIResponsesImageInput } from "../../../../shared/api-schemas"; import { ForbiddenError } from "../../../../shared/errors"; /** @@ -22,11 +23,20 @@ export const validateVision: RequestPreprocessor = async (req) => { case "openai": hasImage = containsImageContentOpenAI(req.body.messages); break; + case "openai-responses": + hasImage = + containsOpenAIResponsesImageInput(req.body.instructions) || + containsOpenAIResponsesImageInput(req.body.input); + break; case "anthropic-chat": hasImage = containsImageContentAnthropic(req.body.messages); break; - case "anthropic-text": case "google-ai": + hasImage = req.body.contents?.some((message: { parts: any[] }) => + message.parts?.some((part) => "inline_data" in part) + ); + break; + case "anthropic-text": case "mistral-ai": case "openai-image": case "openai-text": diff --git a/src/proxy/middleware/response/error-generator.ts b/src/proxy/middleware/response/error-generator.ts index c5c9735..1dea651 100644 --- a/src/proxy/middleware/response/error-generator.ts +++ b/src/proxy/middleware/response/error-generator.ts @@ -72,7 +72,15 @@ type ErrorGeneratorOptions = { }; export function tryInferFormat(body: any): APIFormat | "unknown" { - if (typeof body !== "object" || !body.model) { + if (typeof body !== "object") { + return "unknown"; + } + + if (body.object === "response" || Array.isArray(body.output)) { + return "openai-responses"; + } + + if (!body.model) { return "unknown"; } @@ -158,7 +166,30 @@ export function buildSpoofedCompletion({ switch (format) { case "openai": + case "openai-responses": case "mistral-ai": + if (format === "openai-responses") { + return { + id: "error-" + id, + object: "response", + created_at: Math.floor(Date.now() / 1000), + model, + status: "completed", + error: null, + incomplete_details: null, + output_text: content, + output: [ + { + id: "msg-error-" + id, + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: content, annotations: [] }], + }, + ], + usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 }, + }; + } return { id: "error-" + id, object: "chat.completion", @@ -248,7 +279,23 @@ export function buildSpoofedSSE({ switch (format) { case "openai": + case "openai-responses": case "mistral-ai": + if (format === "openai-responses") { + return ( + `data: ${JSON.stringify({ + type: "response.completed", + response: buildSpoofedCompletion({ + format, + title, + message, + obj, + reqId, + model, + }), + })}\n\n` + ); + } event = { id: "chatcmpl-" + id, object: "chat.completion.chunk", diff --git a/src/proxy/middleware/response/log-prompt.ts b/src/proxy/middleware/response/log-prompt.ts index cfa2aa0..42ab689 100644 --- a/src/proxy/middleware/response/log-prompt.ts +++ b/src/proxy/middleware/response/log-prompt.ts @@ -11,6 +11,7 @@ import { ProxyResHandlerWithBody } from "."; import { assertNever } from "../../../shared/utils"; import { AnthropicChatMessage, + flattenOpenAIResponsesInput, flattenAnthropicMessages, GoogleAIChatMessage, MistralAIChatMessage, OpenAIChatMessage, @@ -62,6 +63,7 @@ const getPromptForRequest = ( ): | string | OpenAIChatMessage[] + | { instructions?: unknown; input?: unknown } | { contents: GoogleAIChatMessage[] } | { system: string; messages: AnthropicChatMessage[] } | MistralAIChatMessage[] @@ -73,6 +75,11 @@ const getPromptForRequest = ( case "openai": case "mistral-ai": return req.body.messages; + case 
"openai-responses": + return { + instructions: req.body.instructions, + input: req.body.input, + }; case "anthropic-chat": return { system: req.body.system, messages: req.body.messages }; case "openai-text": @@ -99,6 +106,7 @@ const flattenMessages = ( | string | OaiImageResult | OpenAIChatMessage[] + | { instructions?: unknown; input?: unknown } | { contents: GoogleAIChatMessage[] } | { system: string; messages: AnthropicChatMessage[] } | MistralAIChatMessage[] @@ -114,12 +122,20 @@ const flattenMessages = ( return val.contents .map(({ parts, role }) => { const text = parts - .map((p) => p.text) + .map((p: any) => ("text" in p ? p.text : "(( Attached Image ))")) .join("\n"); return `${role}: ${text}`; }) .join("\n"); } + if (isOpenAIResponsesPrompt(val)) { + return [ + flattenOpenAIResponsesInput(val.instructions), + flattenOpenAIResponsesInput(val.input), + ] + .filter(Boolean) + .join("\n\n"); + } if (Array.isArray(val)) { return val .map(({ content, role }) => { @@ -140,6 +156,16 @@ const flattenMessages = ( return val.prompt.trim(); }; +function isOpenAIResponsesPrompt( + val: unknown +): val is { instructions?: unknown; input?: unknown } { + return ( + typeof val === "object" && + val !== null && + ("instructions" in val || "input" in val) + ); +} + function isGoogleAIChatPrompt( val: unknown ): val is { contents: GoogleAIChatMessage[] } { diff --git a/src/proxy/middleware/response/streaming/event-aggregator.ts b/src/proxy/middleware/response/streaming/event-aggregator.ts index d80c738..9b630b7 100644 --- a/src/proxy/middleware/response/streaming/event-aggregator.ts +++ b/src/proxy/middleware/response/streaming/event-aggregator.ts @@ -8,6 +8,7 @@ import { mergeEventsForOpenAIText, AnthropicV2StreamEvent, OpenAIChatCompletionStreamEvent, + OpenAIResponsesStreamEvent, } from "./index"; /** @@ -17,13 +18,36 @@ import { export class EventAggregator { private readonly format: APIFormat; private readonly events: OpenAIChatCompletionStreamEvent[]; + private responseBody: Record | null; + private responseEventCount: number; + private responseOutputText: string; constructor({ format }: { format: APIFormat }) { this.events = []; this.format = format; + this.responseBody = null; + this.responseEventCount = 0; + this.responseOutputText = ""; } - addEvent(event: OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent) { + addEvent( + event: + | OpenAIChatCompletionStreamEvent + | AnthropicV2StreamEvent + | OpenAIResponsesStreamEvent + ) { + if (eventIsOpenAIResponsesEvent(event)) { + this.responseEventCount++; + if (event.response && typeof event.response === "object") { + this.responseBody = event.response; + } + + if (event.type === "response.output_text.delta") { + this.responseOutputText += event.delta || event.text || ""; + } + return; + } + if (eventIsOpenAIEvent(event)) { this.events.push(event); } else { @@ -52,8 +76,15 @@ export class EventAggregator { getFinalResponse() { switch (this.format) { case "openai": + case "openai-responses": case "google-ai": case "mistral-ai": + if (this.format === "openai-responses") { + if (this.responseBody) { + return this.responseBody; + } + return { output_text: this.responseOutputText }; + } return mergeEventsForOpenAIChat(this.events); case "openai-text": return mergeEventsForOpenAIText(this.events); @@ -69,7 +100,7 @@ export class EventAggregator { } hasEvents() { - return this.events.length > 0; + return this.events.length > 0 || this.responseEventCount > 0; } } @@ -78,3 +109,9 @@ function eventIsOpenAIEvent( ): event is 
OpenAIChatCompletionStreamEvent { return event?.object === "chat.completion.chunk"; } + +function eventIsOpenAIResponsesEvent( + event: any +): event is OpenAIResponsesStreamEvent { + return typeof event?.type === "string" && event.type.startsWith("response."); +} diff --git a/src/proxy/middleware/response/streaming/index.ts b/src/proxy/middleware/response/streaming/index.ts index 402c233..c510d3e 100644 --- a/src/proxy/middleware/response/streaming/index.ts +++ b/src/proxy/middleware/response/streaming/index.ts @@ -26,6 +26,14 @@ export type OpenAIChatCompletionStreamEvent = { }[]; }; +export type OpenAIResponsesStreamEvent = { + type: string; + response?: Record; + delta?: string; + text?: string; + [key: string]: any; +}; + export type StreamingCompletionTransformer< T = OpenAIChatCompletionStreamEvent, S = any, @@ -42,6 +50,7 @@ export { anthropicChatToAnthropicV2 } from "./transformers/anthropic-chat-to-ant export { anthropicChatToOpenAI } from "./transformers/anthropic-chat-to-openai"; export { googleAIToOpenAI } from "./transformers/google-ai-to-openai"; export { passthroughToOpenAI } from "./transformers/passthrough-to-openai"; +export { passthroughToOpenAIResponses } from "./transformers/passthrough-to-openai-responses"; export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat"; export { mergeEventsForOpenAIText } from "./aggregators/openai-text"; export { mergeEventsForAnthropicText } from "./aggregators/anthropic-text"; diff --git a/src/proxy/middleware/response/streaming/sse-message-transformer.ts b/src/proxy/middleware/response/streaming/sse-message-transformer.ts index 800b286..23582f7 100644 --- a/src/proxy/middleware/response/streaming/sse-message-transformer.ts +++ b/src/proxy/middleware/response/streaming/sse-message-transformer.ts @@ -10,8 +10,10 @@ import { anthropicV2ToOpenAI, googleAIToOpenAI, OpenAIChatCompletionStreamEvent, + OpenAIResponsesStreamEvent, openAITextToOpenAIChat, passthroughToOpenAI, + passthroughToOpenAIResponses, StreamingCompletionTransformer, } from "./index"; @@ -35,7 +37,9 @@ export class SSEMessageTransformer extends Transform { private readonly inputFormat: APIFormat; private readonly transformFn: StreamingCompletionTransformer< // TODO: Refactor transformers to not assume only OpenAI events as output - OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent + | OpenAIChatCompletionStreamEvent + | AnthropicV2StreamEvent + | OpenAIResponsesStreamEvent >; private readonly log; private readonly fallbackId: string; @@ -126,12 +130,14 @@ function getTransformer( // used for that case. 
requestApi: APIFormat = "openai" ): StreamingCompletionTransformer< - OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent + OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent | OpenAIResponsesStreamEvent > { switch (responseApi) { case "openai": case "mistral-ai": return passthroughToOpenAI; + case "openai-responses": + return passthroughToOpenAIResponses; case "openai-text": return openAITextToOpenAIChat; case "anthropic-text": diff --git a/src/proxy/middleware/response/streaming/transformers/passthrough-to-openai-responses.ts b/src/proxy/middleware/response/streaming/transformers/passthrough-to-openai-responses.ts new file mode 100644 index 0000000..360a4d0 --- /dev/null +++ b/src/proxy/middleware/response/streaming/transformers/passthrough-to-openai-responses.ts @@ -0,0 +1,43 @@ +import { + OpenAIResponsesStreamEvent, + SSEResponseTransformArgs, + StreamingCompletionTransformer, +} from "../index"; +import { parseEvent, ServerSentEvent } from "../parse-sse"; +import { logger } from "../../../../../logger"; + +const log = logger.child({ + module: "sse-transformer", + transformer: "openai-responses-to-openai-responses", +}); + +export const passthroughToOpenAIResponses: StreamingCompletionTransformer< + OpenAIResponsesStreamEvent +> = ( + params: SSEResponseTransformArgs +) => { + const { data } = params; + + const rawEvent = parseEvent(data); + if (!rawEvent.data || rawEvent.data === "[DONE]") { + return { position: -1 }; + } + + const responseEvent = asResponseEvent(rawEvent); + if (!responseEvent) { + return { position: -1 }; + } + + return { position: -1, event: responseEvent }; +}; + +function asResponseEvent( + event: ServerSentEvent +): OpenAIResponsesStreamEvent | null { + try { + return JSON.parse(event.data) as OpenAIResponsesStreamEvent; + } catch (error) { + log.warn({ error: error.stack, event }, "Received invalid event"); + } + return null; +} diff --git a/src/proxy/mistral-ai.ts b/src/proxy/mistral-ai.ts index 1e520cc..e18d85e 100644 --- a/src/proxy/mistral-ai.ts +++ b/src/proxy/mistral-ai.ts @@ -24,25 +24,29 @@ import { // https://docs.mistral.ai/platform/endpoints export const KNOWN_MISTRAL_AI_MODELS = [ - // Mistral 7b (open weight, legacy) - "open-mistral-7b", - "mistral-tiny-2312", - // Mixtral 8x7b (open weight, legacy) - "open-mixtral-8x7b", - "mistral-small-2312", - // Mixtral Small (newer 8x7b, closed weight) "mistral-small-latest", - "mistral-small-2402", - // Mistral Medium + "mistral-small-2603", + "mistral-small-2506", "mistral-medium-latest", - "mistral-medium-2312", - // Mistral Large + "mistral-medium-2508", + "mistral-medium-2505", + "magistral-medium-latest", + "magistral-medium-2507", + "magistral-small-2507", "mistral-large-latest", - "mistral-large-2402", - // Deprecated identifiers (2024-05-01) - "mistral-tiny", - "mistral-small", - "mistral-medium", + "mistral-large-2512", + "ministral-14b-2512", + "ministral-8b-latest", + "ministral-8b-2512", + "ministral-3b-latest", + "ministral-3b-2512", + "pixtral-large-latest", + "pixtral-large-2411", + "codestral-latest", + "codestral-2508", + "devstral-small-latest", + "devstral-small-2507", + "devstral-medium-2507", ]; let modelsCache: any = null; diff --git a/src/proxy/openai-image.ts b/src/proxy/openai-image.ts index a50160e..0761766 100644 --- a/src/proxy/openai-image.ts +++ b/src/proxy/openai-image.ts @@ -18,7 +18,13 @@ import { import { generateModelList } from "./openai"; import { OpenAIImageGenerationResult } from "../shared/file-storage/mirror-generated-image"; -const KNOWN_MODELS = 
["dall-e-2", "dall-e-3"]; +const KNOWN_MODELS = [ + "dall-e-2", + "dall-e-3", + "gpt-image-1.5", + "gpt-image-1", + "gpt-image-1-mini", +]; let modelListCache: any = null; let modelListValid = 0; diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts index 5f16b44..59822ad 100644 --- a/src/proxy/openai.ts +++ b/src/proxy/openai.ts @@ -28,28 +28,57 @@ import { // https://platform.openai.com/docs/models/overview export const KNOWN_OPENAI_MODELS = [ + "gpt-5.2", + "gpt-5.2-chat", + "gpt-5.2-chat-latest", + "gpt-5.2-pro", + "gpt-5.2-codex", + "gpt-5.1", + "gpt-5.1-chat", + "gpt-5.1-codex", + "gpt-5.1-codex-mini", + "gpt-5.1-codex-max", + "gpt-5", + "gpt-5-chat", + "gpt-5-pro", + "gpt-5-codex", + "gpt-5-mini", + "gpt-5-nano", + "gpt-4.1", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini", + "gpt-4.1-nano", + "o3-pro", + "o3-deep-research", + "computer-use-preview", + "o4-mini", + "o4-mini-deep-research", + "o3", + "o3-mini", + "o1", + "o1-pro", "gpt-4o", + "gpt-4o-2024-08-06", + "gpt-4o-mini", "gpt-4o-2024-05-13", "gpt-4-turbo", // alias for latest gpt4-turbo stable "gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision - "gpt-4-turbo-preview", // alias for latest turbo preview - "gpt-4-0125-preview", // gpt4-turbo preview 2 - "gpt-4-1106-preview", // gpt4-turbo preview 1 - "gpt-4-vision-preview", // gpt4-turbo preview 1 with vision "gpt-4", "gpt-4-0613", - "gpt-4-0314", // EOL 2024-06-13 "gpt-4-32k", - "gpt-4-32k-0314", // EOL 2024-06-13 "gpt-4-32k-0613", "gpt-3.5-turbo", - "gpt-3.5-turbo-0301", // EOL 2024-06-13 "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct-0914", + "text-embedding-3-small", + "text-embedding-3-large", "text-embedding-ada-002", + "gpt-image-1.5", + "gpt-image-1", + "gpt-image-1-mini", + "dall-e-3", + "dall-e-2", ]; let modelsCache: any = null; @@ -59,11 +88,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) { // Get available families and snapshots let availableFamilies = new Set(); const availableSnapshots = new Set(); + const availableModelIds = new Set(); for (const key of keyPool.list()) { if (key.isDisabled || key.service !== "openai") continue; const asOpenAIKey = key as OpenAIKey; asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f)); asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s)); + asOpenAIKey.modelIds.forEach((id) => availableModelIds.add(id)); } // Remove disabled families @@ -71,8 +102,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) { availableFamilies = new Set( [...availableFamilies].filter((x) => allowed.has(x)) ); + const usingExactModelIds = availableModelIds.size > 0; - return models + const sourceModels = usingExactModelIds + ? [...new Set([...models, ...availableModelIds])] + : models; + + return sourceModels .map((id) => ({ id, object: "model", @@ -92,6 +128,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) { parent: null, })) .filter((model) => { + if (usingExactModelIds) { + return ( + allowed.has(getOpenAIModelFamily(model.id)) && + availableModelIds.has(model.id) + ); + } + // First check if the family is available const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id)); if (!hasFamily) return false; @@ -233,6 +276,16 @@ openaiRouter.post( }), openaiProxy ); +openaiRouter.post( + "/v1/responses", + ipLimiter, + createPreprocessorMiddleware({ + inApi: "openai-responses", + outApi: "openai-responses", + service: "openai", + }), + openaiProxy +); // Embeddings endpoint. 
openaiRouter.post( "/v1/embeddings", diff --git a/src/shared/api-schemas/anthropic.ts b/src/shared/api-schemas/anthropic.ts index bb279d2..08f9484 100644 --- a/src/shared/api-schemas/anthropic.ts +++ b/src/shared/api-schemas/anthropic.ts @@ -31,18 +31,24 @@ export const AnthropicV1TextSchema = AnthropicV1BaseSchema.merge( .int() .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)), }) -); +).passthrough(); const AnthropicV1MessageMultimodalContentSchema = z.array( z.union([ z.object({ type: z.literal("text"), text: z.string() }), z.object({ type: z.literal("image"), - source: z.object({ - type: z.literal("base64"), - media_type: z.string().max(100), - data: z.string(), - }), + source: z.union([ + z.object({ + type: z.literal("base64"), + media_type: z.string().max(100), + data: z.string(), + }), + z.object({ + type: z.literal("url"), + url: z.string().url(), + }), + ]), }), ]) ); @@ -65,7 +71,7 @@ export const AnthropicV1MessagesSchema = AnthropicV1BaseSchema.merge( .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)), system: z.string().optional(), }) -); +).passthrough(); export type AnthropicChatMessage = z.infer< typeof AnthropicV1MessagesSchema >["messages"][0]; @@ -77,7 +83,7 @@ function openAIMessagesToClaudeTextPrompt(messages: OpenAIChatMessage[]) { let role: string = m.role; if (role === "assistant") { role = "Assistant"; - } else if (role === "system") { + } else if (role === "system" || role === "developer") { role = "System"; } else if (role === "user") { role = "Human"; @@ -115,12 +121,13 @@ export const transformOpenAIToAnthropicChat: APIFormatTransformer< system, messages: newMessages, model: rest.model, - max_tokens: rest.max_tokens, + max_tokens: rest.max_completion_tokens ?? rest.max_tokens, stream: rest.stream, temperature: rest.temperature, top_p: rest.top_p, stop_sequences: typeof rest.stop === "string" ? [rest.stop] : rest.stop || undefined, + ...(rest.thinking ? { thinking: rest.thinking } : {}), ...(rest.user ? { metadata: { user_id: rest.user } } : {}), // Anthropic supports top_k, but OpenAI does not // OpenAI supports frequency_penalty, presence_penalty, logit_bias, n, seed, @@ -162,7 +169,7 @@ export const transformOpenAIToAnthropicText: APIFormatTransformer< return { model: rest.model, prompt: prompt, - max_tokens_to_sample: rest.max_tokens, + max_tokens_to_sample: rest.max_completion_tokens ?? rest.max_tokens, stop_sequences: stops, stream: rest.stream, temperature: rest.temperature, @@ -366,7 +373,7 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): { // Here we will lose the original name if it was a system message, but that // is generally okay because the system message is usually a prompt and not // a character in the chat. - const name = msg.role === "system" ? "System" : msg.name?.trim(); + const name = isSystemOpenAIRole(msg.role) ? 
"System" : msg.name?.trim(); const content = convertOpenAIContent(msg.content); // Prepend the display name to the first text content in the current message @@ -396,8 +403,8 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): { function isSystemOpenAIRole( role: OpenAIChatMessage["role"] -): role is "system" | "function" | "tool" { - return ["system", "function", "tool"].includes(role); +): role is "system" | "developer" | "function" | "tool" { + return ["system", "developer", "function", "tool"].includes(role); } function getFirstTextContent(content: OpenAIChatMessage["content"]) { diff --git a/src/shared/api-schemas/google-ai.ts b/src/shared/api-schemas/google-ai.ts index 2e23759..ca96e03 100644 --- a/src/shared/api-schemas/google-ai.ts +++ b/src/shared/api-schemas/google-ai.ts @@ -1,42 +1,62 @@ import { z } from "zod"; import { flattenOpenAIMessageContent, + OpenAIChatMessage, OpenAIV1ChatCompletionSchema, } from "./openai"; import { APIFormatTransformer } from "./index"; +const GoogleAIContentPartSchema = z.union([ + z.object({ text: z.string() }), + z.object({ + inline_data: z.object({ + mime_type: z.string().max(100), + data: z.string(), + }), + }), +]); + // https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent export const GoogleAIV1GenerateContentSchema = z .object({ - model: z.string().max(100), //actually specified in path but we need it for the router + model: z.string().max(100), // actually specified in path but we need it for the router stream: z.boolean().optional().default(false), // also used for router contents: z.array( z.object({ - parts: z.array(z.object({ text: z.string() })), + parts: z.array(GoogleAIContentPartSchema), role: z.enum(["user", "model"]), }) ), - tools: z.array(z.object({})).max(0).optional(), - safetySettings: z.array(z.object({})).max(0).optional(), + tools: z.array(z.any()).optional(), + toolConfig: z.any().optional(), + safetySettings: z.array(z.any()).optional(), + systemInstruction: z.any().optional(), generationConfig: z.object({ temperature: z.number().optional(), maxOutputTokens: z.coerce .number() .int() .optional() - .default(16) - .transform((v) => Math.min(v, 1024)), // TODO: Add config + .default(1024) + .transform((v) => Math.min(v, 65536)), candidateCount: z.literal(1).optional(), topP: z.number().optional(), topK: z.number().optional(), + responseMimeType: z.string().optional(), + responseSchema: z.any().optional(), + responseJsonSchema: z.any().optional(), + responseModalities: z.array(z.string()).optional(), + thinkingConfig: z.any().optional(), stopSequences: z.array(z.string().max(500)).max(5).optional(), }), }) - .strip(); + .passthrough(); export type GoogleAIChatMessage = z.infer< typeof GoogleAIV1GenerateContentSchema >["contents"][0]; +type GoogleAIPart = GoogleAIChatMessage["parts"][number]; + export const transformOpenAIToGoogleAI: APIFormatTransformer< typeof GoogleAIV1GenerateContentSchema > = async (req) => { @@ -54,40 +74,51 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer< } const { messages, ...rest } = result.data; + const systemMessages = messages.filter( + (m) => m.role === "system" || m.role === "developer" + ); const foundNames = new Set(); + const model = req.body.model; + const customThinkingConfig = + getObjectField(body, "thinkingConfig") ?? 
diff --git a/src/shared/api-schemas/google-ai.ts b/src/shared/api-schemas/google-ai.ts
index 2e23759..ca96e03 100644
--- a/src/shared/api-schemas/google-ai.ts
+++ b/src/shared/api-schemas/google-ai.ts
@@ -1,42 +1,62 @@
 import { z } from "zod";
 import {
   flattenOpenAIMessageContent,
+  OpenAIChatMessage,
   OpenAIV1ChatCompletionSchema,
 } from "./openai";
 import { APIFormatTransformer } from "./index";
 
+const GoogleAIContentPartSchema = z.union([
+  z.object({ text: z.string() }),
+  z.object({
+    inline_data: z.object({
+      mime_type: z.string().max(100),
+      data: z.string(),
+    }),
+  }),
+]);
+
 // https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent
 export const GoogleAIV1GenerateContentSchema = z
   .object({
-    model: z.string().max(100), //actually specified in path but we need it for the router
+    model: z.string().max(100), // actually specified in path but we need it for the router
     stream: z.boolean().optional().default(false), // also used for router
     contents: z.array(
       z.object({
-        parts: z.array(z.object({ text: z.string() })),
+        parts: z.array(GoogleAIContentPartSchema),
         role: z.enum(["user", "model"]),
       })
     ),
-    tools: z.array(z.object({})).max(0).optional(),
-    safetySettings: z.array(z.object({})).max(0).optional(),
+    tools: z.array(z.any()).optional(),
+    toolConfig: z.any().optional(),
+    safetySettings: z.array(z.any()).optional(),
+    systemInstruction: z.any().optional(),
     generationConfig: z.object({
       temperature: z.number().optional(),
       maxOutputTokens: z.coerce
         .number()
         .int()
         .optional()
-        .default(16)
-        .transform((v) => Math.min(v, 1024)), // TODO: Add config
+        .default(1024)
+        .transform((v) => Math.min(v, 65536)),
       candidateCount: z.literal(1).optional(),
       topP: z.number().optional(),
       topK: z.number().optional(),
+      responseMimeType: z.string().optional(),
+      responseSchema: z.any().optional(),
+      responseJsonSchema: z.any().optional(),
+      responseModalities: z.array(z.string()).optional(),
+      thinkingConfig: z.any().optional(),
       stopSequences: z.array(z.string().max(500)).max(5).optional(),
     }),
   })
-  .strip();
+  .passthrough();
 export type GoogleAIChatMessage = z.infer<
   typeof GoogleAIV1GenerateContentSchema
 >["contents"][0];
 
+type GoogleAIPart = GoogleAIChatMessage["parts"][number];
+
 export const transformOpenAIToGoogleAI: APIFormatTransformer<
   typeof GoogleAIV1GenerateContentSchema
 > = async (req) => {
@@ -54,40 +74,51 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
   }
 
   const { messages, ...rest } = result.data;
+  const systemMessages = messages.filter(
+    (m) => m.role === "system" || m.role === "developer"
+  );
   const foundNames = new Set<string>();
+  const model = req.body.model;
+  const customThinkingConfig =
+    getObjectField(body, "thinkingConfig") ??
+    getObjectField(getObjectField(body, "generationConfig"), "thinkingConfig");
+  const customResponseModalities = getStringArrayField(
+    getObjectField(body, "generationConfig"),
+    "responseModalities"
+  );
   const contents = messages
+    .filter((m) => m.role !== "system" && m.role !== "developer")
     .map((m) => {
       const role = m.role === "assistant" ? "model" : "user";
-      // Detects character names so we can set stop sequences for them as Gemini
-      // is prone to continuing as the next character.
-      // If names are not available, we'll still try to prefix the message
-      // with generic names so we can set stops for them but they don't work
-      // as well as real names.
-      const text = flattenOpenAIMessageContent(m.content);
+      const parts = convertOpenAIContent(m.content);
+      const text = parts
+        .map((part) => ("text" in part ? part.text : ""))
+        .join("\n");
       const propName = m.name?.trim();
-      const textName =
-        m.role === "system" ? "" : text.match(/^(.{0,50}?): /)?.[1]?.trim();
-      const name =
-        propName || textName || (role === "model" ? "Character" : "User");
+      const textName = text.match(/^(.{0,50}?): /)?.[1]?.trim();
+      const name = propName || textName || (role === "model" ? "Character" : "User");
       foundNames.add(name);
-      // Prefixing messages with their character name seems to help avoid
-      // Gemini trying to continue as the next character, or at the very least
-      // ensures it will hit the stop sequence. Otherwise it will start a new
-      // paragraph and switch perspectives.
-      // The response will be very likely to include this prefix so frontends
-      // will need to strip it out.
+      // Prefixing speaker names helps Gemini avoid continuing as the next
+      // character in multi-party roleplay/chat prompts.
       const textPrefix = textName ? "" : `${name}: `;
+      const firstTextPart = parts.find(
+        (part): part is Extract<GoogleAIPart, { text: string }> => "text" in part
+      );
+      if (firstTextPart) {
+        firstTextPart.text = textPrefix + firstTextPart.text;
+      }
+
       return {
-        parts: [{ text: textPrefix + text }],
+        parts,
         role: m.role === "assistant" ? ("model" as const) : ("user" as const),
       };
     })
     .reduce<GoogleAIChatMessage[]>((acc, msg) => {
       const last = acc[acc.length - 1];
       if (last?.role === msg.role) {
-        last.parts[0].text += "\n\n" + msg.parts[0].text;
+        last.parts.push(...msg.parts);
       } else {
         acc.push(msg);
       }
"application/json" + : undefined, + responseSchema: responseFormat?.json_schema?.schema, + responseJsonSchema: responseFormat?.json_schema?.schema, + responseModalities: + customResponseModalities ?? + (isGoogleAIImageModel(model) ? ["TEXT", "IMAGE"] : undefined), + thinkingConfig: + customThinkingConfig ?? getThinkingConfig(model, rest.reasoning_effort), }, safetySettings: [ { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" }, @@ -122,3 +180,117 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer< ], }; }; + +function convertOpenAIContent( + content: OpenAIChatMessage["content"] +): GoogleAIPart[] { + if (typeof content === "string") { + return [{ text: content }]; + } + + return content.map((item) => { + if ("text" in item) { + return { text: item.text }; + } + if ("refusal" in item) { + return { text: item.refusal }; + } + + const url = item.image_url.url; + if (!url.startsWith("data:")) { + return { text: "[ Unsupported image URL ]" }; + } + + const [meta, data = ""] = url.split(",", 2); + const mimeType = meta.split(";")[0].replace("data:", ""); + return { inline_data: { mime_type: mimeType, data } }; + }); +} + +function getThinkingConfig(model: string, reasoningEffort?: string) { + if (model.startsWith("gemini-2.5")) { + switch (reasoningEffort) { + case "none": + case "minimal": + case "low": + return { thinkingBudget: 0 }; + default: + return undefined; + } + } + + switch (reasoningEffort) { + case "low": + case "minimal": + case "none": + return { thinkingLevel: "LOW" }; + case "medium": + case "high": + case "xhigh": + return { thinkingLevel: "HIGH" }; + default: + return undefined; + } +} + +export function isGoogleAIImageModel(model: string) { + return [ + "gemini-2.0-flash-preview-image-generation", + "gemini-2.5-flash-image", + "gemini-3-pro-image-preview", + ].includes(model); +} + +export function flattenGoogleAIContentParts( + parts: Array> | undefined +) { + return (parts ?? []) + .map((part) => { + if (typeof part?.text === "string") { + return part.text; + } + + const inlineData = part?.inline_data ?? part?.inlineData; + if (inlineData?.data) { + const mimeType = inlineData.mime_type ?? inlineData.mimeType ?? 
"image/png"; + return `![generated image](data:${mimeType};base64,${inlineData.data})`; + } + + return ""; + }) + .filter(Boolean) + .join("\n\n"); +} + +function getObjectField( + value: unknown, + key: string +): Record | undefined { + if ( + value && + typeof value === "object" && + !Array.isArray(value) && + key in value && + value[key as keyof typeof value] && + typeof value[key as keyof typeof value] === "object" && + !Array.isArray(value[key as keyof typeof value]) + ) { + return value[key as keyof typeof value] as Record; + } + return undefined; +} + +function getStringArrayField(value: unknown, key: string) { + if ( + value && + typeof value === "object" && + !Array.isArray(value) && + key in value && + Array.isArray(value[key as keyof typeof value]) + ) { + return (value[key as keyof typeof value] as unknown[]).filter( + (item): item is string => typeof item === "string" + ); + } + return undefined; +} diff --git a/src/shared/api-schemas/index.ts b/src/shared/api-schemas/index.ts index 598bf23..1f0da68 100644 --- a/src/shared/api-schemas/index.ts +++ b/src/shared/api-schemas/index.ts @@ -17,6 +17,7 @@ import { OpenAIV1ImagesGenerationSchema, transformOpenAIToOpenAIImage, } from "./openai-image"; +import { OpenAIResponsesCreateSchema } from "./openai-responses"; import { GoogleAIV1GenerateContentSchema, transformOpenAIToGoogleAI, @@ -24,13 +25,22 @@ import { import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai"; export { OpenAIChatMessage } from "./openai"; +export { + containsOpenAIResponsesImageInput, + flattenOpenAIResponsesInput, + flattenOpenAIResponsesOutput, +} from "./openai-responses"; export { AnthropicChatMessage, AnthropicV1TextSchema, AnthropicV1MessagesSchema, flattenAnthropicMessages, } from "./anthropic"; -export { GoogleAIChatMessage } from "./google-ai"; +export { + GoogleAIChatMessage, + flattenGoogleAIContentParts, + isGoogleAIImageModel, +} from "./google-ai"; export { MistralAIChatMessage } from "./mistral-ai"; type APIPair = `${APIFormat}->${APIFormat}`; @@ -55,6 +65,7 @@ export const API_REQUEST_VALIDATORS: Record> = { "anthropic-chat": AnthropicV1MessagesSchema, "anthropic-text": AnthropicV1TextSchema, openai: OpenAIV1ChatCompletionSchema, + "openai-responses": OpenAIResponsesCreateSchema, "openai-text": OpenAIV1TextCompletionSchema, "openai-image": OpenAIV1ImagesGenerationSchema, "google-ai": GoogleAIV1GenerateContentSchema, diff --git a/src/shared/api-schemas/mistral-ai.ts b/src/shared/api-schemas/mistral-ai.ts index d67f246..5d3ca0d 100644 --- a/src/shared/api-schemas/mistral-ai.ts +++ b/src/shared/api-schemas/mistral-ai.ts @@ -20,7 +20,7 @@ export const MistralAIV1ChatCompletionsSchema = z.object({ stream: z.boolean().optional().default(false), safe_prompt: z.boolean().optional().default(false), random_seed: z.number().int().optional(), -}); +}).passthrough(); export type MistralAIChatMessage = z.infer< typeof MistralAIV1ChatCompletionsSchema >["messages"][0]; diff --git a/src/shared/api-schemas/openai-image.ts b/src/shared/api-schemas/openai-image.ts index 7133362..12d0fbe 100644 --- a/src/shared/api-schemas/openai-image.ts +++ b/src/shared/api-schemas/openai-image.ts @@ -5,19 +5,34 @@ import { APIFormatTransformer } from "./index"; // https://platform.openai.com/docs/api-reference/images/create export const OpenAIV1ImagesGenerationSchema = z .object({ - prompt: z.string().max(4000), + prompt: z.string().max(32000), model: z.string().max(100).optional(), - quality: z.enum(["standard", "hd"]).optional().default("standard"), - n: 
diff --git a/src/shared/api-schemas/openai-image.ts b/src/shared/api-schemas/openai-image.ts
index 7133362..12d0fbe 100644
--- a/src/shared/api-schemas/openai-image.ts
+++ b/src/shared/api-schemas/openai-image.ts
@@ -5,19 +5,34 @@ import { APIFormatTransformer } from "./index";
 
 // https://platform.openai.com/docs/api-reference/images/create
 export const OpenAIV1ImagesGenerationSchema = z
   .object({
-    prompt: z.string().max(4000),
+    prompt: z.string().max(32000),
     model: z.string().max(100).optional(),
-    quality: z.enum(["standard", "hd"]).optional().default("standard"),
-    n: z.number().int().min(1).max(4).optional().default(1),
+    quality: z
+      .enum(["auto", "low", "medium", "high", "standard", "hd"])
+      .optional(),
+    n: z.number().int().min(1).max(10).optional().default(1),
     response_format: z.enum(["url", "b64_json"]).optional(),
+    output_format: z.string().optional(),
+    output_compression: z.number().int().min(0).max(100).optional(),
     size: z
-      .enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"])
+      .enum([
+        "auto",
+        "256x256",
+        "512x512",
+        "1024x1024",
+        "1024x1536",
+        "1536x1024",
+        "1792x1024",
+        "1024x1792",
+      ])
       .optional()
       .default("1024x1024"),
     style: z.enum(["vivid", "natural"]).optional().default("vivid"),
+    background: z.string().optional(),
+    moderation: z.string().optional(),
     user: z.string().max(500).optional(),
   })
-  .strip();
+  .passthrough();
 
 // Takes the last chat message and uses it verbatim as the image prompt.
 export const transformOpenAIToOpenAIImage: APIFormatTransformer<
@@ -57,12 +72,21 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
   }
 
   // TODO: Add some way to specify parameters via chat message
-  const transformed = {
-    model: body.model.includes("dall-e") ? body.model : "dall-e-3",
-    quality: "standard",
+  const requestedModel = String(body.model ?? "");
+  const model =
+    requestedModel.includes("dall-e") || requestedModel.includes("gpt-image")
+      ? requestedModel
+      : "gpt-image-1.5";
+  const transformed: Record<string, unknown> = {
+    model,
     size: "1024x1024",
-    response_format: "url",
     prompt: prompt.slice(index! + 6).trim(),
   };
+
+  if (model.includes("dall-e")) {
+    transformed.quality = "standard";
+    transformed.response_format = "url";
+  }
+
   return OpenAIV1ImagesGenerationSchema.parse(transformed);
 };
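The transform's model fallback now targets GPT Image instead of DALL-E 3, and the DALL-E-only fields are attached conditionally. A mirror of that branch, for illustration only:

```ts
function pickImageModel(requested: string | undefined): string {
  const requestedModel = String(requested ?? "");
  return requestedModel.includes("dall-e") || requestedModel.includes("gpt-image")
    ? requestedModel
    : "gpt-image-1.5";
}

pickImageModel("dall-e-3");    // => "dall-e-3" (also gets quality/response_format)
pickImageModel("gpt-image-1"); // => "gpt-image-1" (no DALL-E-only fields added)
pickImageModel("gpt-4o");      // => "gpt-image-1.5" (new default target)
```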
diff --git a/src/shared/api-schemas/openai-responses.ts b/src/shared/api-schemas/openai-responses.ts
new file mode 100644
index 0000000..a2c239c
--- /dev/null
+++ b/src/shared/api-schemas/openai-responses.ts
@@ -0,0 +1,136 @@
+import { z } from "zod";
+import { OPENAI_OUTPUT_MAX } from "./openai";
+
+const OpenAIResponsesReasoningSchema = z
+  .object({
+    effort: z.string().optional(),
+    summary: z.union([z.string(), z.array(z.string())]).optional(),
+  })
+  .passthrough();
+
+const OpenAIResponsesTextSchema = z
+  .object({
+    format: z.any().optional(),
+    verbosity: z.enum(["low", "medium", "high"]).optional(),
+  })
+  .passthrough();
+
+export const OpenAIResponsesCreateSchema = z
+  .object({
+    model: z.string().max(100),
+    input: z.union([z.string(), z.array(z.any())]).optional(),
+    instructions: z.union([z.string(), z.array(z.any())]).optional(),
+    previous_response_id: z.string().max(100).optional(),
+    stream: z.boolean().optional().default(false),
+    max_output_tokens: z.coerce
+      .number()
+      .int()
+      .nullish()
+      .default(OPENAI_OUTPUT_MAX)
+      .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
+    temperature: z.number().optional(),
+    top_p: z.number().optional(),
+    user: z.string().max(500).optional(),
+    safety_identifier: z.string().max(500).optional(),
+    metadata: z.record(z.any()).optional(),
+    tools: z.array(z.any()).optional(),
+    tool_choice: z.any().optional(),
+    parallel_tool_calls: z.boolean().optional(),
+    include: z.array(z.string()).optional(),
+    store: z.boolean().optional(),
+    background: z.boolean().optional(),
+    reasoning: OpenAIResponsesReasoningSchema.optional(),
+    text: OpenAIResponsesTextSchema.optional(),
+  })
+  .passthrough();
+
+export function flattenOpenAIResponsesInput(input: unknown): string {
+  return flattenResponseValue(input).trim();
+}
+
+export function flattenOpenAIResponsesOutput(body: Record<string, any>): string {
+  if (typeof body.output_text === "string" && body.output_text.trim()) {
+    return body.output_text.trim();
+  }
+
+  return flattenResponseValue(body.output ?? body.output_text).trim();
+}
+
+export function containsOpenAIResponsesImageInput(input: unknown): boolean {
+  return containsImage(input);
+}
+
+function flattenResponseValue(value: unknown): string {
+  if (value === null || value === undefined) return "";
+  if (typeof value === "string") return value;
+  if (typeof value === "number" || typeof value === "boolean") {
+    return String(value);
+  }
+
+  if (Array.isArray(value)) {
+    return value
+      .map((item) => flattenResponseValue(item))
+      .filter(Boolean)
+      .join("\n");
+  }
+
+  if (!isRecord(value)) return "";
+
+  const typed = value;
+  if (hasStringProp(typed, "text")) return typed.text;
+  if (hasStringProp(typed, "refusal")) return typed.refusal;
+  if (hasStringProp(typed, "summary")) return typed.summary;
+  if (hasStringProp(typed, "arguments")) return typed.arguments;
+  if (hasStringProp(typed, "result")) return typed.result;
+
+  const type = String(typed.type ?? "");
+  if (type.includes("image")) return "[ Uploaded Image Omitted ]";
+  if (type.includes("file")) return "[ File Omitted ]";
+
+  if (typeof typed.role === "string" && typed.content !== undefined) {
+    const content = flattenResponseValue(typed.content);
+    return content ? `${typed.role}: ${content}` : typed.role;
+  }
+
+  const nested = [
+    typed.content,
+    typed.input,
+    typed.output,
+    typed.summary,
+    typed.results,
+    typed.item,
+    typed.items,
+  ];
+  for (const candidate of nested) {
+    const flattened = flattenResponseValue(candidate);
+    if (flattened) return flattened;
+  }
+
+  return "";
+}
""); + if (type.includes("image")) return true; + if (typed.image_url || typed.image || typed.input_image || typed.inline_data) { + return true; + } + + return Object.values(typed).some((item) => containsImage(item)); +} + +function hasStringProp( + value: Record, + key: T +): value is Record & Record { + return typeof value[key] === "string"; +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null; +} diff --git a/src/shared/api-schemas/openai.ts b/src/shared/api-schemas/openai.ts index 0f0e420..ff81a48 100644 --- a/src/shared/api-schemas/openai.ts +++ b/src/shared/api-schemas/openai.ts @@ -7,6 +7,7 @@ export const OPENAI_OUTPUT_MAX = config.maxOutputTokensOpenAI; const OpenAIV1ChatContentArraySchema = z.array( z.union([ z.object({ type: z.literal("text"), text: z.string() }), + z.object({ type: z.literal("refusal"), refusal: z.string() }), z.object({ type: z.union([z.literal("image"), z.literal("image_url")]), image_url: z.object({ @@ -21,7 +22,14 @@ export const OpenAIV1ChatCompletionSchema = z model: z.string().max(100), messages: z.array( z.object({ - role: z.enum(["system", "user", "assistant", "tool", "function"]), + role: z.enum([ + "system", + "developer", + "user", + "assistant", + "tool", + "function", + ]), content: z.union([z.string(), OpenAIV1ChatContentArraySchema]), name: z.string().optional(), tool_calls: z.array(z.any()).optional(), @@ -54,11 +62,20 @@ export const OpenAIV1ChatCompletionSchema = z .nullish() .default(Math.min(OPENAI_OUTPUT_MAX, 4096)) .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)), + max_completion_tokens: z.coerce + .number() + .int() + .nullish() + .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)), frequency_penalty: z.number().optional().default(0), presence_penalty: z.number().optional().default(0), logit_bias: z.any().optional(), + metadata: z.record(z.any()).optional(), user: z.string().max(500).optional(), + safety_identifier: z.string().max(500).optional(), seed: z.number().int().optional(), + prompt_cache_key: z.string().max(500).optional(), + prompt_cache_retention: z.string().optional(), // Be warned that Azure OpenAI combines these two into a single field. // It's the only deviation from the OpenAI API that I'm aware of so I have // special cased it in `addAzureKey` rather than expecting clients to do it. @@ -70,14 +87,23 @@ export const OpenAIV1ChatCompletionSchema = z functions: z.array(z.any()).optional(), tool_choice: z.any().optional(), function_choice: z.any().optional(), - response_format: z.any(), + response_format: z.any().optional(), + parallel_tool_calls: z.boolean().optional(), + reasoning_effort: z.string().optional(), + stream_options: z.any().optional(), + modalities: z.array(z.string()).optional(), + audio: z.any().optional(), + prediction: z.any().optional(), + web_search_options: z.any().optional(), + service_tier: z.string().optional(), + verbosity: z.enum(["low", "medium", "high"]).optional(), }) // Tool usage must be enabled via config because we currently have no way to // track quota usage for them or enforce limits. .omit( Boolean(config.allowOpenAIToolUsage) ? {} : { tools: true, functions: true } ) - .strip(); + .passthrough(); export type OpenAIChatMessage = z.infer< typeof OpenAIV1ChatCompletionSchema >["messages"][0]; @@ -89,6 +115,7 @@ export function flattenOpenAIMessageContent( ? 
diff --git a/src/shared/api-schemas/openai.ts b/src/shared/api-schemas/openai.ts
index 0f0e420..ff81a48 100644
--- a/src/shared/api-schemas/openai.ts
+++ b/src/shared/api-schemas/openai.ts
@@ -7,6 +7,7 @@ export const OPENAI_OUTPUT_MAX = config.maxOutputTokensOpenAI;
 const OpenAIV1ChatContentArraySchema = z.array(
   z.union([
     z.object({ type: z.literal("text"), text: z.string() }),
+    z.object({ type: z.literal("refusal"), refusal: z.string() }),
     z.object({
       type: z.union([z.literal("image"), z.literal("image_url")]),
       image_url: z.object({
@@ -21,7 +22,14 @@ export const OpenAIV1ChatCompletionSchema = z
     model: z.string().max(100),
     messages: z.array(
       z.object({
-        role: z.enum(["system", "user", "assistant", "tool", "function"]),
+        role: z.enum([
+          "system",
+          "developer",
+          "user",
+          "assistant",
+          "tool",
+          "function",
+        ]),
         content: z.union([z.string(), OpenAIV1ChatContentArraySchema]),
         name: z.string().optional(),
         tool_calls: z.array(z.any()).optional(),
@@ -54,11 +62,20 @@ export const OpenAIV1ChatCompletionSchema = z
       .nullish()
       .default(Math.min(OPENAI_OUTPUT_MAX, 4096))
       .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
+    max_completion_tokens: z.coerce
+      .number()
+      .int()
+      .nullish()
+      .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
     frequency_penalty: z.number().optional().default(0),
     presence_penalty: z.number().optional().default(0),
     logit_bias: z.any().optional(),
+    metadata: z.record(z.any()).optional(),
     user: z.string().max(500).optional(),
+    safety_identifier: z.string().max(500).optional(),
     seed: z.number().int().optional(),
+    prompt_cache_key: z.string().max(500).optional(),
+    prompt_cache_retention: z.string().optional(),
     // Be warned that Azure OpenAI combines these two into a single field.
     // It's the only deviation from the OpenAI API that I'm aware of so I have
     // special cased it in `addAzureKey` rather than expecting clients to do it.
@@ -70,14 +87,23 @@ export const OpenAIV1ChatCompletionSchema = z
     functions: z.array(z.any()).optional(),
     tool_choice: z.any().optional(),
     function_choice: z.any().optional(),
-    response_format: z.any(),
+    response_format: z.any().optional(),
+    parallel_tool_calls: z.boolean().optional(),
+    reasoning_effort: z.string().optional(),
+    stream_options: z.any().optional(),
+    modalities: z.array(z.string()).optional(),
+    audio: z.any().optional(),
+    prediction: z.any().optional(),
+    web_search_options: z.any().optional(),
+    service_tier: z.string().optional(),
+    verbosity: z.enum(["low", "medium", "high"]).optional(),
   })
   // Tool usage must be enabled via config because we currently have no way to
   // track quota usage for them or enforce limits.
   .omit(
     Boolean(config.allowOpenAIToolUsage) ? {} : { tools: true, functions: true }
   )
-  .strip();
+  .passthrough();
 export type OpenAIChatMessage = z.infer<
   typeof OpenAIV1ChatCompletionSchema
 >["messages"][0];
@@ -89,6 +115,7 @@ export function flattenOpenAIMessageContent(
     ? content
         .map((contentItem) => {
           if ("text" in contentItem) return contentItem.text;
+          if ("refusal" in contentItem) return contentItem.refusal;
           if ("image_url" in contentItem) return "[ Uploaded Image Omitted ]";
         })
         .join("\n")
@@ -107,7 +134,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
       let role: string = m.role;
       if (role === "assistant") {
         role = "Assistant";
-      } else if (role === "system") {
+      } else if (role === "system" || role === "developer") {
        role = "System";
       } else if (role === "user") {
         role = "User";
@@ -121,7 +148,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
     .map((m) => {
       // Claude without prefixes (except system) and no Assistant priming
       let role: string = "";
-      if (role === "system") {
+      if (m.role === "system" || m.role === "developer") {
         role = "System: ";
       }
       return `\n\n${role}${flattenOpenAIMessageContent(m.content)}`;
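A hypothetical request showing what the widened chat-completions schema now accepts, and which the final `.passthrough()` forwards even when a field is not modeled here:

```ts
const body = {
  model: "gpt-5.2",
  messages: [
    { role: "developer", content: "Answer in one sentence." },
    { role: "user", content: "Why is the sky blue?" },
  ],
  max_completion_tokens: 200, // modern replacement for max_tokens
  reasoning_effort: "medium",
  verbosity: "low",
  service_tier: "auto",
  unknown_future_flag: true, // not in the schema; survives .passthrough()
};
```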
diff --git a/src/shared/key-management/aws/checker.ts b/src/shared/key-management/aws/checker.ts
index de22c64..bb8fb87 100644
--- a/src/shared/key-management/aws/checker.ts
+++ b/src/shared/key-management/aws/checker.ts
@@ -54,10 +54,10 @@
     if (isInitialCheck) {
       checks = [
         this.invokeModel("anthropic.claude-v2", key),
-        this.invokeModel("anthropic.claude-3-sonnet-20240229-v1:0", key),
-        this.invokeModel("anthropic.claude-3-haiku-20240307-v1:0", key),
-        this.invokeModel("anthropic.claude-3-opus-20240229-v1:0", key),
-        this.invokeModel("anthropic.claude-3-5-sonnet-20240620-v1:0", key),
+        this.invokeModel("anthropic.claude-sonnet-4-5-20250929-v1:0", key),
+        this.invokeModel("anthropic.claude-haiku-4-5-20251001-v1:0", key),
+        this.invokeModel("anthropic.claude-opus-4-1-20250805-v1:0", key),
+        this.invokeModel("anthropic.claude-3-5-haiku-20241022-v1:0", key),
       ];
     }
 
diff --git a/src/shared/key-management/azure/checker.ts b/src/shared/key-management/azure/checker.ts
index 6beeb14..c4ec657 100644
--- a/src/shared/key-management/azure/checker.ts
+++ b/src/shared/key-management/azure/checker.ts
@@ -35,9 +35,15 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
   }
 
   protected async testKeyOrFail(key: AzureOpenAIKey) {
-    const model = await this.testModel(key);
-    this.log.info({ key: key.hash, deploymentModel: model }, "Checked key.");
-    this.updateKey(key.hash, { modelFamilies: [model] });
+    const result = await this.testModel(key);
+    this.log.info(
+      { key: key.hash, deploymentModel: result.modelIds[0] ?? result.family },
+      "Checked key."
+    );
+    this.updateKey(key.hash, {
+      modelFamilies: [result.family],
+      modelIds: result.modelIds,
+    });
   }
 
   protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) {
@@ -107,7 +113,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
     this.updateKey(key.hash, { lastChecked: next });
   }
 
-  private async testModel(key: AzureOpenAIKey) {
+  private async testModel(key: AzureOpenAIKey): Promise<{
+    family: ReturnType<typeof getAzureOpenAIModelFamily>;
+    modelIds: string[];
+  }> {
     const { apiKey, deploymentId, resourceName } =
       AzureOpenAIKeyChecker.getCredentialsFromKey(key);
     const url = POST_CHAT_COMPLETIONS(resourceName, deploymentId);
@@ -126,7 +135,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
     // we try to invoke /chat/completions on dall-e-3. This is expected and
     // indicates a DALL-E deployment.
     if (response.status === 400) {
-      if (data.error.code === "OperationNotSupported") return "azure-dall-e";
+      if (data.error.code === "OperationNotSupported") {
+        return {
+          family: "azure-dall-e",
+          modelIds: ["dall-e-3", "gpt-image-1", "gpt-image-1-mini", "gpt-image-1.5"],
+        };
+      }
       throw new AxiosError(
         `Unexpected error when testing deployment ${deploymentId}`,
         "AZURE_TEST_ERROR",
@@ -137,11 +151,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
     }
 
     const family = getAzureOpenAIModelFamily(data.model);
+    const normalizedModel = normalizeAzureModelId(data.model);
 
     // Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks.
     // Otherwise we can use the model family Azure returned.
     if (family !== "azure-gpt4") {
-      return family;
+      return { family, modelIds: [normalizedModel] };
     }
 
     // Try to send an oversized prompt. GPT-4 Turbo can handle this but regular
@@ -160,8 +175,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
     const code = contextTest.error?.code;
     this.log.debug({ code, status }, "Performed Azure GPT4 context size test.");
 
-    if (code === "context_length_exceeded") return "azure-gpt4";
-    return "azure-gpt4-turbo";
+    if (code === "context_length_exceeded") {
+      return { family: "azure-gpt4", modelIds: ["gpt-4"] };
+    }
+    return { family: "azure-gpt4-turbo", modelIds: ["gpt-4-turbo"] };
   }
 
   static errorIsAzureError(error: AxiosError): error is AxiosError {
@@ -179,3 +196,7 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
     return { resourceName, deploymentId, apiKey };
   }
 }
+
+function normalizeAzureModelId(model: string) {
+  return model.replace("gpt-35-turbo", "gpt-3.5-turbo");
+}
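The checker now reports exact model IDs alongside the family so the providers can route by deployment inventory. Illustrative calls against the helper above:

```ts
normalizeAzureModelId("gpt-35-turbo-1106"); // => "gpt-3.5-turbo-1106"
normalizeAzureModelId("gpt-4o");            // => "gpt-4o" (unchanged)

// A hypothetical GPT-4 Turbo deployment now produces:
// { family: "azure-gpt4-turbo", modelIds: ["gpt-4-turbo"] }
```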
diff --git a/src/shared/key-management/azure/provider.ts b/src/shared/key-management/azure/provider.ts
index 681a6ed..abe9a65 100644
--- a/src/shared/key-management/azure/provider.ts
+++ b/src/shared/key-management/azure/provider.ts
@@ -14,6 +14,8 @@ type AzureOpenAIKeyUsage = {
 export interface AzureOpenAIKey extends Key, AzureOpenAIKeyUsage {
   readonly service: "azure";
   readonly modelFamilies: AzureOpenAIModelFamily[];
+  /** Exact model IDs or deployment aliases known to be backed by this key. */
+  modelIds: string[];
   /** The time at which this key was last rate limited. */
   rateLimitedAt: number;
   /** The time until which this key is rate limited. */
@@ -62,6 +64,7 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
       rateLimitedAt: 0,
       rateLimitedUntil: 0,
       contentFiltering: false,
+      modelIds: [],
       hash: `azu-${crypto
         .createHash("sha256")
         .update(key)
@@ -73,6 +76,8 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
       "azure-gpt4-32kTokens": 0,
       "azure-gpt4-turboTokens": 0,
       "azure-gpt4oTokens": 0,
+      "azure-gpt5Tokens": 0,
+      "azure-o-seriesTokens": 0,
       "azure-dall-eTokens": 0,
     };
     this.keys.push(newKey);
@@ -96,8 +101,14 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
   public get(model: string) {
     const neededFamily = getAzureOpenAIModelFamily(model);
+    const normalizedModel = model
+      .replace(/^azure-/, "")
+      .replace("gpt-35-turbo", "gpt-3.5-turbo");
     const availableKeys = this.keys.filter(
-      (k) => !k.isDisabled && k.modelFamilies.includes(neededFamily)
+      (k) =>
+        !k.isDisabled &&
+        k.modelFamilies.includes(neededFamily) &&
+        (!k.modelIds.length || k.modelIds.includes(normalizedModel))
     );
     if (availableKeys.length === 0) {
       throw new PaymentRequiredError(
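The effect of the new `modelIds` filter in `get()`: keys with a known deployment inventory only serve exact (normalized) model IDs, while keys checked before this field existed keep the old family-level behavior. A sketch with hypothetical keys:

```ts
const keys = [
  { isDisabled: false, modelFamilies: ["azure-gpt4o"], modelIds: ["gpt-4o"] },
  { isDisabled: false, modelFamilies: ["azure-gpt4o"], modelIds: [] }, // legacy
];

// Request "azure-gpt-4o"      -> normalized "gpt-4o":      both keys eligible.
// Request "azure-gpt-4o-mini" -> normalized "gpt-4o-mini": only the legacy
// (empty-inventory) key remains eligible, assuming both IDs map to azure-gpt4o.
```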
diff --git a/src/shared/key-management/gcp/checker.ts b/src/shared/key-management/gcp/checker.ts
index 5995065..c5e8f14 100644
--- a/src/shared/key-management/gcp/checker.ts
+++ b/src/shared/key-management/gcp/checker.ts
@@ -32,10 +32,10 @@ export class GcpKeyChecker extends KeyCheckerBase {
     const isInitialCheck = !key.lastChecked;
     if (isInitialCheck) {
       checks = [
-        this.invokeModel("claude-3-haiku@20240307", key, true),
-        this.invokeModel("claude-3-sonnet@20240229", key, true),
-        this.invokeModel("claude-3-opus@20240229", key, true),
-        this.invokeModel("claude-3-5-sonnet@20240620", key, true),
+        this.invokeModel("claude-sonnet-4-5@20250929", key, true),
+        this.invokeModel("claude-haiku-4-5@20251001", key, true),
+        this.invokeModel("claude-opus-4-1@20250805", key, true),
+        this.invokeModel("claude-3-5-haiku@20241022", key, true),
       ];
 
       const [sonnet, haiku, opus, sonnet35] =
@@ -66,13 +66,13 @@ export class GcpKeyChecker extends KeyCheckerBase {
       });
     } else {
       if (key.haikuEnabled) {
-        await this.invokeModel("claude-3-haiku@20240307", key, false)
+        await this.invokeModel("claude-haiku-4-5@20251001", key, false)
       } else if (key.sonnetEnabled) {
-        await this.invokeModel("claude-3-sonnet@20240229", key, false)
+        await this.invokeModel("claude-sonnet-4-5@20250929", key, false)
       } else if (key.sonnet35Enabled) {
-        await this.invokeModel("claude-3-5-sonnet@20240620", key, false)
+        await this.invokeModel("claude-3-5-haiku@20241022", key, false)
       } else {
-        await this.invokeModel("claude-3-opus@20240229", key, false)
+        await this.invokeModel("claude-opus-4-1@20250805", key, false)
       }
 
       this.updateKey(key.hash, { lastChecked: Date.now() });
diff --git a/src/shared/key-management/index.ts b/src/shared/key-management/index.ts
index 67dfad4..16e20b8 100644
--- a/src/shared/key-management/index.ts
+++ b/src/shared/key-management/index.ts
@@ -4,6 +4,7 @@ import { KeyPool } from "./key-pool";
 
 /** The request and response format used by a model's API. */
 export type APIFormat =
   | "openai"
+  | "openai-responses"
   | "openai-text"
   | "openai-image"
   | "anthropic-chat" // Anthropic's newer messages array format
diff --git a/src/shared/key-management/openai/checker.ts b/src/shared/key-management/openai/checker.ts
index 481d0c4..b5cef57 100644
--- a/src/shared/key-management/openai/checker.ts
+++ b/src/shared/key-management/openai/checker.ts
@@ -111,7 +111,10 @@
     const familiesArray = [...families];
     const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
     this.updateKey(key.hash, {
-      modelSnapshots: models.filter((m) => m.match(/-\d{4}(-preview)?$/)),
+      modelIds: models,
+      modelSnapshots: models.filter((m) =>
+        m.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/)
+      ),
       modelFamilies: familiesArray,
       lastChecked: keyFromPool.lastChecked,
     });
diff --git a/src/shared/key-management/openai/provider.ts b/src/shared/key-management/openai/provider.ts
index 809262c..4f0e8e6 100644
--- a/src/shared/key-management/openai/provider.ts
+++ b/src/shared/key-management/openai/provider.ts
@@ -16,6 +16,8 @@ type OpenAIKeyUsage = {
 export interface OpenAIKey extends Key, OpenAIKeyUsage {
   readonly service: "openai";
   modelFamilies: OpenAIModelFamily[];
+  /** Exact model IDs reported by the models API for this key. */
+  modelIds: string[];
   /**
    * Some keys are assigned to multiple organizations, each with their own quota
    * limits. We clone the key for each organization and track usage/disabled
@@ -97,6 +99,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
         "gpt4" as const,
         "gpt4-turbo" as const,
         "gpt4o" as const,
+        "gpt5" as const,
+        "o-series" as const,
       ],
       isTrial: false,
       isDisabled: false,
@@ -118,8 +122,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
       "gpt4-32kTokens": 0,
       "gpt4-turboTokens": 0,
       gpt4oTokens: 0,
+      gpt5Tokens: 0,
+      "o-seriesTokens": 0,
       "dall-eTokens": 0,
       gpt4Rpm: 0,
+      modelIds: [],
       modelSnapshots: [],
     };
     this.keys.push(newKey);
@@ -160,8 +167,10 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
     if (model === "gpt-4-32k") model = "gpt-4-32k-0613";
 
     const neededFamily = getOpenAIModelFamily(model);
-    const excludeTrials = model === "text-embedding-ada-002";
-    const needsSnapshot = model.match(/-\d{4}(-preview)?$/);
+    const excludeTrials = /^text-embedding-(?:3-small|3-large|ada-002)$/.test(
+      model
+    );
+    const needsSnapshot = model.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/);
 
     const availableKeys = this.keys.filter(
       // Allow keys which
@@ -169,6 +178,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
         !key.isDisabled && // are not disabled
         key.modelFamilies.includes(neededFamily) && // have access to the model family we need
         (!excludeTrials || !key.isTrial) && // and are not trials if we don't want them
+        (!key.modelIds.length || key.modelIds.includes(model)) && // and have the requested model if exact inventory is available
        (!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need
     );
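The widened snapshot regex is easiest to read against examples (model IDs assumed):

```ts
const snapshotRe = /-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/;

snapshotRe.test("gpt-4-0613");             // true  (legacy 4-digit snapshot)
snapshotRe.test("gpt-4-1106-preview");     // true
snapshotRe.test("gpt-4-turbo-2024-04-09"); // true  (full date stamps now match)
snapshotRe.test("gpt-4o");                 // false (alias, not pinned)
```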
diff --git a/src/shared/models.ts b/src/shared/models.ts
index b2322fb..3587e07 100644
--- a/src/shared/models.ts
+++ b/src/shared/models.ts
@@ -23,6 +23,8 @@ export type OpenAIModelFamily =
   | "gpt4-32k"
   | "gpt4-turbo"
   | "gpt4o"
+  | "gpt5"
+  | "o-series"
   | "dall-e";
 export type AnthropicModelFamily = "claude" | "claude-opus";
 export type GoogleAIModelFamily = "gemini-pro";
@@ -51,6 +53,8 @@ export const MODEL_FAMILIES = ((
   "gpt4-32k",
   "gpt4-turbo",
   "gpt4o",
+  "gpt5",
+  "o-series",
   "dall-e",
   "claude",
   "claude-opus",
@@ -68,6 +72,8 @@ export const MODEL_FAMILIES = ((
   "azure-gpt4-32k",
   "azure-gpt4-turbo",
   "azure-gpt4o",
+  "azure-gpt5",
+  "azure-o-series",
   "azure-dall-e",
 ] as const);
 
@@ -84,6 +90,10 @@ export const LLM_SERVICES = ((
 ] as const);
 
 export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
+  "^gpt-5(\\.\\d+)?([-.].+)?$": "gpt5",
+  "^o\\d([-.].+)?$": "o-series",
+  "^computer-use-preview$": "o-series",
+  "^gpt-4\\.1([-.].+)?$": "gpt4o",
   "^gpt-4o": "gpt4o",
   "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
   "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
@@ -94,7 +104,8 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
   "^gpt-4-\\d{4}$": "gpt4",
   "^gpt-4$": "gpt4",
   "^gpt-3.5-turbo": "turbo",
-  "^text-embedding-ada-002$": "turbo",
+  "^text-embedding-(ada-002|3-small|3-large)$": "turbo",
+  "^gpt-image-1([-.].+)?$": "dall-e",
   "^dall-e-\\d{1}$": "dall-e",
 };
 
@@ -106,6 +117,8 @@ export const MODEL_FAMILY_SERVICE: {
   "gpt4-turbo": "openai",
   "gpt4-32k": "openai",
   "gpt4o": "openai",
+  gpt5: "openai",
+  "o-series": "openai",
   "dall-e": "openai",
   claude: "anthropic",
   "claude-opus": "anthropic",
@@ -118,6 +131,8 @@ export const MODEL_FAMILY_SERVICE: {
   "azure-gpt4-32k": "azure",
   "azure-gpt4-turbo": "azure",
   "azure-gpt4o": "azure",
+  "azure-gpt5": "azure",
+  "azure-o-series": "azure",
   "azure-dall-e": "azure",
   "gemini-pro": "google-ai",
   "mistral-tiny": "mistral-ai",
@@ -150,7 +165,10 @@ export function getGoogleAIModelFamily(_model: string): ModelFamily {
 }
 
 export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
-  const prunedModel = model.replace(/-(latest|\d{4})$/, "");
+  const prunedModel = model.replace(
+    /-(latest|\d{4}|\d{6}|\d+\.\d+|v\d+(:\d+)?)$/,
+    ""
+  );
   switch (prunedModel) {
     case "mistral-tiny":
     case "mistral-small":
@@ -161,7 +179,34 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
       return "mistral-tiny";
     case "open-mixtral-8x7b":
       return "mistral-small";
+    case "ministral-3b":
+    case "ministral-8b":
+    case "mistral-small-3.1":
+    case "mistral-small-3.2":
+      return "mistral-small";
+    case "magistral-medium":
+      return "mistral-medium";
+    case "codestral":
+    case "devstral":
+    case "mistral-large-2":
+    case "mistral-large-3":
+    case "pixtral-large":
+      return "mistral-large";
     default:
+      if (model.startsWith("mistral-small") || model.startsWith("ministral")) {
+        return "mistral-small";
+      }
+      if (model.startsWith("mistral-medium") || model.startsWith("magistral")) {
+        return "mistral-medium";
+      }
+      if (
+        model.startsWith("mistral-large") ||
+        model.startsWith("pixtral-large") ||
+        model.startsWith("codestral") ||
+        model.startsWith("devstral")
+      ) {
+        return "mistral-large";
+      }
       return "mistral-tiny";
   }
 }
@@ -225,6 +270,7 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
       modelFamily = getClaudeModelFamily(model);
       break;
     case "openai":
+    case "openai-responses":
     case "openai-text":
     case "openai-image":
       modelFamily = getOpenAIModelFamily(model);
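Assumed examples of how the new family-map entries route model IDs (the first matching regex wins):

```ts
getOpenAIModelFamily("gpt-5.2-chat-latest");     // => "gpt5"
getOpenAIModelFamily("o3-mini");                 // => "o-series"
getOpenAIModelFamily("gpt-4.1-mini");            // => "gpt4o" (shares the gpt4o bucket)
getOpenAIModelFamily("gpt-image-1.5");           // => "dall-e"
getOpenAIModelFamily("text-embedding-3-small");  // => "turbo"

getMistralAIModelFamily("pixtral-large-latest"); // => "mistral-large"
getMistralAIModelFamily("ministral-8b-2410");    // => "mistral-small"
```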
index 880db82..85d0781 100644
--- a/src/shared/stats.ts
+++ b/src/shared/stats.ts
@@ -10,6 +10,14 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
     case "azure-gpt4o":
       cost = 0.000005;
       break;
+    case "gpt5":
+    case "azure-gpt5":
+      cost = 0.00001;
+      break;
+    case "o-series":
+    case "azure-o-series":
+      cost = 0.000012;
+      break;
     case "azure-gpt4-turbo":
     case "gpt4-turbo":
       cost = 0.00001;
       break;
diff --git a/src/shared/tokenization/claude.ts b/src/shared/tokenization/claude.ts
index 880db82..85d0781 100644
--- a/src/shared/tokenization/claude.ts
+++ b/src/shared/tokenization/claude.ts
@@ -65,7 +65,14 @@ async function getTokenCountForMessages({
         numTokens += encoder.encode(text.normalize("NFKC"), "all").length;
         break;
       case "image":
-        numTokens += await getImageTokenCount(part.source.data);
+        if (part.source.type === "base64") {
+          numTokens += await getImageTokenCount(part.source.data);
+        } else {
+          // Remote image URLs are already hosted elsewhere, so we cannot
+          // inspect dimensions locally. Charge the documented worst-case
+          // token cost instead of undercounting them as zero.
+          numTokens += 1600;
+        }
         break;
       default:
         throw new Error(`Unsupported Anthropic content type.`);
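Two sanity checks on the new cost buckets: GPT-5 lands in the same US$10-per-million bucket as GPT-4 Turbo, and o-series sits slightly above it. Assuming the function multiplies the per-token rate by the token count:

```ts
getTokenCostUsd("gpt5", 1_000_000);     // => 10  (US$10 / 1M tokens)
getTokenCostUsd("o-series", 1_000_000); // => 12  (US$12 / 1M tokens)
```

The remote-image fallback in the Claude tokenizer, likewise, deliberately overcounts: 1600 tokens is the documented worst case per image, which is safer for quota enforcement than counting zero.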
diff --git a/src/shared/tokenization/openai.ts b/src/shared/tokenization/openai.ts
index 2c16c85..552e359 100644
--- a/src/shared/tokenization/openai.ts
+++ b/src/shared/tokenization/openai.ts
@@ -179,16 +179,33 @@ export const DALLE_TOKENS_PER_DOLLAR = 100000;
  * which we convert to tokens at a rate of 100000 tokens per dollar.
  */
 export function getOpenAIImageCost(params: {
-  model: "dall-e-2" | "dall-e-3";
-  quality: "standard" | "hd";
-  resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024";
+  model:
+    | "dall-e-2"
+    | "dall-e-3"
+    | "gpt-image-1"
+    | "gpt-image-1-mini"
+    | "gpt-image-1.5";
+  quality: "auto" | "low" | "medium" | "high" | "standard" | "hd";
+  resolution:
+    | "auto"
+    | "512x512"
+    | "256x256"
+    | "1024x1024"
+    | "1024x1536"
+    | "1536x1024"
+    | "1024x1792"
+    | "1792x1024";
   n: number | null;
 }) {
   const { model, quality, resolution, n } = params;
+  const normalizedResolution =
+    resolution === "auto" ? "1024x1024" : resolution;
+  const normalizedQuality =
+    quality === "hd" || quality === "high" ? "hd" : "standard";
   const usd = (() => {
     switch (model) {
       case "dall-e-2":
-        switch (resolution) {
+        switch (normalizedResolution) {
           case "512x512":
             return 0.018;
           case "256x256":
@@ -199,12 +216,20 @@ export function getOpenAIImageCost(params: {
             throw new Error("Invalid resolution");
         }
       case "dall-e-3":
-        switch (resolution) {
+      case "gpt-image-1.5":
+      case "gpt-image-1":
+      case "gpt-image-1-mini":
+        // GPT Image models have newer parameter ranges, but we still account
+        // for them using the existing DALL-E 3-style price buckets so the
+        // proxy can continue to enforce rough quota/cost limits.
+        switch (normalizedResolution) {
           case "1024x1024":
-            return quality === "standard" ? 0.04 : 0.08;
+            return normalizedQuality === "standard" ? 0.04 : 0.08;
+          case "1024x1536":
+          case "1536x1024":
           case "1024x1792":
           case "1792x1024":
-            return quality === "standard" ? 0.08 : 0.12;
+            return normalizedQuality === "standard" ? 0.08 : 0.12;
           default:
             throw new Error("Invalid resolution");
         }
@@ -233,7 +258,10 @@ export function estimateGoogleAITokenCount(
   let numTokens = 0;
   for (const message of prompt) {
     numTokens += tokensPerMessage;
-    numTokens += encoder.encode(message.parts[0].text).length;
+    const text = message.parts
+      .map((part) => ("text" in part ? part.text : ""))
+      .join("\n");
+    numTokens += encoder.encode(text).length;
   }
   numTokens += 3;
diff --git a/src/shared/tokenization/tokenizer.ts b/src/shared/tokenization/tokenizer.ts
index 1b03f3d..4a145b4 100644
--- a/src/shared/tokenization/tokenizer.ts
+++ b/src/shared/tokenization/tokenizer.ts
@@ -55,7 +55,7 @@ type MistralAIChatTokenCountRequest = {
 type FlatPromptTokenCountRequest = {
   prompt: string;
   completion?: never;
-  service: "openai-text" | "anthropic-text" | "google-ai";
+  service: "openai-text" | "openai-responses" | "anthropic-text" | "google-ai";
 };
 
 type StringCompletionTokenCountRequest = {
@@ -105,6 +105,7 @@ export async function countTokens({
         tokenization_duration_ms: getElapsedMs(time),
       };
     case "openai":
+    case "openai-responses":
     case "openai-text":
       return {
         ...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),
diff --git a/src/shared/users/user-store.ts b/src/shared/users/user-store.ts
index add0f94..272bc66 100644
--- a/src/shared/users/user-store.ts
+++ b/src/shared/users/user-store.ts
@@ -400,6 +400,7 @@ function getModelFamilyForQuotaUsage(
 
   switch (api) {
     case "openai":
+    case "openai-responses":
     case "openai-text":
     case "openai-image":
       return getOpenAIModelFamily(model);
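Assumed behavior of the extended image-cost accounting after normalization (`auto` size maps to 1024x1024, `high` quality to the hd bucket), with USD converted to tokens at the existing 100000-tokens-per-dollar rate:

```ts
getOpenAIImageCost({
  model: "gpt-image-1.5",
  quality: "auto",    // normalized to "standard"
  resolution: "auto", // normalized to "1024x1024"
  n: 1,
}); // priced like standard DALL-E 3 at 1024x1024: US$0.04, about 4000 tokens

getOpenAIImageCost({
  model: "gpt-image-1",
  quality: "high",    // normalized to "hd"
  resolution: "1536x1024",
  n: 1,
}); // US$0.12, about 12000 tokens
```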