Update proxy for modern model APIs

Your Name
2026-04-06 03:59:37 -07:00
parent 824adfbbb2
commit 8662eadea7
48 changed files with 1294 additions and 214 deletions
+6 -2
@@ -40,11 +40,11 @@ NODE_ENV=production
# Which model types users are allowed to access.
# The following model families are recognized:
-# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-dall-e
# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | gpt5 | o-series | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-gpt5 | azure-o-series | azure-dall-e
# By default, all models are allowed except for 'dall-e' / 'azure-dall-e'.
# To allow DALL-E image generation, uncomment the line below and add 'dall-e' or
# 'azure-dall-e' to the list of allowed model families.
-# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,gpt5,o-series,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o,azure-gpt5,azure-o-series
# Which services can be used to process prompts containing images via multimodal
# models. The following services are recognized:
@@ -115,10 +115,14 @@ NODE_ENV=production
# TOKEN_QUOTA_GPT4=0
# TOKEN_QUOTA_GPT4_32K=0
# TOKEN_QUOTA_GPT4_TURBO=0
# TOKEN_QUOTA_GPT5=0
# TOKEN_QUOTA_O_SERIES=0
# TOKEN_QUOTA_CLAUDE=0
# TOKEN_QUOTA_GEMINI_PRO=0
# TOKEN_QUOTA_AWS_CLAUDE=0
# TOKEN_QUOTA_GCP_CLAUDE=0
# TOKEN_QUOTA_AZURE_GPT5=0
# TOKEN_QUOTA_AZURE_O_SERIES=0
# "Tokens" for image-generation models are counted at a rate of 100000 tokens
# per US$1.00 generated, which is similar to the cost of GPT-4 Turbo.
# DALL-E 3 costs around US$0.10 per image (10000 tokens).
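For reference, the quota math above is simple dollar arithmetic; a minimal sketch (constant names are illustrative, not taken from the proxy source):

```ts
// Image-generation "tokens" are dollar-denominated: 100000 tokens per US$1.00.
const TOKENS_PER_USD = 100_000;

// DALL-E 3 at roughly US$0.10 per image therefore counts as ~10000 quota tokens.
const estimatedImageCostUsd = 0.1;
const quotaTokensPerImage = estimatedImageCostUsd * TOKENS_PER_USD; // 10000
```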
+8 -3
@@ -45,11 +45,16 @@ You can also request Claude Instant, but support for this isn't fully implemente
### Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
-- `anthropic.claude-v1` (~18k context, claude 1.3 -- EOL 2024-02-28)
- `anthropic.claude-v2` (~100k context, claude 2.0)
- `anthropic.claude-v2:1` (~200k context, claude 2.1)
-- **Claude Instant**
-- `anthropic.claude-instant-v1` (~100k context, claude instant 1.2)
- `anthropic.claude-haiku-4-5-20251001-v1:0`
- `anthropic.claude-sonnet-4-5-20250929-v1:0`
- `anthropic.claude-opus-4-1-20250805-v1:0`
- `anthropic.claude-3-5-haiku-20241022-v1:0`
- `anthropic.claude-sonnet-4-20250514-v1:0`
- `anthropic.claude-opus-4-20250514-v1:0`

For OpenAI-compatible callers, the proxy will also remap newer Claude-style names such as `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, `claude-opus-4-1-20250805`, and `claude-3-5-haiku-20241022` to the corresponding Bedrock model IDs.
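Conceptually, the remap translates a dash-dated Anthropic-style alias into the versioned Bedrock ID. A minimal sketch of the idea (the table and helper name here are illustrative; the proxy's actual `maybeReassignModel` matches substrings, so it also catches bare aliases like `claude-sonnet-4-5`):

```ts
// Hypothetical exact-match table built from the model IDs listed above.
const BEDROCK_IDS: Record<string, string> = {
  "claude-sonnet-4-5-20250929": "anthropic.claude-sonnet-4-5-20250929-v1:0",
  "claude-haiku-4-5-20251001": "anthropic.claude-haiku-4-5-20251001-v1:0",
  "claude-opus-4-1-20250805": "anthropic.claude-opus-4-1-20250805-v1:0",
  "claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
};

// Fall back to the requested name if no alias matches.
function toBedrockModelId(requested: string): string {
  return BEDROCK_IDS[requested.toLowerCase()] ?? requested;
}
```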
## Note regarding logging
+3 -1
@@ -20,7 +20,9 @@ AZURE_CREDENTIALS=contoso-ml:gpt4-8k:0123456789abcdef0123456789abcdef,northwind-
Note that each Azure deployment is assigned a model when you create it in the Azure OpenAI Service portal. If you want to use a different model, you'll need to create a new deployment, and therefore a new key to be added to the AZURE_CREDENTIALS environment variable. Each credential only grants access to one model.
### Supported model IDs
-Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. For the most part they work the same with Azure. GPT-3.5 Turbo has an ID of "gpt-35-turbo" because Azure doesn't allow periods in model names, but the proxy should automatically convert this to the correct ID.
Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. The proxy now understands newer Azure-backed OpenAI model families such as GPT-4o, GPT-4.1, GPT-5 / GPT-5.2, o-series reasoning models, and GPT Image deployments including `gpt-image-1.5`, plus the newer Responses API route at `/proxy/azure/openai/v1/responses`.

GPT-3.5 Turbo still has an Azure-specific ID of `gpt-35-turbo` because Azure doesn't allow periods in model names, but the proxy will automatically normalize that for you.
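The normalization itself is a simple string rewrite, since Azure deployment names cannot contain periods; a sketch under that assumption, with a hypothetical helper name:

```ts
// Hypothetical helper: OpenAI's "gpt-3.5-turbo" maps to Azure's "gpt-35-turbo".
function toAzureDeploymentName(modelId: string): string {
  return modelId.startsWith("gpt-3.5")
    ? modelId.replace("gpt-3.5", "gpt-35")
    : modelId;
}

toAzureDeploymentName("gpt-3.5-turbo"); // => "gpt-35-turbo"
```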
As noted above, you can only use model IDs for which a deployment has been created and added to the proxy.
+8 -4
@@ -29,7 +29,11 @@ GCP_CREDENTIALS=my-first-project:xxx@yyy.com:us-east5:-----BEGIN PRIVATE KEY----
## Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
-- `claude-3-haiku@20240307`
-- `claude-3-sonnet@20240229`
-- `claude-3-opus@20240229`
-- `claude-3-5-sonnet@20240620`
- `claude-haiku-4-5@20251001`
- `claude-sonnet-4-5@20250929`
- `claude-opus-4-1@20250805`
- `claude-3-5-haiku@20241022`
- `claude-sonnet-4@20250514`
- `claude-opus-4@20250514`

For OpenAI-compatible callers, the proxy will also remap Claude-style aliases like `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, and `claude-opus-4-1-20250805` to the corresponding Vertex AI model IDs.
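Vertex AI uses an `@`-separated version suffix where the Anthropic-style alias uses a trailing dash-date, so the remap amounts to rewriting that suffix. A sketch with a hypothetical helper (the proxy's actual logic matches against a fixed set of constants instead):

```ts
// "claude-sonnet-4-5-20250929" -> "claude-sonnet-4-5@20250929"
function toVertexModelId(alias: string): string {
  return alias.replace(/-(\d{8})$/, "@$1");
}
```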
+51 -7
@@ -7,8 +7,8 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
-"model": "gpt-3.5-turbo",
"model": "gpt-4.1-mini",
-"max_tokens": 30,
"max_completion_tokens": 30,
"stream": false,
"messages": [
{
@@ -18,6 +18,19 @@ Content-Type: application/json
]
}
###
# @name OpenAI -- Responses API
POST https://api.openai.com/v1/responses
Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
"model": "gpt-5.2",
"reasoning": { "effort": "medium" },
"max_output_tokens": 80,
"input": "This is a test prompt."
}
###
# @name OpenAI -- Text Completions
POST https://api.openai.com/v1/completions
@@ -38,7 +51,7 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
-"model": "text-embedding-ada-002",
"model": "text-embedding-3-small",
"input": "This is a test embedding input."
}
@@ -81,8 +94,8 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
-"model": "gpt-4-1106-preview",
"model": "gpt-4.1",
-"max_tokens": 20,
"max_completion_tokens": 20,
"stream": true,
"temperature": 1,
"seed": 123,
@@ -94,6 +107,20 @@ Content-Type: application/json
]
}
###
# @name Proxy / OpenAI -- Native Responses API
POST {{proxy-host}}/proxy/openai/v1/responses
Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gpt-5.2",
"reasoning": { "effort": "medium" },
"max_output_tokens": 64,
"stream": false,
"input": "Summarize the purpose of this reverse proxy in one sentence."
}
###
# @name Proxy / OpenAI -- Native Text Completions
POST {{proxy-host}}/proxy/openai/v1/turbo-instruct/chat/completions
@@ -142,7 +169,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
-"model": "text-embedding-ada-002",
"model": "text-embedding-3-small",
"input": "This is a test embedding input."
}
@@ -185,7 +212,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
-"model": "gpt-3.5-turbo",
"model": "gpt-5.2",
"max_tokens": 20,
"stream": false,
"temperature": 0,
@@ -197,6 +224,23 @@ Content-Type: application/json
]
}
###
# @name Proxy / Google AI -- OpenAI-Compat Image Generation
POST {{proxy-host}}/proxy/google-ai/v1/chat/completions
Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gemini-2.5-flash-image",
"stream": false,
"messages": [
{
"role": "user",
"content": "Generate a flat vector-style illustration of a red fox reading a newspaper at a cafe table."
}
]
}
###
# @name Proxy / AWS Claude -- Native Completion
POST {{proxy-host}}/proxy/aws/claude/v1/complete
+4
@@ -434,6 +434,8 @@ export const config: Config = {
"gpt4-32k", "gpt4-32k",
"gpt4-turbo", "gpt4-turbo",
"gpt4o", "gpt4o",
"gpt5",
"o-series",
"claude", "claude",
"claude-opus", "claude-opus",
"gemini-pro", "gemini-pro",
@@ -450,6 +452,8 @@ export const config: Config = {
"azure-gpt4-32k", "azure-gpt4-32k",
"azure-gpt4-turbo", "azure-gpt4-turbo",
"azure-gpt4o", "azure-gpt4o",
"azure-gpt5",
"azure-o-series",
]),
rejectPhrases: parseCsv(getEnvWithDefault("REJECT_PHRASES", "")),
rejectMessage: getEnvWithDefault(
+4
@@ -17,6 +17,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"gpt4-32k": "GPT-4 32k", "gpt4-32k": "GPT-4 32k",
"gpt4-turbo": "GPT-4 Turbo", "gpt4-turbo": "GPT-4 Turbo",
gpt4o: "GPT-4o", gpt4o: "GPT-4o",
gpt5: "GPT-5",
"o-series": "o-Series",
"dall-e": "DALL-E", "dall-e": "DALL-E",
claude: "Claude (Sonnet)", claude: "Claude (Sonnet)",
"claude-opus": "Claude (Opus)", "claude-opus": "Claude (Opus)",
@@ -34,6 +36,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"azure-gpt4-32k": "Azure GPT-4 32k", "azure-gpt4-32k": "Azure GPT-4 32k",
"azure-gpt4-turbo": "Azure GPT-4 Turbo", "azure-gpt4-turbo": "Azure GPT-4 Turbo",
"azure-gpt4o": "Azure GPT-4o", "azure-gpt4o": "Azure GPT-4o",
"azure-gpt5": "Azure GPT-5",
"azure-o-series": "Azure o-Series",
"azure-dall-e": "Azure DALL-E", "azure-dall-e": "Azure DALL-E",
}; };
+28 -21
@@ -29,24 +29,18 @@ const getModelsResponse = () => {
if (!config.anthropicKey) return { object: "list", data: [] };
const claudeVariants = [
-"claude-v1",
-"claude-v1-100k",
-"claude-instant-v1",
-"claude-instant-v1-100k",
-"claude-v1.3",
-"claude-v1.3-100k",
-"claude-v1.2",
-"claude-v1.0",
-"claude-instant-v1.1",
-"claude-instant-v1.1-100k",
-"claude-instant-v1.0",
-"claude-2",
"claude-2.0",
"claude-2.1",
-"claude-3-haiku-20240307",
-"claude-3-opus-20240229",
-"claude-3-sonnet-20240229",
-"claude-3-5-sonnet-20240620"
"claude-sonnet-4-5",
"claude-sonnet-4-5-20250929",
"claude-haiku-4-5",
"claude-haiku-4-5-20251001",
"claude-opus-4-1",
"claude-opus-4-1-20250805",
"claude-opus-4-20250514",
"claude-sonnet-4-20250514",
"claude-3-5-haiku-20241022",
"claude-3-5-haiku-latest",
];
const models = claudeVariants.map((id) => ({
@@ -230,7 +224,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware({
 * (claude-3 based models do not support the old text completion endpoint).
 */
const preprocessAnthropicTextRequest: RequestHandler = (req, res, next) => {
-if (req.body.model?.startsWith("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
@@ -255,7 +249,7 @@ const oaiToChatPreprocessor = createPreprocessorMiddleware({
 */
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
maybeReassignModel(req);
-if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
oaiToChatPreprocessor(req, res, next);
} else {
oaiToTextPreprocessor(req, res, next);
@@ -315,7 +309,8 @@ function handleAnthropicTextCompatRequest(
const type = req.params.type;
const action = req.params.action;
const alreadyInChatFormat = Boolean(req.body.messages);
-const compatModel = `claude-3-${type}-20240229`;
const compatModel =
type === "opus" ? "claude-opus-4-1-20250805" : "claude-sonnet-4-5-20250929";
req.log.info(
{ type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
"Handling Anthropic compatibility request"
@@ -349,8 +344,20 @@ function handleAnthropicTextCompatRequest(
 */
function maybeReassignModel(req: Request) {
const model = req.body.model;
-if (!model.startsWith("gpt-")) return;
-req.body.model = "claude-3-sonnet-20240229";
const lower = String(model).toLowerCase();
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = "claude-sonnet-4-5-20250929";
}
}
function requiresAnthropicMessagesApi(model?: string) {
return /^claude-(?:3|sonnet|opus)/.test(model ?? "");
}
export const anthropic = anthropicRouter;
+80 -13
@@ -20,6 +20,12 @@ import { transformAnthropicChatResponseToAnthropicText, transformAnthropicChatRe
import { sendErrorToClient } from "./middleware/response/error-generator";
const LATEST_AWS_V2_MINOR_VERSION = "1";
const AWS_CLAUDE_SONNET_45 = "anthropic.claude-sonnet-4-5-20250929-v1:0";
const AWS_CLAUDE_HAIKU_45 = "anthropic.claude-haiku-4-5-20251001-v1:0";
const AWS_CLAUDE_OPUS_41 = "anthropic.claude-opus-4-1-20250805-v1:0";
const AWS_CLAUDE_SONNET_4 = "anthropic.claude-sonnet-4-20250514-v1:0";
const AWS_CLAUDE_OPUS_4 = "anthropic.claude-opus-4-20250514-v1:0";
const AWS_CLAUDE_35_HAIKU = "anthropic.claude-3-5-haiku-20241022-v1:0";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -35,10 +41,12 @@ const getModelsResponse = () => {
const variants = [
"anthropic.claude-v2",
"anthropic.claude-v2:1",
-"anthropic.claude-3-haiku-20240307-v1:0",
-"anthropic.claude-3-sonnet-20240229-v1:0",
-"anthropic.claude-3-5-sonnet-20240620-v1:0",
-"anthropic.claude-3-opus-20240229-v1:0",
AWS_CLAUDE_HAIKU_45,
AWS_CLAUDE_SONNET_45,
AWS_CLAUDE_OPUS_41,
AWS_CLAUDE_35_HAIKU,
AWS_CLAUDE_SONNET_4,
AWS_CLAUDE_OPUS_4,
];
const models = variants.map((id) => ({
@@ -164,7 +172,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware(
 * (claude-3 based models do not support the old text completion endpoint).
 */
const preprocessAwsTextRequest: RequestHandler = (req, res, next) => {
-if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
@@ -186,7 +194,7 @@ const oaiToAwsChatPreprocessor = createPreprocessorMiddleware(
 * or the new Claude chat completion endpoint, based on the requested model.
 */
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
-if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
oaiToAwsChatPreprocessor(req, res, next);
} else {
oaiToAwsTextPreprocessor(req, res, next);
@@ -241,12 +249,65 @@ awsRouter.post(
 */
function maybeReassignModel(req: Request) {
const model = req.body.model;
const lower = String(model).toLowerCase();
// If it looks like an AWS model, use it as-is
if (model.includes("anthropic.claude")) {
return;
}
if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4")) {
req.body.model = AWS_CLAUDE_OPUS_4;
return;
}
if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
req.body.model = AWS_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
if (lower.includes("sonnet-4")) {
req.body.model = AWS_CLAUDE_SONNET_4;
return;
}
if (lower.includes("3-5") && lower.includes("haiku")) {
req.body.model = AWS_CLAUDE_35_HAIKU;
return;
}
if (lower.includes("opus")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("haiku")) {
req.body.model = AWS_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet")) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
// Anthropic model names can look like:
// - claude-v1
// - claude-2.1
@@ -282,20 +343,22 @@ function maybeReassignModel(req: Request) {
case "3": case "3":
case "3.0": case "3.0":
if (name.includes("opus")) { if (name.includes("opus")) {
req.body.model = "anthropic.claude-3-opus-20240229-v1:0"; req.body.model = AWS_CLAUDE_OPUS_41;
} else if (name.includes("haiku")) { } else if (name.includes("haiku")) {
req.body.model = "anthropic.claude-3-haiku-20240307-v1:0"; req.body.model = AWS_CLAUDE_HAIKU_45;
} else { } else {
req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0"; req.body.model = AWS_CLAUDE_SONNET_45;
} }
return; return;
case "3.5": case "3.5":
req.body.model = "anthropic.claude-3-5-sonnet-20240620-v1:0"; req.body.model = name.includes("haiku")
? AWS_CLAUDE_35_HAIKU
: AWS_CLAUDE_SONNET_45;
return; return;
} }
// Fallback to Claude 2.1 // Fallback to Claude Sonnet 4
req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`; req.body.model = AWS_CLAUDE_SONNET_45;
return; return;
} }
@@ -306,7 +369,7 @@ export function handleCompatibilityRequest(
) {
const action = req.params.action;
const alreadyInChatFormat = Boolean(req.body.messages);
-const compatModel = "anthropic.claude-3-5-sonnet-20240620-v1:0";
const compatModel = AWS_CLAUDE_SONNET_4;
req.log.info(
{ inputModel: req.body.model, compatModel, alreadyInChatFormat },
"Handling AWS compatibility request"
@@ -335,3 +398,7 @@ export function handleCompatibilityRequest(
}
export const aws = awsRouter;
function requiresAnthropicMessagesApi(model?: string) {
return /claude-(?:3|sonnet|opus)/.test(model ?? "");
}
+30 -6
@@ -32,20 +32,29 @@ function getModelsResponse() {
}
let available = new Set<AzureOpenAIModelFamily>();
const availableModelIds = new Set<string>();
for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "azure") continue;
const azureKey = key as any;
key.modelFamilies.forEach((family) =>
available.add(family as AzureOpenAIModelFamily)
);
azureKey.modelIds?.forEach((id: string) => availableModelIds.add(id));
}
const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
available = new Set([...available].filter((x) => allowed.has(x)));
-const models = KNOWN_OPENAI_MODELS.map((id) => ({
-id,
-object: "model",
-created: new Date().getTime(),
-owned_by: "azure",
const usingExactModelIds = availableModelIds.size > 0;
const sourceModels = usingExactModelIds
? [...new Set([...KNOWN_OPENAI_MODELS, ...availableModelIds])]
: KNOWN_OPENAI_MODELS;
const models = sourceModels.map((id) => ({
id,
object: "model",
created: new Date().getTime(),
owned_by: "azure",
permission: [
{
id: "modelperm-" + id,
@@ -58,7 +67,12 @@ function getModelsResponse() {
],
root: id,
parent: null,
-})).filter((model) => available.has(getAzureOpenAIModelFamily(model.id)));
})).filter((model) => {
if (usingExactModelIds) {
return availableModelIds.has(model.id);
}
return available.has(getAzureOpenAIModelFamily(model.id));
});
modelsCache = { object: "list", data: models };
modelsCacheTime = new Date().getTime();
@@ -115,6 +129,16 @@ azureOpenAIRouter.post(
}),
azureOpenAIProxy
);
azureOpenAIRouter.post(
"/v1/responses",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai-responses",
outApi: "openai-responses",
service: "azure",
}),
azureOpenAIProxy
);
azureOpenAIRouter.post(
"/v1/images/generations",
ipLimiter,
+74 -12
@@ -19,7 +19,12 @@ import {
import { transformAnthropicChatResponseToOpenAI } from "./anthropic";
import { sendErrorToClient } from "./middleware/response/error-generator";
-const LATEST_GCP_SONNET_MINOR_VERSION = "20240229";
const GCP_CLAUDE_SONNET_45 = "claude-sonnet-4-5@20250929";
const GCP_CLAUDE_HAIKU_45 = "claude-haiku-4-5@20251001";
const GCP_CLAUDE_OPUS_41 = "claude-opus-4-1@20250805";
const GCP_CLAUDE_SONNET_4 = "claude-sonnet-4@20250514";
const GCP_CLAUDE_OPUS_4 = "claude-opus-4@20250514";
const GCP_CLAUDE_35_HAIKU = "claude-3-5-haiku@20241022";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -33,10 +38,12 @@ const getModelsResponse = () => {
// https://docs.anthropic.com/en/docs/about-claude/models
const variants = [
-"claude-3-haiku@20240307",
-"claude-3-sonnet@20240229",
-"claude-3-opus@20240229",
-"claude-3-5-sonnet@20240620",
GCP_CLAUDE_HAIKU_45,
GCP_CLAUDE_SONNET_45,
GCP_CLAUDE_OPUS_41,
GCP_CLAUDE_35_HAIKU,
GCP_CLAUDE_SONNET_4,
GCP_CLAUDE_OPUS_4,
];
const models = variants.map((id) => ({
@@ -147,6 +154,7 @@ gcpRouter.post(
 */
function maybeReassignModel(req: Request) {
const model = req.body.model;
const lower = String(model).toLowerCase();
// If it looks like a GCP model, use it as-is
// if (model.includes("anthropic.claude")) {
@@ -154,6 +162,58 @@ function maybeReassignModel(req: Request) {
return;
}
if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4")) {
req.body.model = GCP_CLAUDE_OPUS_4;
return;
}
if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
req.body.model = GCP_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
if (lower.includes("sonnet-4")) {
req.body.model = GCP_CLAUDE_SONNET_4;
return;
}
if (lower.includes("3-5") && lower.includes("haiku")) {
req.body.model = GCP_CLAUDE_35_HAIKU;
return;
}
if (lower.includes("opus")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("haiku")) {
req.body.model = GCP_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet")) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
// Anthropic model names can look like:
// - claude-v1
// - claude-2.1
@@ -165,7 +225,7 @@ function maybeReassignModel(req: Request) {
// If there's no match, fall back to Claude Sonnet as it is most likely to be
// available on GCP.
if (!match) {
-req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`;
req.body.model = GCP_CLAUDE_SONNET_4;
return;
}
@@ -176,20 +236,22 @@ function maybeReassignModel(req: Request) {
case "3": case "3":
case "3.0": case "3.0":
if (name.includes("opus")) { if (name.includes("opus")) {
req.body.model = "claude-3-opus@20240229"; req.body.model = GCP_CLAUDE_OPUS_41;
} else if (name.includes("haiku")) { } else if (name.includes("haiku")) {
req.body.model = "claude-3-haiku@20240307"; req.body.model = GCP_CLAUDE_HAIKU_45;
} else { } else {
req.body.model = "claude-3-sonnet@20240229"; req.body.model = GCP_CLAUDE_SONNET_45;
} }
return; return;
case "3.5": case "3.5":
req.body.model = "claude-3-5-sonnet@20240620"; req.body.model = name.includes("haiku")
? GCP_CLAUDE_35_HAIKU
: GCP_CLAUDE_SONNET_45;
return; return;
} }
// Fallback to Claude3 Sonnet // Fallback to Claude Sonnet 4
req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`; req.body.model = GCP_CLAUDE_SONNET_45;
return; return;
} }
+23 -7
@@ -16,6 +16,11 @@ import {
ProxyResHandlerWithBody,
} from "./middleware/response";
import { addGoogleAIKey } from "./middleware/request/preprocessors/add-google-ai-key";
import { BadRequestError } from "../shared/errors";
import {
flattenGoogleAIContentParts,
isGoogleAIImageModel,
} from "../shared/api-schemas";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -31,10 +36,15 @@ const getModelsResponse = () => {
if (!config.googleAIKey) return { object: "list", data: [] };
const googleAIVariants = [
-"gemini-pro",
-"gemini-1.0-pro",
"gemini-2.5-pro",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
"gemini-2.5-flash-image",
"gemini-3-pro-image-preview",
"gemini-2.0-flash-preview-image-generation",
"gemini-2.0-flash",
"gemini-1.5-pro",
-"gemini-1.5-pro-latest",
"gemini-1.5-flash",
];
const models = googleAIVariants.map((id) => ({
@@ -83,7 +93,8 @@ function transformGoogleAIResponse(
): Record<string, any> {
const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
const parts = resBody.candidates[0].content?.parts ?? [{ text: "" }];
-const content = parts[0].text.replace(/^(.{0,50}?): /, () => "");
const content = flattenGoogleAIContentParts(parts)
.replace(/^(.{0,50}?): /, () => "");
return {
id: "goo-" + v4(),
object: "chat.completion",
@@ -136,14 +147,19 @@ googleAIRouter.post(
googleAIProxy
);
-/** Replaces requests for non-Google AI models with gemini-pro-1.5-latest. */
/** Replaces requests for non-Google AI models with Gemini 2.5 Flash. */
function maybeReassignModel(req: Request) {
const requested = req.body.model;
if (requested.includes("gemini")) {
if (req.body.stream && isGoogleAIImageModel(requested)) {
throw new BadRequestError(
"Streaming Gemini image-generation models is not currently supported by this proxy. Retry without `stream: true`."
);
}
return;
}
-req.log.info({ requested }, "Reassigning model to gemini-pro-1.5-latest");
req.log.info({ requested }, "Reassigning model to gemini-2.5-flash");
-req.body.model = "gemini-pro-1.5-latest";
req.body.model = "gemini-2.5-flash";
}
export const googleAI = googleAIRouter;
+11 -1
@@ -5,10 +5,15 @@ import { ZodError } from "zod";
import { generateErrorMessage } from "zod-error";
import { HttpError } from "../../shared/errors";
import { assertNever } from "../../shared/utils";
import {
flattenGoogleAIContentParts,
flattenOpenAIResponsesOutput,
} from "../../shared/api-schemas";
import { QuotaExceededError } from "./request/preprocessors/apply-quota-limits";
import { sendErrorToClient } from "./response/error-generator";
const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const OPENAI_RESPONSES_ENDPOINT = "/v1/responses";
const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
@@ -22,6 +27,7 @@ export function isTextGenerationRequest(req: Request) {
req.method === "POST" &&
[
OPENAI_CHAT_COMPLETION_ENDPOINT,
OPENAI_RESPONSES_ENDPOINT,
OPENAI_TEXT_COMPLETION_ENDPOINT,
ANTHROPIC_COMPLETION_ENDPOINT,
ANTHROPIC_MESSAGES_ENDPOINT,
@@ -224,6 +230,8 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
// Can be null if the model wants to invoke tools rather than return a
// completion.
return body.choices[0].message.content || "";
case "openai-responses":
return flattenOpenAIResponsesOutput(body);
case "openai-text": case "openai-text":
return body.choices[0].text; return body.choices[0].text;
case "anthropic-chat": case "anthropic-chat":
@@ -252,7 +260,7 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
if ("choices" in body) { if ("choices" in body) {
return body.choices[0].message.content; return body.choices[0].message.content;
} }
return body.candidates[0].content.parts[0].text; return flattenGoogleAIContentParts(body.candidates?.[0]?.content?.parts);
case "openai-image": case "openai-image":
return body.data?.map((item: any) => item.url).join("\n"); return body.data?.map((item: any) => item.url).join("\n");
default: default:
@@ -267,6 +275,8 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
case "openai-text": case "openai-text":
case "mistral-ai": case "mistral-ai":
return body.model; return body.model;
case "openai-responses":
return body.model || req.body.model;
case "openai-image": case "openai-image":
return req.body.model; return req.body.model;
case "anthropic-chat": case "anthropic-chat":
@@ -47,6 +47,7 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
assignedKey = keyPool.get("dall-e-3", service);
break;
case "openai":
case "openai-responses":
case "google-ai": case "google-ai":
case "mistral-ai": case "mistral-ai":
throw new Error( throw new Error(
@@ -109,9 +110,10 @@ export const addKeyForEmbeddingsRequest: HPMRequestCallback = (
throw new Error("Embeddings requests must be from OpenAI"); throw new Error("Embeddings requests must be from OpenAI");
} }
req.body = { input: req.body.input, model: "text-embedding-ada-002" }; const model = req.body.model || "text-embedding-3-small";
req.body = { input: req.body.input, model };
const key = keyPool.get("text-embedding-ada-002", "openai") as OpenAIKey; const key = keyPool.get(model, "openai") as OpenAIKey;
req.key = key; req.key = key;
req.log.info( req.log.info(
@@ -6,7 +6,7 @@ import {
import { RequestPreprocessor } from "../index";
export const addAzureKey: RequestPreprocessor = (req) => {
-const validAPIs: APIFormat[] = ["openai", "openai-image"];
const validAPIs: APIFormat[] = ["openai", "openai-responses", "openai-image"];
const apisValid = [req.outboundApi, req.inboundApi].every((api) =>
validAPIs.includes(api)
);
@@ -50,6 +50,23 @@ export const addAzureKey: RequestPreprocessor = (req) => {
const cred = req.key as AzureOpenAIKey;
const { resourceName, deploymentId, apiKey } = getCredentialsFromKey(cred);
if (req.outboundApi === "openai-responses") {
req.body.model = deploymentId;
req.signedRequest = {
method: "POST",
protocol: "https:",
hostname: `${resourceName}.openai.azure.com`,
path: `/openai/v1/responses?api-version=preview`,
headers: {
["host"]: `${resourceName}.openai.azure.com`,
["content-type"]: "application/json",
["api-key"]: apiKey,
},
body: JSON.stringify(req.body),
};
return;
}
const operation =
req.outboundApi === "openai" ? "/chat/completions" : "/images/generations";
const apiVersion =
@@ -6,6 +6,7 @@ import {
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
flattenOpenAIResponsesInput,
} from "../../../../shared/api-schemas"; } from "../../../../shared/api-schemas";
/** /**
@@ -18,11 +19,23 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
switch (service) {
case "openai": {
-req.outputTokens = req.body.max_tokens;
req.outputTokens =
req.body.max_completion_tokens ?? req.body.max_tokens ?? 0;
const prompt: OpenAIChatMessage[] = req.body.messages;
result = await countTokens({ req, prompt, service });
break;
}
case "openai-responses": {
req.outputTokens = req.body.max_output_tokens ?? 0;
const prompt = [
flattenOpenAIResponsesInput(req.body.instructions),
flattenOpenAIResponsesInput(req.body.input),
]
.filter(Boolean)
.join("\n\n");
result = await countTokens({ req, prompt, service });
break;
}
case "openai-text": { case "openai-text": {
req.outputTokens = req.body.max_tokens; req.outputTokens = req.body.max_tokens;
const prompt: string = req.body.prompt; const prompt: string = req.body.prompt;
@@ -4,8 +4,10 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { BadRequestError } from "../../../../shared/errors";
import {
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
flattenOpenAIResponsesInput,
flattenAnthropicMessages,
} from "../../../../shared/api-schemas";
@@ -72,11 +74,27 @@ function getPromptFromRequest(req: Request) {
return `${msg.role}: ${text}`;
})
.join("\n\n");
case "openai-responses":
return [
flattenOpenAIResponsesInput(body.instructions),
flattenOpenAIResponsesInput(body.input),
]
.filter(Boolean)
.join("\n\n");
case "openai-text": case "openai-text":
case "openai-image": case "openai-image":
return body.prompt; return body.prompt;
case "google-ai": case "google-ai":
return body.prompt.text; return body.contents
.map(({ parts, role }: GoogleAIChatMessage) => {
const text = parts
.map((part: any) =>
"text" in part ? part.text : "[image omitted]"
)
.join("\n");
return `${role}: ${text}`;
})
.join("\n\n");
default:
assertNever(service);
}
@@ -6,8 +6,8 @@ import { RequestPreprocessor } from "../index";
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
-const GOOGLE_AI_MAX_CONTEXT = 32000;
const GOOGLE_AI_MAX_CONTEXT = 1048576;
-const MISTRAL_AI_MAX_CONTENT = 32768;
const MISTRAL_AI_MAX_CONTENT = 256000;
/**
 * Assigns `req.promptTokens` and `req.outputTokens` based on the request body
@@ -26,6 +26,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let proxyMax: number;
switch (req.outboundApi) {
case "openai":
case "openai-responses":
case "openai-text": case "openai-text":
proxyMax = OPENAI_MAX_CONTEXT; proxyMax = OPENAI_MAX_CONTEXT;
break; break;
@@ -54,6 +55,12 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let modelMax: number;
if (model.match(/gpt-3.5-turbo-16k/)) {
modelMax = 16384;
} else if (model.match(/^gpt-5(\.|-|\b)/)) {
modelMax = 1050000;
} else if (model.match(/^o\d/)) {
modelMax = 200000;
} else if (model.match(/^gpt-4\.1/)) {
modelMax = 1047576;
} else if (model.match(/^gpt-4o/)) {
modelMax = 128000;
} else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
@@ -80,12 +87,27 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 200000;
} else if (model.match(/^claude-3/)) {
modelMax = 200000;
} else if (model.match(/^claude-(opus|sonnet|haiku)-4/)) {
modelMax = 200000;
} else if (model.match(/^gemini-\d{3}$/)) {
modelMax = GOOGLE_AI_MAX_CONTEXT;
} else if (model.match(/^gemini-(2\.5|2\.0)/)) {
modelMax = 1048576;
} else if (model.match(/^mistral-(tiny|small|medium)$/)) {
modelMax = MISTRAL_AI_MAX_CONTENT;
} else if (
model.match(
/^(mistral|ministral|magistral|pixtral|codestral|devstral|voxtral)-/
)
) {
modelMax = MISTRAL_AI_MAX_CONTENT;
} else if (model.match(/^anthropic\.claude-3/)) {
modelMax = 200000;
} else if (
model.match(/^anthropic\.claude-(opus|sonnet|haiku)-4/) ||
model.match(/^claude-(opus|sonnet|haiku)-4@/)
) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude-v2:\d/)) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude/)) {
@@ -121,8 +143,8 @@ function assertRequestHasTokenCounts(
req: Request
): asserts req is Request & { promptTokens: number; outputTokens: number } {
z.object({
-promptTokens: z.number().int().min(1),
-outputTokens: z.number().int().min(1),
promptTokens: z.number().int().min(0),
outputTokens: z.number().int().min(0),
})
.nonstrict()
.parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens });
@@ -3,6 +3,7 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { containsImageContent as containsImageContentOpenAI } from "../../../../shared/api-schemas/openai";
import { containsImageContent as containsImageContentAnthropic } from "../../../../shared/api-schemas/anthropic";
import { containsOpenAIResponsesImageInput } from "../../../../shared/api-schemas";
import { ForbiddenError } from "../../../../shared/errors";
/**
@@ -22,11 +23,20 @@ export const validateVision: RequestPreprocessor = async (req) => {
case "openai": case "openai":
hasImage = containsImageContentOpenAI(req.body.messages); hasImage = containsImageContentOpenAI(req.body.messages);
break; break;
case "openai-responses":
hasImage =
containsOpenAIResponsesImageInput(req.body.instructions) ||
containsOpenAIResponsesImageInput(req.body.input);
break;
case "anthropic-chat": case "anthropic-chat":
hasImage = containsImageContentAnthropic(req.body.messages); hasImage = containsImageContentAnthropic(req.body.messages);
break; break;
case "anthropic-text":
case "google-ai": case "google-ai":
hasImage = req.body.contents?.some((message: { parts: any[] }) =>
message.parts?.some((part) => "inline_data" in part)
);
break;
case "anthropic-text":
case "mistral-ai": case "mistral-ai":
case "openai-image": case "openai-image":
case "openai-text": case "openai-text":
@@ -72,7 +72,15 @@ type ErrorGeneratorOptions = {
};
export function tryInferFormat(body: any): APIFormat | "unknown" {
-if (typeof body !== "object" || !body.model) {
if (typeof body !== "object") {
return "unknown";
}
if (body.object === "response" || Array.isArray(body.output)) {
return "openai-responses";
}
if (!body.model) {
return "unknown";
}
@@ -158,7 +166,30 @@ export function buildSpoofedCompletion({
switch (format) {
case "openai":
case "openai-responses":
case "mistral-ai": case "mistral-ai":
if (format === "openai-responses") {
return {
id: "error-" + id,
object: "response",
created_at: Math.floor(Date.now() / 1000),
model,
status: "completed",
error: null,
incomplete_details: null,
output_text: content,
output: [
{
id: "msg-error-" + id,
type: "message",
role: "assistant",
status: "completed",
content: [{ type: "output_text", text: content, annotations: [] }],
},
],
usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
};
}
return {
id: "error-" + id,
object: "chat.completion",
@@ -248,7 +279,23 @@ export function buildSpoofedSSE({
switch (format) {
case "openai":
case "openai-responses":
case "mistral-ai": case "mistral-ai":
if (format === "openai-responses") {
return (
`data: ${JSON.stringify({
type: "response.completed",
response: buildSpoofedCompletion({
format,
title,
message,
obj,
reqId,
model,
}),
})}\n\n`
);
}
event = {
id: "chatcmpl-" + id,
object: "chat.completion.chunk",
+27 -1
@@ -11,6 +11,7 @@ import { ProxyResHandlerWithBody } from ".";
import { assertNever } from "../../../shared/utils";
import {
AnthropicChatMessage,
flattenOpenAIResponsesInput,
flattenAnthropicMessages, GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
@@ -62,6 +63,7 @@ const getPromptForRequest = (
):
| string
| OpenAIChatMessage[]
| { instructions?: unknown; input?: unknown }
| { contents: GoogleAIChatMessage[] }
| { system: string; messages: AnthropicChatMessage[] }
| MistralAIChatMessage[]
@@ -73,6 +75,11 @@ const getPromptForRequest = (
case "openai": case "openai":
case "mistral-ai": case "mistral-ai":
return req.body.messages; return req.body.messages;
case "openai-responses":
return {
instructions: req.body.instructions,
input: req.body.input,
};
case "anthropic-chat": case "anthropic-chat":
return { system: req.body.system, messages: req.body.messages }; return { system: req.body.system, messages: req.body.messages };
case "openai-text": case "openai-text":
@@ -99,6 +106,7 @@ const flattenMessages = (
| string
| OaiImageResult
| OpenAIChatMessage[]
| { instructions?: unknown; input?: unknown }
| { contents: GoogleAIChatMessage[] }
| { system: string; messages: AnthropicChatMessage[] }
| MistralAIChatMessage[]
@@ -114,12 +122,20 @@ const flattenMessages = (
return val.contents
.map(({ parts, role }) => {
const text = parts
-.map((p) => p.text)
.map((p: any) => ("text" in p ? p.text : "(( Attached Image ))"))
.join("\n");
return `${role}: ${text}`;
})
.join("\n");
}
if (isOpenAIResponsesPrompt(val)) {
return [
flattenOpenAIResponsesInput(val.instructions),
flattenOpenAIResponsesInput(val.input),
]
.filter(Boolean)
.join("\n\n");
}
if (Array.isArray(val)) {
return val
.map(({ content, role }) => {
@@ -140,6 +156,16 @@ const flattenMessages = (
return val.prompt.trim();
};
function isOpenAIResponsesPrompt(
val: unknown
): val is { instructions?: unknown; input?: unknown } {
return (
typeof val === "object" &&
val !== null &&
("instructions" in val || "input" in val)
);
}
function isGoogleAIChatPrompt(
val: unknown
): val is { contents: GoogleAIChatMessage[] } {
@@ -8,6 +8,7 @@ import {
mergeEventsForOpenAIText,
AnthropicV2StreamEvent,
OpenAIChatCompletionStreamEvent,
OpenAIResponsesStreamEvent,
} from "./index"; } from "./index";
/** /**
@@ -17,13 +18,36 @@ import {
export class EventAggregator {
private readonly format: APIFormat;
private readonly events: OpenAIChatCompletionStreamEvent[];
private responseBody: Record<string, any> | null;
private responseEventCount: number;
private responseOutputText: string;
constructor({ format }: { format: APIFormat }) {
this.events = [];
this.format = format;
this.responseBody = null;
this.responseEventCount = 0;
this.responseOutputText = "";
}
-addEvent(event: OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent) {
addEvent(
event:
| OpenAIChatCompletionStreamEvent
| AnthropicV2StreamEvent
| OpenAIResponsesStreamEvent
) {
if (eventIsOpenAIResponsesEvent(event)) {
this.responseEventCount++;
if (event.response && typeof event.response === "object") {
this.responseBody = event.response;
}
if (event.type === "response.output_text.delta") {
this.responseOutputText += event.delta || event.text || "";
}
return;
}
if (eventIsOpenAIEvent(event)) {
this.events.push(event);
} else {
@@ -52,8 +76,15 @@ export class EventAggregator {
getFinalResponse() {
switch (this.format) {
case "openai":
case "openai-responses":
case "google-ai": case "google-ai":
case "mistral-ai": case "mistral-ai":
if (this.format === "openai-responses") {
if (this.responseBody) {
return this.responseBody;
}
return { output_text: this.responseOutputText };
}
return mergeEventsForOpenAIChat(this.events);
case "openai-text":
return mergeEventsForOpenAIText(this.events);
@@ -69,7 +100,7 @@ export class EventAggregator {
}
hasEvents() {
-return this.events.length > 0;
return this.events.length > 0 || this.responseEventCount > 0;
}
}
@@ -78,3 +109,9 @@ function eventIsOpenAIEvent(
): event is OpenAIChatCompletionStreamEvent {
return event?.object === "chat.completion.chunk";
}
function eventIsOpenAIResponsesEvent(
event: any
): event is OpenAIResponsesStreamEvent {
return typeof event?.type === "string" && event.type.startsWith("response.");
}
@@ -26,6 +26,14 @@ export type OpenAIChatCompletionStreamEvent = {
}[];
};
export type OpenAIResponsesStreamEvent = {
type: string;
response?: Record<string, any>;
delta?: string;
text?: string;
[key: string]: any;
};
export type StreamingCompletionTransformer<
T = OpenAIChatCompletionStreamEvent,
S = any,
@@ -42,6 +50,7 @@ export { anthropicChatToAnthropicV2 } from "./transformers/anthropic-chat-to-ant
export { anthropicChatToOpenAI } from "./transformers/anthropic-chat-to-openai";
export { googleAIToOpenAI } from "./transformers/google-ai-to-openai";
export { passthroughToOpenAI } from "./transformers/passthrough-to-openai";
export { passthroughToOpenAIResponses } from "./transformers/passthrough-to-openai-responses";
export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat";
export { mergeEventsForOpenAIText } from "./aggregators/openai-text";
export { mergeEventsForAnthropicText } from "./aggregators/anthropic-text";
@@ -10,8 +10,10 @@ import {
anthropicV2ToOpenAI,
googleAIToOpenAI,
OpenAIChatCompletionStreamEvent,
OpenAIResponsesStreamEvent,
openAITextToOpenAIChat,
passthroughToOpenAI,
passthroughToOpenAIResponses,
StreamingCompletionTransformer,
} from "./index";
@@ -35,7 +37,9 @@ export class SSEMessageTransformer extends Transform {
private readonly inputFormat: APIFormat;
private readonly transformFn: StreamingCompletionTransformer<
// TODO: Refactor transformers to not assume only OpenAI events as output
-OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
| OpenAIChatCompletionStreamEvent
| AnthropicV2StreamEvent
| OpenAIResponsesStreamEvent
>;
private readonly log;
private readonly fallbackId: string;
@@ -126,12 +130,14 @@ function getTransformer(
// used for that case.
requestApi: APIFormat = "openai"
): StreamingCompletionTransformer<
-OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent | OpenAIResponsesStreamEvent
> {
switch (responseApi) {
case "openai":
case "mistral-ai":
return passthroughToOpenAI;
case "openai-responses":
return passthroughToOpenAIResponses;
case "openai-text": case "openai-text":
return openAITextToOpenAIChat; return openAITextToOpenAIChat;
case "anthropic-text": case "anthropic-text":
@@ -0,0 +1,43 @@
import {
OpenAIResponsesStreamEvent,
SSEResponseTransformArgs,
StreamingCompletionTransformer,
} from "../index";
import { parseEvent, ServerSentEvent } from "../parse-sse";
import { logger } from "../../../../../logger";
const log = logger.child({
module: "sse-transformer",
transformer: "openai-responses-to-openai-responses",
});
export const passthroughToOpenAIResponses: StreamingCompletionTransformer<
OpenAIResponsesStreamEvent
> = (
params: SSEResponseTransformArgs
) => {
const { data } = params;
const rawEvent = parseEvent(data);
if (!rawEvent.data || rawEvent.data === "[DONE]") {
return { position: -1 };
}
const responseEvent = asResponseEvent(rawEvent);
if (!responseEvent) {
return { position: -1 };
}
return { position: -1, event: responseEvent };
};
function asResponseEvent(
event: ServerSentEvent
): OpenAIResponsesStreamEvent | null {
try {
return JSON.parse(event.data) as OpenAIResponsesStreamEvent;
} catch (error) {
    log.warn({ error: error instanceof Error ? error.stack : error, event }, "Received invalid event");
}
return null;
}
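A minimal usage sketch of the pass-through behavior, assuming SSEResponseTransformArgs carries at least the raw SSE data payload (the cast and the event shape are illustrative, not exhaustive):

// Hypothetical streaming delta in the Responses API's wire format.
const out = passthroughToOpenAIResponses({
  data: 'data: {"type":"response.output_text.delta","delta":"Hi"}',
} as SSEResponseTransformArgs);
// out.event is the parsed JSON payload; the "[DONE]" sentinel and
// unparseable data both yield { position: -1 } with no event.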
+20 -16
View File
@@ -24,25 +24,29 @@ import {
// https://docs.mistral.ai/platform/endpoints // https://docs.mistral.ai/platform/endpoints
export const KNOWN_MISTRAL_AI_MODELS = [ export const KNOWN_MISTRAL_AI_MODELS = [
// Mistral 7b (open weight, legacy)
"open-mistral-7b",
"mistral-tiny-2312",
// Mixtral 8x7b (open weight, legacy)
"open-mixtral-8x7b",
"mistral-small-2312",
// Mixtral Small (newer 8x7b, closed weight)
"mistral-small-latest", "mistral-small-latest",
"mistral-small-2402", "mistral-small-2603",
// Mistral Medium "mistral-small-2506",
"mistral-medium-latest", "mistral-medium-latest",
"mistral-medium-2312", "mistral-medium-2508",
// Mistral Large "mistral-medium-2505",
"magistral-medium-latest",
"magistral-medium-2507",
"magistral-small-2507",
"mistral-large-latest", "mistral-large-latest",
"mistral-large-2402", "mistral-large-2512",
// Deprecated identifiers (2024-05-01) "ministral-14b-2512",
"mistral-tiny", "ministral-8b-latest",
"mistral-small", "ministral-8b-2512",
"mistral-medium", "ministral-3b-latest",
"ministral-3b-2512",
"pixtral-large-latest",
"pixtral-large-2411",
"codestral-latest",
"codestral-2508",
"devstral-small-latest",
"devstral-small-2507",
"devstral-medium-2507",
]; ];
let modelsCache: any = null; let modelsCache: any = null;
+7 -1
View File
@@ -18,7 +18,13 @@ import {
import { generateModelList } from "./openai"; import { generateModelList } from "./openai";
import { OpenAIImageGenerationResult } from "../shared/file-storage/mirror-generated-image"; import { OpenAIImageGenerationResult } from "../shared/file-storage/mirror-generated-image";
const KNOWN_MODELS = ["dall-e-2", "dall-e-3"]; const KNOWN_MODELS = [
"dall-e-2",
"dall-e-3",
"gpt-image-1.5",
"gpt-image-1",
"gpt-image-1-mini",
];
let modelListCache: any = null; let modelListCache: any = null;
let modelListValid = 0; let modelListValid = 0;
+63 -10
View File
@@ -28,28 +28,57 @@ import {
// https://platform.openai.com/docs/models/overview // https://platform.openai.com/docs/models/overview
export const KNOWN_OPENAI_MODELS = [ export const KNOWN_OPENAI_MODELS = [
"gpt-5.2",
"gpt-5.2-chat",
"gpt-5.2-chat-latest",
"gpt-5.2-pro",
"gpt-5.2-codex",
"gpt-5.1",
"gpt-5.1-chat",
"gpt-5.1-codex",
"gpt-5.1-codex-mini",
"gpt-5.1-codex-max",
"gpt-5",
"gpt-5-chat",
"gpt-5-pro",
"gpt-5-codex",
"gpt-5-mini",
"gpt-5-nano",
"gpt-4.1",
"gpt-4.1-2025-04-14",
"gpt-4.1-mini",
"gpt-4.1-nano",
"o3-pro",
"o3-deep-research",
"computer-use-preview",
"o4-mini",
"o4-mini-deep-research",
"o3",
"o3-mini",
"o1",
"o1-pro",
"gpt-4o", "gpt-4o",
"gpt-4o-2024-08-06",
"gpt-4o-mini",
"gpt-4o-2024-05-13", "gpt-4o-2024-05-13",
"gpt-4-turbo", // alias for latest gpt4-turbo stable "gpt-4-turbo", // alias for latest gpt4-turbo stable
"gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision "gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
"gpt-4-turbo-preview", // alias for latest turbo preview
"gpt-4-0125-preview", // gpt4-turbo preview 2
"gpt-4-1106-preview", // gpt4-turbo preview 1
"gpt-4-vision-preview", // gpt4-turbo preview 1 with vision
"gpt-4", "gpt-4",
"gpt-4-0613", "gpt-4-0613",
"gpt-4-0314", // EOL 2024-06-13
"gpt-4-32k", "gpt-4-32k",
"gpt-4-32k-0314", // EOL 2024-06-13
"gpt-4-32k-0613", "gpt-4-32k-0613",
"gpt-3.5-turbo", "gpt-3.5-turbo",
"gpt-3.5-turbo-0301", // EOL 2024-06-13
"gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct",
"gpt-3.5-turbo-instruct-0914", "gpt-3.5-turbo-instruct-0914",
"text-embedding-3-small",
"text-embedding-3-large",
"text-embedding-ada-002", "text-embedding-ada-002",
"gpt-image-1.5",
"gpt-image-1",
"gpt-image-1-mini",
"dall-e-3",
"dall-e-2",
]; ];
let modelsCache: any = null; let modelsCache: any = null;
@@ -59,11 +88,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
// Get available families and snapshots // Get available families and snapshots
let availableFamilies = new Set<OpenAIModelFamily>(); let availableFamilies = new Set<OpenAIModelFamily>();
const availableSnapshots = new Set<string>(); const availableSnapshots = new Set<string>();
const availableModelIds = new Set<string>();
for (const key of keyPool.list()) { for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "openai") continue; if (key.isDisabled || key.service !== "openai") continue;
const asOpenAIKey = key as OpenAIKey; const asOpenAIKey = key as OpenAIKey;
asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f)); asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f));
asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s)); asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s));
asOpenAIKey.modelIds.forEach((id) => availableModelIds.add(id));
} }
// Remove disabled families // Remove disabled families
@@ -71,8 +102,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
availableFamilies = new Set( availableFamilies = new Set(
[...availableFamilies].filter((x) => allowed.has(x)) [...availableFamilies].filter((x) => allowed.has(x))
); );
const usingExactModelIds = availableModelIds.size > 0;
return models const sourceModels = usingExactModelIds
? [...new Set([...models, ...availableModelIds])]
: models;
return sourceModels
.map((id) => ({ .map((id) => ({
id, id,
object: "model", object: "model",
@@ -92,6 +128,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
parent: null, parent: null,
})) }))
.filter((model) => { .filter((model) => {
if (usingExactModelIds) {
return (
allowed.has(getOpenAIModelFamily(model.id)) &&
availableModelIds.has(model.id)
);
}
// First check if the family is available // First check if the family is available
const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id)); const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id));
if (!hasFamily) return false; if (!hasFamily) return false;
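In effect, once any active key reports an exact model inventory, the advertised list becomes the union of known and reported IDs filtered down to those that are both reported and in an allowed family; a key reporting only "gpt-5" and "gpt-4o" yields exactly those two entries from /v1/models, even though KNOWN_OPENAI_MODELS contains dozens of IDs. Without exact inventories, the family/snapshot heuristics below apply unchanged.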
@@ -233,6 +276,16 @@ openaiRouter.post(
}), }),
openaiProxy openaiProxy
); );
openaiRouter.post(
"/v1/responses",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai-responses",
outApi: "openai-responses",
service: "openai",
}),
openaiProxy
);
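For reference, a minimal client call against the new route could look like the sketch below; the host, mount prefix, and bearer token are placeholder assumptions, since the public path depends on how the proxy is deployed:

const res = await fetch("https://proxy.example.com/openai/v1/responses", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: "Bearer <user-token>", // hypothetical proxy user token
  },
  body: JSON.stringify({ model: "gpt-5", input: "Say hi.", stream: false }),
});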
// Embeddings endpoint. // Embeddings endpoint.
openaiRouter.post( openaiRouter.post(
"/v1/embeddings", "/v1/embeddings",
+20 -13
View File
@@ -31,18 +31,24 @@ export const AnthropicV1TextSchema = AnthropicV1BaseSchema.merge(
.int() .int()
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)), .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
}) })
); ).passthrough();
const AnthropicV1MessageMultimodalContentSchema = z.array( const AnthropicV1MessageMultimodalContentSchema = z.array(
z.union([ z.union([
z.object({ type: z.literal("text"), text: z.string() }), z.object({ type: z.literal("text"), text: z.string() }),
z.object({ z.object({
type: z.literal("image"), type: z.literal("image"),
source: z.object({ source: z.union([
type: z.literal("base64"), z.object({
media_type: z.string().max(100), type: z.literal("base64"),
data: z.string(), media_type: z.string().max(100),
}), data: z.string(),
}),
z.object({
type: z.literal("url"),
url: z.string().url(),
}),
]),
}), }),
]) ])
); );
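With the widened source union, both of these content blocks now validate (sample payload):

const message = {
  role: "user",
  content: [
    { type: "text", text: "What is in this image?" },
    { type: "image", source: { type: "url", url: "https://example.com/cat.png" } },
    // Base64 uploads keep working exactly as before:
    // { type: "image", source: { type: "base64", media_type: "image/png", data: "..." } },
  ],
};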
@@ -65,7 +71,7 @@ export const AnthropicV1MessagesSchema = AnthropicV1BaseSchema.merge(
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)), .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
system: z.string().optional(), system: z.string().optional(),
}) })
); ).passthrough();
export type AnthropicChatMessage = z.infer< export type AnthropicChatMessage = z.infer<
typeof AnthropicV1MessagesSchema typeof AnthropicV1MessagesSchema
>["messages"][0]; >["messages"][0];
@@ -77,7 +83,7 @@ function openAIMessagesToClaudeTextPrompt(messages: OpenAIChatMessage[]) {
let role: string = m.role; let role: string = m.role;
if (role === "assistant") { if (role === "assistant") {
role = "Assistant"; role = "Assistant";
} else if (role === "system") { } else if (role === "system" || role === "developer") {
role = "System"; role = "System";
} else if (role === "user") { } else if (role === "user") {
role = "Human"; role = "Human";
@@ -115,12 +121,13 @@ export const transformOpenAIToAnthropicChat: APIFormatTransformer<
system, system,
messages: newMessages, messages: newMessages,
model: rest.model, model: rest.model,
max_tokens: rest.max_tokens, max_tokens: rest.max_completion_tokens ?? rest.max_tokens,
stream: rest.stream, stream: rest.stream,
temperature: rest.temperature, temperature: rest.temperature,
top_p: rest.top_p, top_p: rest.top_p,
stop_sequences: stop_sequences:
typeof rest.stop === "string" ? [rest.stop] : rest.stop || undefined, typeof rest.stop === "string" ? [rest.stop] : rest.stop || undefined,
...(rest.thinking ? { thinking: rest.thinking } : {}),
...(rest.user ? { metadata: { user_id: rest.user } } : {}), ...(rest.user ? { metadata: { user_id: rest.user } } : {}),
// Anthropic supports top_k, but OpenAI does not // Anthropic supports top_k, but OpenAI does not
// OpenAI supports frequency_penalty, presence_penalty, logit_bias, n, seed, // OpenAI supports frequency_penalty, presence_penalty, logit_bias, n, seed,
@@ -162,7 +169,7 @@ export const transformOpenAIToAnthropicText: APIFormatTransformer<
return { return {
model: rest.model, model: rest.model,
prompt: prompt, prompt: prompt,
max_tokens_to_sample: rest.max_tokens, max_tokens_to_sample: rest.max_completion_tokens ?? rest.max_tokens,
stop_sequences: stops, stop_sequences: stops,
stream: rest.stream, stream: rest.stream,
temperature: rest.temperature, temperature: rest.temperature,
@@ -366,7 +373,7 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
// Here we will lose the original name if it was a system message, but that // Here we will lose the original name if it was a system message, but that
// is generally okay because the system message is usually a prompt and not // is generally okay because the system message is usually a prompt and not
// a character in the chat. // a character in the chat.
const name = msg.role === "system" ? "System" : msg.name?.trim(); const name = isSystemOpenAIRole(msg.role) ? "System" : msg.name?.trim();
const content = convertOpenAIContent(msg.content); const content = convertOpenAIContent(msg.content);
// Prepend the display name to the first text content in the current message // Prepend the display name to the first text content in the current message
@@ -396,8 +403,8 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
function isSystemOpenAIRole( function isSystemOpenAIRole(
role: OpenAIChatMessage["role"] role: OpenAIChatMessage["role"]
): role is "system" | "function" | "tool" { ): role is "system" | "developer" | "function" | "tool" {
return ["system", "function", "tool"].includes(role); return ["system", "developer", "function", "tool"].includes(role);
} }
function getFirstTextContent(content: OpenAIChatMessage["content"]) { function getFirstTextContent(content: OpenAIChatMessage["content"]) {
+201 -29
View File
@@ -1,42 +1,62 @@
import { z } from "zod"; import { z } from "zod";
import { import {
flattenOpenAIMessageContent, flattenOpenAIMessageContent,
OpenAIChatMessage,
OpenAIV1ChatCompletionSchema, OpenAIV1ChatCompletionSchema,
} from "./openai"; } from "./openai";
import { APIFormatTransformer } from "./index"; import { APIFormatTransformer } from "./index";
const GoogleAIContentPartSchema = z.union([
z.object({ text: z.string() }),
z.object({
inline_data: z.object({
mime_type: z.string().max(100),
data: z.string(),
}),
}),
]);
// https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent // https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent
export const GoogleAIV1GenerateContentSchema = z export const GoogleAIV1GenerateContentSchema = z
.object({ .object({
model: z.string().max(100), //actually specified in path but we need it for the router model: z.string().max(100), // actually specified in path but we need it for the router
stream: z.boolean().optional().default(false), // also used for router stream: z.boolean().optional().default(false), // also used for router
contents: z.array( contents: z.array(
z.object({ z.object({
parts: z.array(z.object({ text: z.string() })), parts: z.array(GoogleAIContentPartSchema),
role: z.enum(["user", "model"]), role: z.enum(["user", "model"]),
}) })
), ),
tools: z.array(z.object({})).max(0).optional(), tools: z.array(z.any()).optional(),
safetySettings: z.array(z.object({})).max(0).optional(), toolConfig: z.any().optional(),
safetySettings: z.array(z.any()).optional(),
systemInstruction: z.any().optional(),
generationConfig: z.object({ generationConfig: z.object({
temperature: z.number().optional(), temperature: z.number().optional(),
maxOutputTokens: z.coerce maxOutputTokens: z.coerce
.number() .number()
.int() .int()
.optional() .optional()
.default(16) .default(1024)
.transform((v) => Math.min(v, 1024)), // TODO: Add config .transform((v) => Math.min(v, 65536)),
candidateCount: z.literal(1).optional(), candidateCount: z.literal(1).optional(),
topP: z.number().optional(), topP: z.number().optional(),
topK: z.number().optional(), topK: z.number().optional(),
responseMimeType: z.string().optional(),
responseSchema: z.any().optional(),
responseJsonSchema: z.any().optional(),
responseModalities: z.array(z.string()).optional(),
thinkingConfig: z.any().optional(),
stopSequences: z.array(z.string().max(500)).max(5).optional(), stopSequences: z.array(z.string().max(500)).max(5).optional(),
}), }),
}) })
.strip(); .passthrough();
export type GoogleAIChatMessage = z.infer< export type GoogleAIChatMessage = z.infer<
typeof GoogleAIV1GenerateContentSchema typeof GoogleAIV1GenerateContentSchema
>["contents"][0]; >["contents"][0];
type GoogleAIPart = GoogleAIChatMessage["parts"][number];
export const transformOpenAIToGoogleAI: APIFormatTransformer< export const transformOpenAIToGoogleAI: APIFormatTransformer<
typeof GoogleAIV1GenerateContentSchema typeof GoogleAIV1GenerateContentSchema
> = async (req) => { > = async (req) => {
@@ -54,40 +74,51 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
} }
const { messages, ...rest } = result.data; const { messages, ...rest } = result.data;
const systemMessages = messages.filter(
(m) => m.role === "system" || m.role === "developer"
);
const foundNames = new Set<string>(); const foundNames = new Set<string>();
const model = req.body.model;
const customThinkingConfig =
getObjectField(body, "thinkingConfig") ??
getObjectField(getObjectField(body, "generationConfig"), "thinkingConfig");
const customResponseModalities = getStringArrayField(
getObjectField(body, "generationConfig"),
"responseModalities"
);
const contents = messages const contents = messages
.filter((m) => m.role !== "system" && m.role !== "developer")
.map((m) => { .map((m) => {
const role = m.role === "assistant" ? "model" : "user"; const role = m.role === "assistant" ? "model" : "user";
// Detects character names so we can set stop sequences for them as Gemini const parts = convertOpenAIContent(m.content);
// is prone to continuing as the next character. const text = parts
// If names are not available, we'll still try to prefix the message .map((part) => ("text" in part ? part.text : ""))
// with generic names so we can set stops for them but they don't work .join("\n");
// as well as real names.
const text = flattenOpenAIMessageContent(m.content);
const propName = m.name?.trim(); const propName = m.name?.trim();
const textName = const textName = text.match(/^(.{0,50}?): /)?.[1]?.trim();
m.role === "system" ? "" : text.match(/^(.{0,50}?): /)?.[1]?.trim(); const name = propName || textName || (role === "model" ? "Character" : "User");
const name =
propName || textName || (role === "model" ? "Character" : "User");
foundNames.add(name); foundNames.add(name);
// Prefixing messages with their character name seems to help avoid // Prefixing speaker names helps Gemini avoid continuing as the next
// Gemini trying to continue as the next character, or at the very least // character in multi-party roleplay/chat prompts.
// ensures it will hit the stop sequence. Otherwise it will start a new
// paragraph and switch perspectives.
// The response will be very likely to include this prefix so frontends
// will need to strip it out.
const textPrefix = textName ? "" : `${name}: `; const textPrefix = textName ? "" : `${name}: `;
const firstTextPart = parts.find(
(part): part is Extract<GoogleAIPart, { text: string }> => "text" in part
);
if (firstTextPart) {
firstTextPart.text = textPrefix + firstTextPart.text;
}
return { return {
parts: [{ text: textPrefix + text }], parts,
role: m.role === "assistant" ? ("model" as const) : ("user" as const), role: m.role === "assistant" ? ("model" as const) : ("user" as const),
}; };
}) })
.reduce<GoogleAIChatMessage[]>((acc, msg) => { .reduce<GoogleAIChatMessage[]>((acc, msg) => {
const last = acc[acc.length - 1]; const last = acc[acc.length - 1];
if (last?.role === msg.role) { if (last?.role === msg.role) {
last.parts[0].text += "\n\n" + msg.parts[0].text; last.parts.push(...msg.parts);
} else { } else {
acc.push(msg); acc.push(msg);
} }
@@ -102,17 +133,44 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
stops.push(...Array.from(foundNames).map((name) => `\n${name}:`)); stops.push(...Array.from(foundNames).map((name) => `\n${name}:`));
stops = [...new Set(stops)].slice(0, 5); stops = [...new Set(stops)].slice(0, 5);
const responseFormat = rest.response_format as Record<string, any> | undefined;
const maxOutputTokens =
rest.max_completion_tokens ?? rest.max_tokens ?? 1024;
return { return {
model: req.body.model, model,
stream: rest.stream, stream: rest.stream,
contents, contents,
tools: [], tools: Array.isArray(rest.tools) ? rest.tools : undefined,
systemInstruction: systemMessages.length
? {
parts: [
{
text: systemMessages
.map((msg) => flattenOpenAIMessageContent(msg.content))
.join("\n\n"),
},
],
}
: undefined,
generationConfig: { generationConfig: {
maxOutputTokens: rest.max_tokens, maxOutputTokens,
stopSequences: stops, stopSequences: stops,
topP: rest.top_p, topP: rest.top_p,
topK: 40, // openai schema doesn't have this, google ai defaults to 40 topK: 40, // OpenAI schema doesn't expose this; Gemini defaults to 40.
temperature: rest.temperature, temperature: rest.temperature,
responseMimeType:
responseFormat?.type === "json_object" ||
responseFormat?.type === "json_schema"
? "application/json"
: undefined,
responseSchema: responseFormat?.json_schema?.schema,
responseJsonSchema: responseFormat?.json_schema?.schema,
responseModalities:
customResponseModalities ??
(isGoogleAIImageModel(model) ? ["TEXT", "IMAGE"] : undefined),
thinkingConfig:
customThinkingConfig ?? getThinkingConfig(model, rest.reasoning_effort),
}, },
safetySettings: [ safetySettings: [
{ category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
@@ -122,3 +180,117 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
], ],
}; };
}; };
function convertOpenAIContent(
content: OpenAIChatMessage["content"]
): GoogleAIPart[] {
if (typeof content === "string") {
return [{ text: content }];
}
return content.map((item) => {
if ("text" in item) {
return { text: item.text };
}
if ("refusal" in item) {
return { text: item.refusal };
}
const url = item.image_url.url;
if (!url.startsWith("data:")) {
return { text: "[ Unsupported image URL ]" };
}
const [meta, data = ""] = url.split(",", 2);
const mimeType = meta.split(";")[0].replace("data:", "");
return { inline_data: { mime_type: mimeType, data } };
});
}
function getThinkingConfig(model: string, reasoningEffort?: string) {
if (model.startsWith("gemini-2.5")) {
switch (reasoningEffort) {
case "none":
case "minimal":
case "low":
return { thinkingBudget: 0 };
default:
return undefined;
}
}
switch (reasoningEffort) {
case "low":
case "minimal":
case "none":
return { thinkingLevel: "LOW" };
case "medium":
case "high":
case "xhigh":
return { thinkingLevel: "HIGH" };
default:
return undefined;
}
}
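The mapping this produces, for a few representative inputs:

// getThinkingConfig("gemini-2.5-pro", "low")    → { thinkingBudget: 0 }
// getThinkingConfig("gemini-2.5-pro", "high")   → undefined (model default)
// getThinkingConfig("gemini-3-pro", "minimal")  → { thinkingLevel: "LOW" }
// getThinkingConfig("gemini-3-pro", "high")     → { thinkingLevel: "HIGH" }
// getThinkingConfig("gemini-3-pro", undefined)  → undefined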
export function isGoogleAIImageModel(model: string) {
return [
"gemini-2.0-flash-preview-image-generation",
"gemini-2.5-flash-image",
"gemini-3-pro-image-preview",
].includes(model);
}
export function flattenGoogleAIContentParts(
parts: Array<Record<string, any>> | undefined
) {
return (parts ?? [])
.map((part) => {
if (typeof part?.text === "string") {
return part.text;
}
const inlineData = part?.inline_data ?? part?.inlineData;
if (inlineData?.data) {
const mimeType = inlineData.mime_type ?? inlineData.mimeType ?? "image/png";
return `![generated image](data:${mimeType};base64,${inlineData.data})`;
}
return "";
})
.filter(Boolean)
.join("\n\n");
}
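For example, a candidate containing a text part followed by a camelCase inlineData image part flattens to markdown that frontends can render inline:

// flattenGoogleAIContentParts([
//   { text: "Here you go:" },
//   { inlineData: { mimeType: "image/png", data: "iVBORw0..." } },
// ])
// → "Here you go:\n\n![generated image](data:image/png;base64,iVBORw0...)"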
function getObjectField(
value: unknown,
key: string
): Record<string, any> | undefined {
if (
value &&
typeof value === "object" &&
!Array.isArray(value) &&
key in value &&
value[key as keyof typeof value] &&
typeof value[key as keyof typeof value] === "object" &&
!Array.isArray(value[key as keyof typeof value])
) {
return value[key as keyof typeof value] as Record<string, any>;
}
return undefined;
}
function getStringArrayField(value: unknown, key: string) {
if (
value &&
typeof value === "object" &&
!Array.isArray(value) &&
key in value &&
Array.isArray(value[key as keyof typeof value])
) {
return (value[key as keyof typeof value] as unknown[]).filter(
(item): item is string => typeof item === "string"
);
}
return undefined;
}
+12 -1
View File
@@ -17,6 +17,7 @@ import {
OpenAIV1ImagesGenerationSchema, OpenAIV1ImagesGenerationSchema,
transformOpenAIToOpenAIImage, transformOpenAIToOpenAIImage,
} from "./openai-image"; } from "./openai-image";
import { OpenAIResponsesCreateSchema } from "./openai-responses";
import { import {
GoogleAIV1GenerateContentSchema, GoogleAIV1GenerateContentSchema,
transformOpenAIToGoogleAI, transformOpenAIToGoogleAI,
@@ -24,13 +25,22 @@ import {
import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai"; import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai";
export { OpenAIChatMessage } from "./openai"; export { OpenAIChatMessage } from "./openai";
export {
containsOpenAIResponsesImageInput,
flattenOpenAIResponsesInput,
flattenOpenAIResponsesOutput,
} from "./openai-responses";
export { export {
AnthropicChatMessage, AnthropicChatMessage,
AnthropicV1TextSchema, AnthropicV1TextSchema,
AnthropicV1MessagesSchema, AnthropicV1MessagesSchema,
flattenAnthropicMessages, flattenAnthropicMessages,
} from "./anthropic"; } from "./anthropic";
export { GoogleAIChatMessage } from "./google-ai"; export {
GoogleAIChatMessage,
flattenGoogleAIContentParts,
isGoogleAIImageModel,
} from "./google-ai";
export { MistralAIChatMessage } from "./mistral-ai"; export { MistralAIChatMessage } from "./mistral-ai";
type APIPair = `${APIFormat}->${APIFormat}`; type APIPair = `${APIFormat}->${APIFormat}`;
@@ -55,6 +65,7 @@ export const API_REQUEST_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
"anthropic-chat": AnthropicV1MessagesSchema, "anthropic-chat": AnthropicV1MessagesSchema,
"anthropic-text": AnthropicV1TextSchema, "anthropic-text": AnthropicV1TextSchema,
openai: OpenAIV1ChatCompletionSchema, openai: OpenAIV1ChatCompletionSchema,
"openai-responses": OpenAIResponsesCreateSchema,
"openai-text": OpenAIV1TextCompletionSchema, "openai-text": OpenAIV1TextCompletionSchema,
"openai-image": OpenAIV1ImagesGenerationSchema, "openai-image": OpenAIV1ImagesGenerationSchema,
"google-ai": GoogleAIV1GenerateContentSchema, "google-ai": GoogleAIV1GenerateContentSchema,
+1 -1
View File
@@ -20,7 +20,7 @@ export const MistralAIV1ChatCompletionsSchema = z.object({
stream: z.boolean().optional().default(false), stream: z.boolean().optional().default(false),
safe_prompt: z.boolean().optional().default(false), safe_prompt: z.boolean().optional().default(false),
random_seed: z.number().int().optional(), random_seed: z.number().int().optional(),
}); }).passthrough();
export type MistralAIChatMessage = z.infer< export type MistralAIChatMessage = z.infer<
typeof MistralAIV1ChatCompletionsSchema typeof MistralAIV1ChatCompletionsSchema
>["messages"][0]; >["messages"][0];
+33 -9
View File
@@ -5,19 +5,34 @@ import { APIFormatTransformer } from "./index";
// https://platform.openai.com/docs/api-reference/images/create // https://platform.openai.com/docs/api-reference/images/create
export const OpenAIV1ImagesGenerationSchema = z export const OpenAIV1ImagesGenerationSchema = z
.object({ .object({
prompt: z.string().max(4000), prompt: z.string().max(32000),
model: z.string().max(100).optional(), model: z.string().max(100).optional(),
quality: z.enum(["standard", "hd"]).optional().default("standard"), quality: z
n: z.number().int().min(1).max(4).optional().default(1), .enum(["auto", "low", "medium", "high", "standard", "hd"])
.optional(),
n: z.number().int().min(1).max(10).optional().default(1),
response_format: z.enum(["url", "b64_json"]).optional(), response_format: z.enum(["url", "b64_json"]).optional(),
output_format: z.string().optional(),
output_compression: z.number().int().min(0).max(100).optional(),
size: z size: z
.enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]) .enum([
"auto",
"256x256",
"512x512",
"1024x1024",
"1024x1536",
"1536x1024",
"1792x1024",
"1024x1792",
])
.optional() .optional()
.default("1024x1024"), .default("1024x1024"),
style: z.enum(["vivid", "natural"]).optional().default("vivid"), style: z.enum(["vivid", "natural"]).optional().default("vivid"),
background: z.string().optional(),
moderation: z.string().optional(),
user: z.string().max(500).optional(), user: z.string().max(500).optional(),
}) })
.strip(); .passthrough();
// Takes the last chat message and uses it verbatim as the image prompt. // Takes the last chat message and uses it verbatim as the image prompt.
export const transformOpenAIToOpenAIImage: APIFormatTransformer< export const transformOpenAIToOpenAIImage: APIFormatTransformer<
@@ -57,12 +72,21 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
} }
// TODO: Add some way to specify parameters via chat message // TODO: Add some way to specify parameters via chat message
const transformed = { const requestedModel = String(body.model ?? "");
model: body.model.includes("dall-e") ? body.model : "dall-e-3", const model =
quality: "standard", requestedModel.includes("dall-e") || requestedModel.includes("gpt-image")
? requestedModel
: "gpt-image-1.5";
const transformed: Record<string, any> = {
model,
size: "1024x1024", size: "1024x1024",
response_format: "url",
prompt: prompt.slice(index! + 6).trim(), prompt: prompt.slice(index! + 6).trim(),
}; };
if (model.includes("dall-e")) {
transformed.quality = "standard";
transformed.response_format = "url";
}
return OpenAIV1ImagesGenerationSchema.parse(transformed); return OpenAIV1ImagesGenerationSchema.parse(transformed);
}; };
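The upshot: requests naming a dall-e-* or gpt-image-* model pass that model through, anything else now falls back to gpt-image-1.5 rather than dall-e-3, and the quality/response_format defaults are attached only for DALL-E models, since the GPT Image models do not accept response_format.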
+136
View File
@@ -0,0 +1,136 @@
import { z } from "zod";
import { OPENAI_OUTPUT_MAX } from "./openai";
const OpenAIResponsesReasoningSchema = z
.object({
effort: z.string().optional(),
summary: z.union([z.string(), z.array(z.string())]).optional(),
})
.passthrough();
const OpenAIResponsesTextSchema = z
.object({
format: z.any().optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
})
.passthrough();
export const OpenAIResponsesCreateSchema = z
.object({
model: z.string().max(100),
input: z.union([z.string(), z.array(z.any())]).optional(),
instructions: z.union([z.string(), z.array(z.any())]).optional(),
previous_response_id: z.string().max(100).optional(),
stream: z.boolean().optional().default(false),
max_output_tokens: z.coerce
.number()
.int()
.nullish()
.default(OPENAI_OUTPUT_MAX)
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
temperature: z.number().optional(),
top_p: z.number().optional(),
user: z.string().max(500).optional(),
safety_identifier: z.string().max(500).optional(),
metadata: z.record(z.any()).optional(),
tools: z.array(z.any()).optional(),
tool_choice: z.any().optional(),
parallel_tool_calls: z.boolean().optional(),
include: z.array(z.string()).optional(),
store: z.boolean().optional(),
background: z.boolean().optional(),
reasoning: OpenAIResponsesReasoningSchema.optional(),
text: OpenAIResponsesTextSchema.optional(),
})
.passthrough();
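A request body like the following parses cleanly under this schema; unknown fields survive .passthrough(), and max_output_tokens is clamped to the configured ceiling:

const parsed = OpenAIResponsesCreateSchema.parse({
  model: "gpt-5.1",
  input: [{ role: "user", content: [{ type: "input_text", text: "hello" }] }],
  reasoning: { effort: "low" },
  max_output_tokens: 999999, // coerced, then capped at OPENAI_OUTPUT_MAX
  some_future_field: true, // hypothetical; retained by .passthrough()
});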
export function flattenOpenAIResponsesInput(input: unknown): string {
return flattenResponseValue(input).trim();
}
export function flattenOpenAIResponsesOutput(body: Record<string, any>): string {
if (typeof body.output_text === "string" && body.output_text.trim()) {
return body.output_text.trim();
}
return flattenResponseValue(body.output ?? body.output_text).trim();
}
export function containsOpenAIResponsesImageInput(input: unknown): boolean {
return containsImage(input);
}
function flattenResponseValue(value: unknown): string {
if (value === null || value === undefined) return "";
if (typeof value === "string") return value;
if (typeof value === "number" || typeof value === "boolean") {
return String(value);
}
if (Array.isArray(value)) {
return value
.map((item) => flattenResponseValue(item))
.filter(Boolean)
.join("\n");
}
if (!isRecord(value)) return "";
const typed = value;
if (hasStringProp(typed, "text")) return typed.text;
if (hasStringProp(typed, "refusal")) return typed.refusal;
if (hasStringProp(typed, "summary")) return typed.summary;
if (hasStringProp(typed, "arguments")) return typed.arguments;
if (hasStringProp(typed, "result")) return typed.result;
const type = String(typed.type ?? "");
if (type.includes("image")) return "[ Uploaded Image Omitted ]";
if (type.includes("file")) return "[ File Omitted ]";
if (typeof typed.role === "string" && typed.content !== undefined) {
const content = flattenResponseValue(typed.content);
return content ? `${typed.role}: ${content}` : typed.role;
}
const nested = [
typed.content,
typed.input,
typed.output,
typed.summary,
typed.results,
typed.item,
typed.items,
];
for (const candidate of nested) {
const flattened = flattenResponseValue(candidate);
if (flattened) return flattened;
}
return "";
}
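Worked example: reasoning summaries and role-tagged messages both surface as text, joined by newlines.

// flattenOpenAIResponsesOutput({
//   output: [
//     { type: "reasoning", summary: [{ type: "summary_text", text: "Plan the reply." }] },
//     { type: "message", role: "assistant",
//       content: [{ type: "output_text", text: "Hello!" }] },
//   ],
// })
// → "Plan the reply.\nassistant: Hello!"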
function containsImage(value: unknown): boolean {
if (value === null || value === undefined) return false;
if (Array.isArray(value)) return value.some((item) => containsImage(item));
if (!isRecord(value)) return false;
const typed = value;
const type = String(typed.type ?? "");
if (type.includes("image")) return true;
if (typed.image_url || typed.image || typed.input_image || typed.inline_data) {
return true;
}
return Object.values(typed).some((item) => containsImage(item));
}
function hasStringProp<T extends string>(
value: Record<string, unknown>,
key: T
): value is Record<string, unknown> & Record<T, string> {
return typeof value[key] === "string";
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null;
}
+32 -5
View File
@@ -7,6 +7,7 @@ export const OPENAI_OUTPUT_MAX = config.maxOutputTokensOpenAI;
const OpenAIV1ChatContentArraySchema = z.array( const OpenAIV1ChatContentArraySchema = z.array(
z.union([ z.union([
z.object({ type: z.literal("text"), text: z.string() }), z.object({ type: z.literal("text"), text: z.string() }),
z.object({ type: z.literal("refusal"), refusal: z.string() }),
z.object({ z.object({
type: z.union([z.literal("image"), z.literal("image_url")]), type: z.union([z.literal("image"), z.literal("image_url")]),
image_url: z.object({ image_url: z.object({
@@ -21,7 +22,14 @@ export const OpenAIV1ChatCompletionSchema = z
model: z.string().max(100), model: z.string().max(100),
messages: z.array( messages: z.array(
z.object({ z.object({
role: z.enum(["system", "user", "assistant", "tool", "function"]), role: z.enum([
"system",
"developer",
"user",
"assistant",
"tool",
"function",
]),
content: z.union([z.string(), OpenAIV1ChatContentArraySchema]), content: z.union([z.string(), OpenAIV1ChatContentArraySchema]),
name: z.string().optional(), name: z.string().optional(),
tool_calls: z.array(z.any()).optional(), tool_calls: z.array(z.any()).optional(),
@@ -54,11 +62,20 @@ export const OpenAIV1ChatCompletionSchema = z
.nullish() .nullish()
.default(Math.min(OPENAI_OUTPUT_MAX, 4096)) .default(Math.min(OPENAI_OUTPUT_MAX, 4096))
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)), .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
max_completion_tokens: z.coerce
.number()
.int()
.nullish()
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
frequency_penalty: z.number().optional().default(0), frequency_penalty: z.number().optional().default(0),
presence_penalty: z.number().optional().default(0), presence_penalty: z.number().optional().default(0),
logit_bias: z.any().optional(), logit_bias: z.any().optional(),
metadata: z.record(z.any()).optional(),
user: z.string().max(500).optional(), user: z.string().max(500).optional(),
safety_identifier: z.string().max(500).optional(),
seed: z.number().int().optional(), seed: z.number().int().optional(),
prompt_cache_key: z.string().max(500).optional(),
prompt_cache_retention: z.string().optional(),
// Be warned that Azure OpenAI combines these two into a single field. // Be warned that Azure OpenAI combines these two into a single field.
// It's the only deviation from the OpenAI API that I'm aware of so I have // It's the only deviation from the OpenAI API that I'm aware of so I have
// special cased it in `addAzureKey` rather than expecting clients to do it. // special cased it in `addAzureKey` rather than expecting clients to do it.
@@ -70,14 +87,23 @@ export const OpenAIV1ChatCompletionSchema = z
functions: z.array(z.any()).optional(), functions: z.array(z.any()).optional(),
tool_choice: z.any().optional(), tool_choice: z.any().optional(),
function_choice: z.any().optional(), function_choice: z.any().optional(),
response_format: z.any(), response_format: z.any().optional(),
parallel_tool_calls: z.boolean().optional(),
reasoning_effort: z.string().optional(),
stream_options: z.any().optional(),
modalities: z.array(z.string()).optional(),
audio: z.any().optional(),
prediction: z.any().optional(),
web_search_options: z.any().optional(),
service_tier: z.string().optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
}) })
// Tool usage must be enabled via config because we currently have no way to // Tool usage must be enabled via config because we currently have no way to
// track quota usage for them or enforce limits. // track quota usage for them or enforce limits.
.omit( .omit(
Boolean(config.allowOpenAIToolUsage) ? {} : { tools: true, functions: true } Boolean(config.allowOpenAIToolUsage) ? {} : { tools: true, functions: true }
) )
.strip(); .passthrough();
export type OpenAIChatMessage = z.infer< export type OpenAIChatMessage = z.infer<
typeof OpenAIV1ChatCompletionSchema typeof OpenAIV1ChatCompletionSchema
>["messages"][0]; >["messages"][0];
@@ -89,6 +115,7 @@ export function flattenOpenAIMessageContent(
? content ? content
.map((contentItem) => { .map((contentItem) => {
if ("text" in contentItem) return contentItem.text; if ("text" in contentItem) return contentItem.text;
if ("refusal" in contentItem) return contentItem.refusal;
if ("image_url" in contentItem) return "[ Uploaded Image Omitted ]"; if ("image_url" in contentItem) return "[ Uploaded Image Omitted ]";
}) })
.join("\n") .join("\n")
@@ -107,7 +134,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
let role: string = m.role; let role: string = m.role;
if (role === "assistant") { if (role === "assistant") {
role = "Assistant"; role = "Assistant";
} else if (role === "system") { } else if (role === "system" || role === "developer") {
role = "System"; role = "System";
} else if (role === "user") { } else if (role === "user") {
role = "User"; role = "User";
@@ -121,7 +148,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
.map((m) => { .map((m) => {
// Claude without prefixes (except system) and no Assistant priming // Claude without prefixes (except system) and no Assistant priming
let role: string = ""; let role: string = "";
if (role === "system") { if (m.role === "system" || m.role === "developer") {
role = "System: "; role = "System: ";
} }
return `\n\n${role}${flattenOpenAIMessageContent(m.content)}`; return `\n\n${role}${flattenOpenAIMessageContent(m.content)}`;
+4 -4
View File
@@ -54,10 +54,10 @@ export class AwsKeyChecker extends KeyCheckerBase<AwsBedrockKey> {
if (isInitialCheck) { if (isInitialCheck) {
checks = [ checks = [
this.invokeModel("anthropic.claude-v2", key), this.invokeModel("anthropic.claude-v2", key),
this.invokeModel("anthropic.claude-3-sonnet-20240229-v1:0", key), this.invokeModel("anthropic.claude-sonnet-4-5-20250929-v1:0", key),
this.invokeModel("anthropic.claude-3-haiku-20240307-v1:0", key), this.invokeModel("anthropic.claude-haiku-4-5-20251001-v1:0", key),
this.invokeModel("anthropic.claude-3-opus-20240229-v1:0", key), this.invokeModel("anthropic.claude-opus-4-1-20250805-v1:0", key),
this.invokeModel("anthropic.claude-3-5-sonnet-20240620-v1:0", key), this.invokeModel("anthropic.claude-3-5-haiku-20241022-v1:0", key),
]; ];
} }
+29 -8
View File
@@ -35,9 +35,15 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
} }
protected async testKeyOrFail(key: AzureOpenAIKey) { protected async testKeyOrFail(key: AzureOpenAIKey) {
const model = await this.testModel(key); const result = await this.testModel(key);
this.log.info({ key: key.hash, deploymentModel: model }, "Checked key."); this.log.info(
this.updateKey(key.hash, { modelFamilies: [model] }); { key: key.hash, deploymentModel: result.modelIds[0] ?? result.family },
"Checked key."
);
this.updateKey(key.hash, {
modelFamilies: [result.family],
modelIds: result.modelIds,
});
} }
protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) { protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) {
@@ -107,7 +113,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
this.updateKey(key.hash, { lastChecked: next }); this.updateKey(key.hash, { lastChecked: next });
} }
private async testModel(key: AzureOpenAIKey) { private async testModel(key: AzureOpenAIKey): Promise<{
family: ReturnType<typeof getAzureOpenAIModelFamily>;
modelIds: string[];
}> {
const { apiKey, deploymentId, resourceName } = const { apiKey, deploymentId, resourceName } =
AzureOpenAIKeyChecker.getCredentialsFromKey(key); AzureOpenAIKeyChecker.getCredentialsFromKey(key);
const url = POST_CHAT_COMPLETIONS(resourceName, deploymentId); const url = POST_CHAT_COMPLETIONS(resourceName, deploymentId);
@@ -126,7 +135,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
// we try to invoke /chat/completions on dall-e-3. This is expected and // we try to invoke /chat/completions on dall-e-3. This is expected and
// indicates a DALL-E deployment. // indicates a DALL-E deployment.
if (response.status === 400) { if (response.status === 400) {
if (data.error.code === "OperationNotSupported") return "azure-dall-e"; if (data.error.code === "OperationNotSupported") {
return {
family: "azure-dall-e",
modelIds: ["dall-e-3", "gpt-image-1", "gpt-image-1-mini", "gpt-image-1.5"],
};
}
throw new AxiosError( throw new AxiosError(
`Unexpected error when testing deployment ${deploymentId}`, `Unexpected error when testing deployment ${deploymentId}`,
"AZURE_TEST_ERROR", "AZURE_TEST_ERROR",
@@ -137,11 +151,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
} }
const family = getAzureOpenAIModelFamily(data.model); const family = getAzureOpenAIModelFamily(data.model);
const normalizedModel = normalizeAzureModelId(data.model);
// Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks. // Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks.
// Otherwise we can use the model family Azure returned. // Otherwise we can use the model family Azure returned.
if (family !== "azure-gpt4") { if (family !== "azure-gpt4") {
return family; return { family, modelIds: [normalizedModel] };
} }
// Try to send an oversized prompt. GPT-4 Turbo can handle this but regular // Try to send an oversized prompt. GPT-4 Turbo can handle this but regular
@@ -160,8 +175,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
const code = contextTest.error?.code; const code = contextTest.error?.code;
this.log.debug({ code, status }, "Performed Azure GPT4 context size test."); this.log.debug({ code, status }, "Performed Azure GPT4 context size test.");
if (code === "context_length_exceeded") return "azure-gpt4"; if (code === "context_length_exceeded") {
return "azure-gpt4-turbo"; return { family: "azure-gpt4", modelIds: ["gpt-4"] };
}
return { family: "azure-gpt4-turbo", modelIds: ["gpt-4-turbo"] };
} }
static errorIsAzureError(error: AxiosError): error is AxiosError<AzureError> { static errorIsAzureError(error: AxiosError): error is AxiosError<AzureError> {
@@ -179,3 +196,7 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
return { resourceName, deploymentId, apiKey }; return { resourceName, deploymentId, apiKey };
} }
} }
function normalizeAzureModelId(model: string) {
return model.replace("gpt-35-turbo", "gpt-3.5-turbo");
}
+12 -1
View File
@@ -14,6 +14,8 @@ type AzureOpenAIKeyUsage = {
export interface AzureOpenAIKey extends Key, AzureOpenAIKeyUsage { export interface AzureOpenAIKey extends Key, AzureOpenAIKeyUsage {
readonly service: "azure"; readonly service: "azure";
readonly modelFamilies: AzureOpenAIModelFamily[]; readonly modelFamilies: AzureOpenAIModelFamily[];
/** Exact model IDs or deployment aliases known to be backed by this key. */
modelIds: string[];
/** The time at which this key was last rate limited. */ /** The time at which this key was last rate limited. */
rateLimitedAt: number; rateLimitedAt: number;
/** The time until which this key is rate limited. */ /** The time until which this key is rate limited. */
@@ -62,6 +64,7 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
rateLimitedAt: 0, rateLimitedAt: 0,
rateLimitedUntil: 0, rateLimitedUntil: 0,
contentFiltering: false, contentFiltering: false,
modelIds: [],
hash: `azu-${crypto hash: `azu-${crypto
.createHash("sha256") .createHash("sha256")
.update(key) .update(key)
@@ -73,6 +76,8 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
"azure-gpt4-32kTokens": 0, "azure-gpt4-32kTokens": 0,
"azure-gpt4-turboTokens": 0, "azure-gpt4-turboTokens": 0,
"azure-gpt4oTokens": 0, "azure-gpt4oTokens": 0,
"azure-gpt5Tokens": 0,
"azure-o-seriesTokens": 0,
"azure-dall-eTokens": 0, "azure-dall-eTokens": 0,
}; };
this.keys.push(newKey); this.keys.push(newKey);
@@ -96,8 +101,14 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
public get(model: string) { public get(model: string) {
const neededFamily = getAzureOpenAIModelFamily(model); const neededFamily = getAzureOpenAIModelFamily(model);
const normalizedModel = model
.replace(/^azure-/, "")
.replace("gpt-35-turbo", "gpt-3.5-turbo");
const availableKeys = this.keys.filter( const availableKeys = this.keys.filter(
(k) => !k.isDisabled && k.modelFamilies.includes(neededFamily) (k) =>
!k.isDisabled &&
k.modelFamilies.includes(neededFamily) &&
(!k.modelIds.length || k.modelIds.includes(normalizedModel))
); );
if (availableKeys.length === 0) { if (availableKeys.length === 0) {
throw new PaymentRequiredError( throw new PaymentRequiredError(
+8 -8
View File
@@ -32,10 +32,10 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
const isInitialCheck = !key.lastChecked; const isInitialCheck = !key.lastChecked;
if (isInitialCheck) { if (isInitialCheck) {
checks = [ checks = [
this.invokeModel("claude-3-haiku@20240307", key, true), this.invokeModel("claude-haiku-4-5@20251001", key, true),
this.invokeModel("claude-3-sonnet@20240229", key, true), this.invokeModel("claude-sonnet-4-5@20250929", key, true),
this.invokeModel("claude-3-opus@20240229", key, true), this.invokeModel("claude-opus-4-1@20250805", key, true),
this.invokeModel("claude-3-5-sonnet@20240620", key, true), this.invokeModel("claude-3-5-haiku@20241022", key, true),
]; ];
const [sonnet, haiku, opus, sonnet35] = const [sonnet, haiku, opus, sonnet35] =
@@ -66,13 +66,13 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
}); });
} else { } else {
if (key.haikuEnabled) { if (key.haikuEnabled) {
await this.invokeModel("claude-3-haiku@20240307", key, false) await this.invokeModel("claude-haiku-4-5@20251001", key, false)
} else if (key.sonnetEnabled) { } else if (key.sonnetEnabled) {
await this.invokeModel("claude-3-sonnet@20240229", key, false) await this.invokeModel("claude-sonnet-4-5@20250929", key, false)
} else if (key.sonnet35Enabled) { } else if (key.sonnet35Enabled) {
await this.invokeModel("claude-3-5-sonnet@20240620", key, false) await this.invokeModel("claude-3-5-haiku@20241022", key, false)
} else { } else {
await this.invokeModel("claude-3-opus@20240229", key, false) await this.invokeModel("claude-opus-4-1@20250805", key, false)
} }
this.updateKey(key.hash, { lastChecked: Date.now() }); this.updateKey(key.hash, { lastChecked: Date.now() });
+1
View File
@@ -4,6 +4,7 @@ import { KeyPool } from "./key-pool";
/** The request and response format used by a model's API. */ /** The request and response format used by a model's API. */
export type APIFormat = export type APIFormat =
| "openai" | "openai"
| "openai-responses"
| "openai-text" | "openai-text"
| "openai-image" | "openai-image"
| "anthropic-chat" // Anthropic's newer messages array format | "anthropic-chat" // Anthropic's newer messages array format
+4 -1
View File
@@ -111,7 +111,10 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
const familiesArray = [...families]; const familiesArray = [...families];
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!; const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
this.updateKey(key.hash, { this.updateKey(key.hash, {
modelSnapshots: models.filter((m) => m.match(/-\d{4}(-preview)?$/)), modelIds: models,
modelSnapshots: models.filter((m) =>
m.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/)
),
modelFamilies: familiesArray, modelFamilies: familiesArray,
lastChecked: keyFromPool.lastChecked, lastChecked: keyFromPool.lastChecked,
}); });
+12 -2
View File
@@ -16,6 +16,8 @@ type OpenAIKeyUsage = {
export interface OpenAIKey extends Key, OpenAIKeyUsage { export interface OpenAIKey extends Key, OpenAIKeyUsage {
readonly service: "openai"; readonly service: "openai";
modelFamilies: OpenAIModelFamily[]; modelFamilies: OpenAIModelFamily[];
/** Exact model IDs reported by the models API for this key. */
modelIds: string[];
/** /**
* Some keys are assigned to multiple organizations, each with their own quota * Some keys are assigned to multiple organizations, each with their own quota
* limits. We clone the key for each organization and track usage/disabled * limits. We clone the key for each organization and track usage/disabled
@@ -97,6 +99,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4" as const, "gpt4" as const,
"gpt4-turbo" as const, "gpt4-turbo" as const,
"gpt4o" as const, "gpt4o" as const,
"gpt5" as const,
"o-series" as const,
], ],
isTrial: false, isTrial: false,
isDisabled: false, isDisabled: false,
@@ -118,8 +122,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4-32kTokens": 0, "gpt4-32kTokens": 0,
"gpt4-turboTokens": 0, "gpt4-turboTokens": 0,
gpt4oTokens: 0, gpt4oTokens: 0,
gpt5Tokens: 0,
"o-seriesTokens": 0,
"dall-eTokens": 0, "dall-eTokens": 0,
gpt4Rpm: 0, gpt4Rpm: 0,
modelIds: [],
modelSnapshots: [], modelSnapshots: [],
}; };
this.keys.push(newKey); this.keys.push(newKey);
@@ -160,8 +167,10 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
if (model === "gpt-4-32k") model = "gpt-4-32k-0613"; if (model === "gpt-4-32k") model = "gpt-4-32k-0613";
const neededFamily = getOpenAIModelFamily(model); const neededFamily = getOpenAIModelFamily(model);
const excludeTrials = model === "text-embedding-ada-002"; const excludeTrials = /^text-embedding-(?:3-small|3-large|ada-002)$/.test(
const needsSnapshot = model.match(/-\d{4}(-preview)?$/); model
);
const needsSnapshot = model.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/);
const availableKeys = this.keys.filter( const availableKeys = this.keys.filter(
// Allow keys which // Allow keys which
@@ -169,6 +178,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
!key.isDisabled && // are not disabled !key.isDisabled && // are not disabled
key.modelFamilies.includes(neededFamily) && // have access to the model family we need key.modelFamilies.includes(neededFamily) && // have access to the model family we need
(!excludeTrials || !key.isTrial) && // and are not trials if we don't want them (!excludeTrials || !key.isTrial) && // and are not trials if we don't want them
(!key.modelIds.length || key.modelIds.includes(model)) && // and have the requested model if exact inventory is available
(!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need (!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need
); );
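The widened snapshot regex now covers dated snapshots in both the legacy and ISO-date forms:

// "gpt-4-0613"         → snapshot; requires a key listing that snapshot
// "gpt-4o-2024-08-06"  → snapshot (new ISO-date form)
// "gpt-4-0125-preview" → snapshot
// "gpt-5"              → not a snapshot; any enabled key in the gpt5 family
//                        (and in its exact-ID inventory, if one exists) works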
+48 -2
View File
@@ -23,6 +23,8 @@ export type OpenAIModelFamily =
| "gpt4-32k" | "gpt4-32k"
| "gpt4-turbo" | "gpt4-turbo"
| "gpt4o" | "gpt4o"
| "gpt5"
| "o-series"
| "dall-e"; | "dall-e";
export type AnthropicModelFamily = "claude" | "claude-opus"; export type AnthropicModelFamily = "claude" | "claude-opus";
export type GoogleAIModelFamily = "gemini-pro"; export type GoogleAIModelFamily = "gemini-pro";
@@ -51,6 +53,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"gpt4-32k", "gpt4-32k",
"gpt4-turbo", "gpt4-turbo",
"gpt4o", "gpt4o",
"gpt5",
"o-series",
"dall-e", "dall-e",
"claude", "claude",
"claude-opus", "claude-opus",
@@ -68,6 +72,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"azure-gpt4-32k", "azure-gpt4-32k",
"azure-gpt4-turbo", "azure-gpt4-turbo",
"azure-gpt4o", "azure-gpt4o",
"azure-gpt5",
"azure-o-series",
"azure-dall-e", "azure-dall-e",
] as const); ] as const);
@@ -84,6 +90,10 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
] as const); ] as const);
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = { export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-5(\\.\\d+)?([-.].+)?$": "gpt5",
"^o\\d([-.].+)?$": "o-series",
"^computer-use-preview$": "o-series",
"^gpt-4\\.1([-.].+)?$": "gpt4o",
"^gpt-4o": "gpt4o", "^gpt-4o": "gpt4o",
"^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo", "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
"^gpt-4-turbo(-preview)?$": "gpt4-turbo", "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
@@ -94,7 +104,8 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-4-\\d{4}$": "gpt4", "^gpt-4-\\d{4}$": "gpt4",
"^gpt-4$": "gpt4", "^gpt-4$": "gpt4",
"^gpt-3.5-turbo": "turbo", "^gpt-3.5-turbo": "turbo",
"^text-embedding-ada-002$": "turbo", "^text-embedding-(ada-002|3-small|3-large)$": "turbo",
"^gpt-image-1([-.].+)?$": "dall-e",
"^dall-e-\\d{1}$": "dall-e", "^dall-e-\\d{1}$": "dall-e",
}; };
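A few resolutions under the updated map, assuming getOpenAIModelFamily tries these patterns in order:

// getOpenAIModelFamily("gpt-5.2-codex")        → "gpt5"
// getOpenAIModelFamily("o4-mini")              → "o-series"
// getOpenAIModelFamily("computer-use-preview") → "o-series"
// getOpenAIModelFamily("gpt-4.1-mini")         → "gpt4o" (binned with 4o)
// getOpenAIModelFamily("gpt-image-1.5")        → "dall-e"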
@@ -106,6 +117,8 @@ export const MODEL_FAMILY_SERVICE: {
"gpt4-turbo": "openai", "gpt4-turbo": "openai",
"gpt4-32k": "openai", "gpt4-32k": "openai",
"gpt4o": "openai", "gpt4o": "openai",
gpt5: "openai",
"o-series": "openai",
"dall-e": "openai", "dall-e": "openai",
claude: "anthropic", claude: "anthropic",
"claude-opus": "anthropic", "claude-opus": "anthropic",
@@ -118,6 +131,8 @@ export const MODEL_FAMILY_SERVICE: {
"azure-gpt4-32k": "azure", "azure-gpt4-32k": "azure",
"azure-gpt4-turbo": "azure", "azure-gpt4-turbo": "azure",
"azure-gpt4o": "azure", "azure-gpt4o": "azure",
"azure-gpt5": "azure",
"azure-o-series": "azure",
"azure-dall-e": "azure", "azure-dall-e": "azure",
"gemini-pro": "google-ai", "gemini-pro": "google-ai",
"mistral-tiny": "mistral-ai", "mistral-tiny": "mistral-ai",
@@ -150,7 +165,10 @@ export function getGoogleAIModelFamily(_model: string): ModelFamily {
} }
export function getMistralAIModelFamily(model: string): MistralAIModelFamily { export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
const prunedModel = model.replace(/-(latest|\d{4})$/, ""); const prunedModel = model.replace(
/-(latest|\d{4}|\d{6}|\d+\.\d+|v\d+(:\d+)?)$/,
""
);
switch (prunedModel) { switch (prunedModel) {
case "mistral-tiny": case "mistral-tiny":
case "mistral-small": case "mistral-small":
@@ -161,7 +179,34 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
return "mistral-tiny"; return "mistral-tiny";
case "open-mixtral-8x7b": case "open-mixtral-8x7b":
return "mistral-small"; return "mistral-small";
case "ministral-3b":
case "ministral-8b":
case "mistral-small-3.1":
case "mistral-small-3.2":
return "mistral-small";
case "magistral-medium":
return "mistral-medium";
case "codestral":
case "devstral":
case "mistral-large-2":
case "mistral-large-3":
case "pixtral-large":
return "mistral-large";
default: default:
if (model.startsWith("mistral-small") || model.startsWith("ministral")) {
return "mistral-small";
}
if (model.startsWith("mistral-medium") || model.startsWith("magistral")) {
return "mistral-medium";
}
if (
model.startsWith("mistral-large") ||
model.startsWith("pixtral-large") ||
model.startsWith("codestral") ||
model.startsWith("devstral")
) {
return "mistral-large";
}
return "mistral-tiny"; return "mistral-tiny";
} }
} }
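Representative mappings under the new rules:

// getMistralAIModelFamily("mistral-small-2506")    → "mistral-small"
// getMistralAIModelFamily("magistral-medium-2507") → "mistral-medium"
// getMistralAIModelFamily("pixtral-large-2411")    → "mistral-large"
// getMistralAIModelFamily("devstral-small-2507")   → "mistral-large" (startsWith fallback)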
@@ -225,6 +270,7 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
modelFamily = getClaudeModelFamily(model); modelFamily = getClaudeModelFamily(model);
break; break;
case "openai": case "openai":
case "openai-responses":
case "openai-text": case "openai-text":
case "openai-image": case "openai-image":
modelFamily = getOpenAIModelFamily(model); modelFamily = getOpenAIModelFamily(model);
+8
View File
@@ -10,6 +10,14 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
case "azure-gpt4o": case "azure-gpt4o":
cost = 0.000005; cost = 0.000005;
break; break;
case "gpt5":
case "azure-gpt5":
cost = 0.00001;
break;
case "o-series":
case "azure-o-series":
cost = 0.000012;
break;
case "azure-gpt4-turbo": case "azure-gpt4-turbo":
case "gpt4-turbo": case "gpt4-turbo":
cost = 0.00001; cost = 0.00001;
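At these rates, one million gpt5-family tokens meter as US$10.00 and one million o-series tokens as US$12.00, which is the basis the TOKEN_QUOTA_* limits count against.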
+8 -1
View File
@@ -65,7 +65,14 @@ async function getTokenCountForMessages({
numTokens += encoder.encode(text.normalize("NFKC"), "all").length; numTokens += encoder.encode(text.normalize("NFKC"), "all").length;
break; break;
case "image": case "image":
numTokens += await getImageTokenCount(part.source.data); if (part.source.type === "base64") {
numTokens += await getImageTokenCount(part.source.data);
} else {
// Remote image URLs are already hosted elsewhere, so we cannot
// inspect dimensions locally. Charge the documented worst-case
// token cost instead of undercounting them as zero.
numTokens += 1600;
}
break; break;
default: default:
throw new Error(`Unsupported Anthropic content type.`); throw new Error(`Unsupported Anthropic content type.`);
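The 1600 figure tracks Anthropic's documented per-image maximum (roughly (width × height) / 750 tokens, capped near 1,600 for the largest accepted images), so a message with two URL-sourced images is billed 3,200 tokens regardless of their actual dimensions.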
+36 -8
View File
@@ -179,16 +179,33 @@ export const DALLE_TOKENS_PER_DOLLAR = 100000;
* which we convert to tokens at a rate of 100000 tokens per dollar. * which we convert to tokens at a rate of 100000 tokens per dollar.
*/ */
export function getOpenAIImageCost(params: { export function getOpenAIImageCost(params: {
model: "dall-e-2" | "dall-e-3"; model:
quality: "standard" | "hd"; | "dall-e-2"
resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024"; | "dall-e-3"
| "gpt-image-1"
| "gpt-image-1-mini"
| "gpt-image-1.5";
quality: "auto" | "low" | "medium" | "high" | "standard" | "hd";
resolution:
| "auto"
| "512x512"
| "256x256"
| "1024x1024"
| "1024x1536"
| "1536x1024"
| "1024x1792"
| "1792x1024";
n: number | null; n: number | null;
}) { }) {
const { model, quality, resolution, n } = params; const { model, quality, resolution, n } = params;
const normalizedResolution =
resolution === "auto" ? "1024x1024" : resolution;
const normalizedQuality =
quality === "hd" || quality === "high" ? "hd" : "standard";
const usd = (() => { const usd = (() => {
switch (model) { switch (model) {
case "dall-e-2": case "dall-e-2":
switch (resolution) { switch (normalizedResolution) {
case "512x512": case "512x512":
return 0.018; return 0.018;
case "256x256": case "256x256":
@@ -199,12 +216,20 @@ export function getOpenAIImageCost(params: {
throw new Error("Invalid resolution"); throw new Error("Invalid resolution");
} }
case "dall-e-3": case "dall-e-3":
switch (resolution) { case "gpt-image-1.5":
case "gpt-image-1":
case "gpt-image-1-mini":
// GPT Image models have newer parameter ranges, but we still account
// for them using the existing DALL-E 3-style price buckets so the
// proxy can continue to enforce rough quota/cost limits.
switch (normalizedResolution) {
case "1024x1024": case "1024x1024":
return quality === "standard" ? 0.04 : 0.08; return normalizedQuality === "standard" ? 0.04 : 0.08;
case "1024x1536":
case "1536x1024":
case "1024x1792": case "1024x1792":
case "1792x1024": case "1792x1024":
return quality === "standard" ? 0.08 : 0.12; return normalizedQuality === "standard" ? 0.08 : 0.12;
default: default:
throw new Error("Invalid resolution"); throw new Error("Invalid resolution");
} }
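Example: a gpt-image-1 request at 1536x1024 with quality "high" normalizes to the hd bucket, prices at US$0.12, and meters as 12,000 tokens at the 100,000-tokens-per-dollar rate; "auto" size and quality fall back to 1024x1024/standard, i.e. US$0.04 or 4,000 tokens.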
@@ -233,7 +258,10 @@ export function estimateGoogleAITokenCount(
let numTokens = 0; let numTokens = 0;
for (const message of prompt) { for (const message of prompt) {
numTokens += tokensPerMessage; numTokens += tokensPerMessage;
numTokens += encoder.encode(message.parts[0].text).length; const text = message.parts
.map((part) => ("text" in part ? part.text : ""))
.join("\n");
numTokens += encoder.encode(text).length;
} }
numTokens += 3; numTokens += 3;
+2 -1
View File
@@ -55,7 +55,7 @@ type MistralAIChatTokenCountRequest = {
type FlatPromptTokenCountRequest = { type FlatPromptTokenCountRequest = {
prompt: string; prompt: string;
completion?: never; completion?: never;
service: "openai-text" | "anthropic-text" | "google-ai"; service: "openai-text" | "openai-responses" | "anthropic-text" | "google-ai";
}; };
type StringCompletionTokenCountRequest = { type StringCompletionTokenCountRequest = {
@@ -105,6 +105,7 @@ export async function countTokens({
tokenization_duration_ms: getElapsedMs(time), tokenization_duration_ms: getElapsedMs(time),
}; };
case "openai": case "openai":
case "openai-responses":
case "openai-text": case "openai-text":
return { return {
...(await getOpenAITokenCount(prompt ?? completion, req.body.model)), ...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),
+1
View File
@@ -400,6 +400,7 @@ function getModelFamilyForQuotaUsage(
switch (api) { switch (api) {
case "openai": case "openai":
case "openai-responses":
case "openai-text": case "openai-text":
case "openai-image": case "openai-image":
return getOpenAIModelFamily(model); return getOpenAIModelFamily(model);