Update proxy for modern model APIs

Your Name
2026-04-06 03:59:37 -07:00
parent 824adfbbb2
commit 8662eadea7
48 changed files with 1294 additions and 214 deletions
+6 -2
@@ -40,11 +40,11 @@ NODE_ENV=production
# Which model types users are allowed to access.
# The following model families are recognized:
# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-dall-e
# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | gpt5 | o-series | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-gpt5 | azure-o-series | azure-dall-e
# By default, all models are allowed except for 'dall-e' / 'azure-dall-e'.
# To allow DALL-E image generation, uncomment the line below and add 'dall-e' or
# 'azure-dall-e' to the list of allowed model families.
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,gpt5,o-series,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o,azure-gpt5,azure-o-series
# Which services can be used to process prompts containing images via multimodal
# models. The following services are recognized:
@@ -115,10 +115,14 @@ NODE_ENV=production
# TOKEN_QUOTA_GPT4=0
# TOKEN_QUOTA_GPT4_32K=0
# TOKEN_QUOTA_GPT4_TURBO=0
# TOKEN_QUOTA_GPT5=0
# TOKEN_QUOTA_O_SERIES=0
# TOKEN_QUOTA_CLAUDE=0
# TOKEN_QUOTA_GEMINI_PRO=0
# TOKEN_QUOTA_AWS_CLAUDE=0
# TOKEN_QUOTA_GCP_CLAUDE=0
# TOKEN_QUOTA_AZURE_GPT5=0
# TOKEN_QUOTA_AZURE_O_SERIES=0
# "Tokens" for image-generation models are counted at a rate of 100000 tokens
# per US$1.00 generated, which is similar to the cost of GPT-4 Turbo.
# DALL-E 3 costs around US$0.10 per image (10000 tokens).
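As a quick sanity check of that rate, here is the arithmetic as a sketch in TypeScript. Note that TOKEN_QUOTA_DALL_E is a hypothetical variable name used only to illustrate the math; use whichever quota variable your deployment defines.

const TOKENS_PER_USD = 100_000; // image "tokens" credited per US$1.00 generated
const DALL_E_3_COST_USD = 0.1;  // approximate cost of one DALL-E 3 image
const tokensPerImage = DALL_E_3_COST_USD * TOKENS_PER_USD; // 10,000 tokens
// A hypothetical TOKEN_QUOTA_DALL_E=100000 would therefore cover roughly 10 images.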
+8 -3
@@ -45,11 +45,16 @@ You can also request Claude Instant, but support for this isn't fully implemente
### Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
- `anthropic.claude-v1` (~18k context, claude 1.3 -- EOL 2024-02-28)
- `anthropic.claude-v2` (~100k context, claude 2.0)
- `anthropic.claude-v2:1` (~200k context, claude 2.1)
- **Claude Instant**
- `anthropic.claude-instant-v1` (~100k context, claude instant 1.2)
- `anthropic.claude-haiku-4-5-20251001-v1:0`
- `anthropic.claude-sonnet-4-5-20250929-v1:0`
- `anthropic.claude-opus-4-1-20250805-v1:0`
- `anthropic.claude-3-5-haiku-20241022-v1:0`
- `anthropic.claude-sonnet-4-20250514-v1:0`
- `anthropic.claude-opus-4-20250514-v1:0`
For OpenAI-compatible callers, the proxy will also remap newer Claude-style names such as `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, `claude-opus-4-1-20250805`, and `claude-3-5-haiku-20241022` to the corresponding Bedrock model IDs.
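For reference, the remapping amounts to a table like the following. This is illustrative only; the proxy actually resolves aliases fuzzily in maybeReassignModel rather than through a literal lookup.

// Illustrative alias table; see maybeReassignModel for the real resolution logic.
const BEDROCK_ALIASES: Record<string, string> = {
  "claude-sonnet-4-5-20250929": "anthropic.claude-sonnet-4-5-20250929-v1:0",
  "claude-haiku-4-5-20251001": "anthropic.claude-haiku-4-5-20251001-v1:0",
  "claude-opus-4-1-20250805": "anthropic.claude-opus-4-1-20250805-v1:0",
  "claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
};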
## Note regarding logging
+3 -1
@@ -20,7 +20,9 @@ AZURE_CREDENTIALS=contoso-ml:gpt4-8k:0123456789abcdef0123456789abcdef,northwind-
Note that each Azure deployment is assigned a model when you create it in the Azure OpenAI Service portal. If you want to use a different model, you'll need to create a new deployment, and therefore a new key to be added to the AZURE_CREDENTIALS environment variable. Each credential only grants access to one model.
### Supported model IDs
Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. For the most part they work the same with Azure. GPT-3.5 Turbo has an ID of "gpt-35-turbo" because Azure doesn't allow periods in model names, but the proxy should automatically convert this to the correct ID.
Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. The proxy now understands newer Azure-backed OpenAI model families such as GPT-4o, GPT-4.1, GPT-5 / GPT-5.2, o-series reasoning models, and GPT Image deployments including `gpt-image-1.5`, plus the newer Responses API route at `/proxy/azure/openai/v1/responses`.
GPT-3.5 Turbo still has an Azure-specific ID of `gpt-35-turbo` because Azure doesn't allow periods in model names, but the proxy will automatically normalize that for you.
As noted above, you can only use model IDs for which a deployment has been created and added to the proxy.
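A minimal sketch of that normalization, assuming a simple prefix swap (the proxy's actual logic may differ):

function toAzureModelId(model: string): string {
  // Azure deployment names cannot contain periods, so "gpt-3.5-turbo"
  // becomes "gpt-35-turbo"; other IDs pass through unchanged.
  return model.replace(/^gpt-3\.5/, "gpt-35");
}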
+8 -4
@@ -29,7 +29,11 @@ GCP_CREDENTIALS=my-first-project:xxx@yyy.com:us-east5:-----BEGIN PRIVATE KEY----
## Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
- `claude-3-haiku@20240307`
- `claude-3-sonnet@20240229`
- `claude-3-opus@20240229`
- `claude-3-5-sonnet@20240620`
- `claude-haiku-4-5@20251001`
- `claude-sonnet-4-5@20250929`
- `claude-opus-4-1@20250805`
- `claude-3-5-haiku@20241022`
- `claude-sonnet-4@20250514`
- `claude-opus-4@20250514`
For OpenAI-compatible callers, the proxy will also remap Claude-style aliases like `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, and `claude-opus-4-1-20250805` to the corresponding Vertex AI model IDs.
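Since Vertex AI separates the date stamp with "@" rather than "-", the remapping is essentially a suffix rewrite. A sketch under that assumption (toVertexId is illustrative, not the proxy's actual code):

function toVertexId(alias: string): string {
  // "claude-sonnet-4-5-20250929" -> "claude-sonnet-4-5@20250929"
  return alias.replace(/-(\d{8})$/, "@$1");
}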
+51 -7
@@ -7,8 +7,8 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
"model": "gpt-3.5-turbo",
"max_tokens": 30,
"model": "gpt-4.1-mini",
"max_completion_tokens": 30,
"stream": false,
"messages": [
{
@@ -18,6 +18,19 @@ Content-Type: application/json
]
}
###
# @name OpenAI -- Responses API
POST https://api.openai.com/v1/responses
Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
"model": "gpt-5.2",
"reasoning": { "effort": "medium" },
"max_output_tokens": 80,
"input": "This is a test prompt."
}
###
# @name OpenAI -- Text Completions
POST https://api.openai.com/v1/completions
@@ -38,7 +51,7 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
"model": "text-embedding-ada-002",
"model": "text-embedding-3-small",
"input": "This is a test embedding input."
}
@@ -81,8 +94,8 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gpt-4-1106-preview",
"max_tokens": 20,
"model": "gpt-4.1",
"max_completion_tokens": 20,
"stream": true,
"temperature": 1,
"seed": 123,
@@ -94,6 +107,20 @@ Content-Type: application/json
]
}
###
# @name Proxy / OpenAI -- Native Responses API
POST {{proxy-host}}/proxy/openai/v1/responses
Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gpt-5.2",
"reasoning": { "effort": "medium" },
"max_output_tokens": 64,
"stream": false,
"input": "Summarize the purpose of this reverse proxy in one sentence."
}
###
# @name Proxy / OpenAI -- Native Text Completions
POST {{proxy-host}}/proxy/openai/v1/turbo-instruct/chat/completions
@@ -142,7 +169,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "text-embedding-ada-002",
"model": "text-embedding-3-small",
"input": "This is a test embedding input."
}
@@ -185,7 +212,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gpt-3.5-turbo",
"model": "gpt-5.2",
"max_tokens": 20,
"stream": false,
"temperature": 0,
@@ -197,6 +224,23 @@ Content-Type: application/json
]
}
###
# @name Proxy / Google AI -- OpenAI-Compat Image Generation
POST {{proxy-host}}/proxy/google-ai/v1/chat/completions
Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gemini-2.5-flash-image",
"stream": false,
"messages": [
{
"role": "user",
"content": "Generate a flat vector-style illustration of a red fox reading a newspaper at a cafe table."
}
]
}
###
# @name Proxy / AWS Claude -- Native Completion
POST {{proxy-host}}/proxy/aws/claude/v1/complete
+4
@@ -434,6 +434,8 @@ export const config: Config = {
"gpt4-32k",
"gpt4-turbo",
"gpt4o",
"gpt5",
"o-series",
"claude",
"claude-opus",
"gemini-pro",
@@ -450,6 +452,8 @@ export const config: Config = {
"azure-gpt4-32k",
"azure-gpt4-turbo",
"azure-gpt4o",
"azure-gpt5",
"azure-o-series",
]),
rejectPhrases: parseCsv(getEnvWithDefault("REJECT_PHRASES", "")),
rejectMessage: getEnvWithDefault(
+4
@@ -17,6 +17,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"gpt4-32k": "GPT-4 32k",
"gpt4-turbo": "GPT-4 Turbo",
gpt4o: "GPT-4o",
gpt5: "GPT-5",
"o-series": "o-Series",
"dall-e": "DALL-E",
claude: "Claude (Sonnet)",
"claude-opus": "Claude (Opus)",
@@ -34,6 +36,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"azure-gpt4-32k": "Azure GPT-4 32k",
"azure-gpt4-turbo": "Azure GPT-4 Turbo",
"azure-gpt4o": "Azure GPT-4o",
"azure-gpt5": "Azure GPT-5",
"azure-o-series": "Azure o-Series",
"azure-dall-e": "Azure DALL-E",
};
+28 -21
@@ -29,24 +29,18 @@ const getModelsResponse = () => {
if (!config.anthropicKey) return { object: "list", data: [] };
const claudeVariants = [
"claude-v1",
"claude-v1-100k",
"claude-instant-v1",
"claude-instant-v1-100k",
"claude-v1.3",
"claude-v1.3-100k",
"claude-v1.2",
"claude-v1.0",
"claude-instant-v1.1",
"claude-instant-v1.1-100k",
"claude-instant-v1.0",
"claude-2",
"claude-2.0",
"claude-2.1",
"claude-3-haiku-20240307",
"claude-3-opus-20240229",
"claude-3-sonnet-20240229",
"claude-3-5-sonnet-20240620"
"claude-sonnet-4-5",
"claude-sonnet-4-5-20250929",
"claude-haiku-4-5",
"claude-haiku-4-5-20251001",
"claude-opus-4-1",
"claude-opus-4-1-20250805",
"claude-opus-4-20250514",
"claude-sonnet-4-20250514",
"claude-3-5-haiku-20241022",
"claude-3-5-haiku-latest",
];
const models = claudeVariants.map((id) => ({
@@ -230,7 +224,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware({
* (claude-3 based models do not support the old text completion endpoint).
*/
const preprocessAnthropicTextRequest: RequestHandler = (req, res, next) => {
if (req.body.model?.startsWith("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
@@ -255,7 +249,7 @@ const oaiToChatPreprocessor = createPreprocessorMiddleware({
*/
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
maybeReassignModel(req);
if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
oaiToChatPreprocessor(req, res, next);
} else {
oaiToTextPreprocessor(req, res, next);
@@ -315,7 +309,8 @@ function handleAnthropicTextCompatRequest(
const type = req.params.type;
const action = req.params.action;
const alreadyInChatFormat = Boolean(req.body.messages);
const compatModel = `claude-3-${type}-20240229`;
const compatModel =
type === "opus" ? "claude-opus-4-1-20250805" : "claude-sonnet-4-5-20250929";
req.log.info(
{ type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
"Handling Anthropic compatibility request"
@@ -349,8 +344,20 @@ function handleAnthropicTextCompatRequest(
*/
function maybeReassignModel(req: Request) {
const model = req.body.model;
if (!model.startsWith("gpt-")) return;
req.body.model = "claude-3-sonnet-20240229";
const lower = String(model).toLowerCase();
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = "claude-sonnet-4-5-20250929";
}
}
function requiresAnthropicMessagesApi(model?: string) {
// Claude 3.x and all 4.x-series models (sonnet, opus, and haiku) only support
// the Messages API, so haiku must be matched here as well.
return /^claude-(?:3|sonnet|opus|haiku)/.test(model ?? "");
}
export const anthropic = anthropicRouter;
+80 -13
@@ -20,6 +20,12 @@ import { transformAnthropicChatResponseToAnthropicText, transformAnthropicChatRe
import { sendErrorToClient } from "./middleware/response/error-generator";
const LATEST_AWS_V2_MINOR_VERSION = "1";
const AWS_CLAUDE_SONNET_45 = "anthropic.claude-sonnet-4-5-20250929-v1:0";
const AWS_CLAUDE_HAIKU_45 = "anthropic.claude-haiku-4-5-20251001-v1:0";
const AWS_CLAUDE_OPUS_41 = "anthropic.claude-opus-4-1-20250805-v1:0";
const AWS_CLAUDE_SONNET_4 = "anthropic.claude-sonnet-4-20250514-v1:0";
const AWS_CLAUDE_OPUS_4 = "anthropic.claude-opus-4-20250514-v1:0";
const AWS_CLAUDE_35_HAIKU = "anthropic.claude-3-5-haiku-20241022-v1:0";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -35,10 +41,12 @@ const getModelsResponse = () => {
const variants = [
"anthropic.claude-v2",
"anthropic.claude-v2:1",
"anthropic.claude-3-haiku-20240307-v1:0",
"anthropic.claude-3-sonnet-20240229-v1:0",
"anthropic.claude-3-5-sonnet-20240620-v1:0",
"anthropic.claude-3-opus-20240229-v1:0",
AWS_CLAUDE_HAIKU_45,
AWS_CLAUDE_SONNET_45,
AWS_CLAUDE_OPUS_41,
AWS_CLAUDE_35_HAIKU,
AWS_CLAUDE_SONNET_4,
AWS_CLAUDE_OPUS_4,
];
const models = variants.map((id) => ({
@@ -164,7 +172,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware(
* (claude-3 based models do not support the old text completion endpoint).
*/
const preprocessAwsTextRequest: RequestHandler = (req, res, next) => {
if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
@@ -186,7 +194,7 @@ const oaiToAwsChatPreprocessor = createPreprocessorMiddleware(
* or the new Claude chat completion endpoint, based on the requested model.
*/
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
oaiToAwsChatPreprocessor(req, res, next);
} else {
oaiToAwsTextPreprocessor(req, res, next);
@@ -241,12 +249,65 @@ awsRouter.post(
*/
function maybeReassignModel(req: Request) {
const model = req.body.model;
const lower = String(model).toLowerCase();
// If it looks like an AWS model, use it as-is
if (model.includes("anthropic.claude")) {
return;
}
if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4")) {
req.body.model = AWS_CLAUDE_OPUS_4;
return;
}
if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
req.body.model = AWS_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
if (lower.includes("sonnet-4")) {
req.body.model = AWS_CLAUDE_SONNET_4;
return;
}
if (lower.includes("3-5") && lower.includes("haiku")) {
req.body.model = AWS_CLAUDE_35_HAIKU;
return;
}
if (lower.includes("opus")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("haiku")) {
req.body.model = AWS_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet")) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
// Anthropic model names can look like:
// - claude-v1
// - claude-2.1
@@ -282,20 +343,22 @@ function maybeReassignModel(req: Request) {
case "3":
case "3.0":
if (name.includes("opus")) {
req.body.model = "anthropic.claude-3-opus-20240229-v1:0";
req.body.model = AWS_CLAUDE_OPUS_41;
} else if (name.includes("haiku")) {
req.body.model = "anthropic.claude-3-haiku-20240307-v1:0";
req.body.model = AWS_CLAUDE_HAIKU_45;
} else {
req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0";
req.body.model = AWS_CLAUDE_SONNET_45;
}
return;
case "3.5":
req.body.model = "anthropic.claude-3-5-sonnet-20240620-v1:0";
req.body.model = name.includes("haiku")
? AWS_CLAUDE_35_HAIKU
: AWS_CLAUDE_SONNET_45;
return;
}
// Fallback to Claude 2.1
req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;
// Fallback to Claude Sonnet 4.5
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
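For example, the fuzzy matcher above resolves a few representative inputs like so:

// "claude-sonnet-4.5"   -> AWS_CLAUDE_SONNET_45
// "claude-3-5-haiku"    -> AWS_CLAUDE_35_HAIKU (the "3-5" + "haiku" branch)
// "gpt-4o" / "o3-mini"  -> AWS_CLAUDE_SONNET_45 (OpenAI-style IDs fall through
//                          to the gpt-/o1/o3/o4 branch)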
@@ -306,7 +369,7 @@ export function handleCompatibilityRequest(
) {
const action = req.params.action;
const alreadyInChatFormat = Boolean(req.body.messages);
const compatModel = "anthropic.claude-3-5-sonnet-20240620-v1:0";
const compatModel = AWS_CLAUDE_SONNET_4;
req.log.info(
{ inputModel: req.body.model, compatModel, alreadyInChatFormat },
"Handling AWS compatibility request"
@@ -335,3 +398,7 @@ export function handleCompatibilityRequest(
}
export const aws = awsRouter;
function requiresAnthropicMessagesApi(model?: string) {
// Covers Claude 3.x plus the 4.x sonnet/opus/haiku Bedrock IDs, all of which
// require the Messages API.
return /claude-(?:3|sonnet|opus|haiku)/.test(model ?? "");
}
+26 -2
@@ -32,16 +32,25 @@ function getModelsResponse() {
}
let available = new Set<AzureOpenAIModelFamily>();
const availableModelIds = new Set<string>();
for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "azure") continue;
const azureKey = key as any;
key.modelFamilies.forEach((family) =>
available.add(family as AzureOpenAIModelFamily)
);
azureKey.modelIds?.forEach((id: string) => availableModelIds.add(id));
}
const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
available = new Set([...available].filter((x) => allowed.has(x)));
const models = KNOWN_OPENAI_MODELS.map((id) => ({
const usingExactModelIds = availableModelIds.size > 0;
const sourceModels = usingExactModelIds
? [...new Set([...KNOWN_OPENAI_MODELS, ...availableModelIds])]
: KNOWN_OPENAI_MODELS;
const models = sourceModels.map((id) => ({
id,
object: "model",
created: new Date().getTime(),
@@ -58,7 +67,12 @@ function getModelsResponse() {
],
root: id,
parent: null,
})).filter((model) => available.has(getAzureOpenAIModelFamily(model.id)));
})).filter((model) => {
if (usingExactModelIds) {
return availableModelIds.has(model.id);
}
return available.has(getAzureOpenAIModelFamily(model.id));
});
modelsCache = { object: "list", data: models };
modelsCacheTime = new Date().getTime();
@@ -115,6 +129,16 @@ azureOpenAIRouter.post(
}),
azureOpenAIProxy
);
azureOpenAIRouter.post(
"/v1/responses",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai-responses",
outApi: "openai-responses",
service: "azure",
}),
azureOpenAIProxy
);
azureOpenAIRouter.post(
"/v1/images/generations",
ipLimiter,
+74 -12
@@ -19,7 +19,12 @@ import {
import { transformAnthropicChatResponseToOpenAI } from "./anthropic";
import { sendErrorToClient } from "./middleware/response/error-generator";
const LATEST_GCP_SONNET_MINOR_VERSION = "20240229";
const GCP_CLAUDE_SONNET_45 = "claude-sonnet-4-5@20250929";
const GCP_CLAUDE_HAIKU_45 = "claude-haiku-4-5@20251001";
const GCP_CLAUDE_OPUS_41 = "claude-opus-4-1@20250805";
const GCP_CLAUDE_SONNET_4 = "claude-sonnet-4@20250514";
const GCP_CLAUDE_OPUS_4 = "claude-opus-4@20250514";
const GCP_CLAUDE_35_HAIKU = "claude-3-5-haiku@20241022";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -33,10 +38,12 @@ const getModelsResponse = () => {
// https://docs.anthropic.com/en/docs/about-claude/models
const variants = [
"claude-3-haiku@20240307",
"claude-3-sonnet@20240229",
"claude-3-opus@20240229",
"claude-3-5-sonnet@20240620",
GCP_CLAUDE_HAIKU_45,
GCP_CLAUDE_SONNET_45,
GCP_CLAUDE_OPUS_41,
GCP_CLAUDE_35_HAIKU,
GCP_CLAUDE_SONNET_4,
GCP_CLAUDE_OPUS_4,
];
const models = variants.map((id) => ({
@@ -147,6 +154,7 @@ gcpRouter.post(
*/
function maybeReassignModel(req: Request) {
const model = req.body.model;
const lower = String(model).toLowerCase();
// If it looks like a GCP model, use it as-is
// if (model.includes("anthropic.claude")) {
@@ -154,6 +162,58 @@ function maybeReassignModel(req: Request) {
return;
}
if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4")) {
req.body.model = GCP_CLAUDE_OPUS_4;
return;
}
if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
req.body.model = GCP_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
if (lower.includes("sonnet-4")) {
req.body.model = GCP_CLAUDE_SONNET_4;
return;
}
if (lower.includes("3-5") && lower.includes("haiku")) {
req.body.model = GCP_CLAUDE_35_HAIKU;
return;
}
if (lower.includes("opus")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("haiku")) {
req.body.model = GCP_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet")) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
// Anthropic model names can look like:
// - claude-v1
// - claude-2.1
@@ -165,7 +225,7 @@ function maybeReassignModel(req: Request) {
// If there's no match, fall back to Claude Sonnet 4 as it is most likely to be
// available on GCP.
if (!match) {
req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`;
req.body.model = GCP_CLAUDE_SONNET_4;
return;
}
@@ -176,20 +236,22 @@ function maybeReassignModel(req: Request) {
case "3":
case "3.0":
if (name.includes("opus")) {
req.body.model = "claude-3-opus@20240229";
req.body.model = GCP_CLAUDE_OPUS_41;
} else if (name.includes("haiku")) {
req.body.model = "claude-3-haiku@20240307";
req.body.model = GCP_CLAUDE_HAIKU_45;
} else {
req.body.model = "claude-3-sonnet@20240229";
req.body.model = GCP_CLAUDE_SONNET_45;
}
return;
case "3.5":
req.body.model = "claude-3-5-sonnet@20240620";
req.body.model = name.includes("haiku")
? GCP_CLAUDE_35_HAIKU
: GCP_CLAUDE_SONNET_45;
return;
}
// Fallback to Claude3 Sonnet
req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`;
// Fallback to Claude Sonnet 4.5
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
+23 -7
@@ -16,6 +16,11 @@ import {
ProxyResHandlerWithBody,
} from "./middleware/response";
import { addGoogleAIKey } from "./middleware/request/preprocessors/add-google-ai-key";
import { BadRequestError } from "../shared/errors";
import {
flattenGoogleAIContentParts,
isGoogleAIImageModel,
} from "../shared/api-schemas";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -31,10 +36,15 @@ const getModelsResponse = () => {
if (!config.googleAIKey) return { object: "list", data: [] };
const googleAIVariants = [
"gemini-pro",
"gemini-1.0-pro",
"gemini-2.5-pro",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
"gemini-2.5-flash-image",
"gemini-3-pro-image-preview",
"gemini-2.0-flash-preview-image-generation",
"gemini-2.0-flash",
"gemini-1.5-pro",
"gemini-1.5-pro-latest",
"gemini-1.5-flash",
];
const models = googleAIVariants.map((id) => ({
@@ -83,7 +93,8 @@ function transformGoogleAIResponse(
): Record<string, any> {
const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
const parts = resBody.candidates[0].content?.parts ?? [{ text: "" }];
const content = parts[0].text.replace(/^(.{0,50}?): /, () => "");
const content = flattenGoogleAIContentParts(parts)
.replace(/^(.{0,50}?): /, () => "");
return {
id: "goo-" + v4(),
object: "chat.completion",
@@ -136,14 +147,19 @@ googleAIRouter.post(
googleAIProxy
);
/** Replaces requests for non-Google AI models with gemini-pro-1.5-latest. */
/** Replaces requests for non-Google AI models with Gemini 2.5 Flash. */
function maybeReassignModel(req: Request) {
const requested = req.body.model;
if (requested.includes("gemini")) {
if (req.body.stream && isGoogleAIImageModel(requested)) {
throw new BadRequestError(
"Streaming Gemini image-generation models is not currently supported by this proxy. Retry without `stream: true`."
);
}
return;
}
req.log.info({ requested }, "Reassigning model to gemini-pro-1.5-latest");
req.body.model = "gemini-pro-1.5-latest";
req.log.info({ requested }, "Reassigning model to gemini-2.5-flash");
req.body.model = "gemini-2.5-flash";
}
export const googleAI = googleAIRouter;
+11 -1
@@ -5,10 +5,15 @@ import { ZodError } from "zod";
import { generateErrorMessage } from "zod-error";
import { HttpError } from "../../shared/errors";
import { assertNever } from "../../shared/utils";
import {
flattenGoogleAIContentParts,
flattenOpenAIResponsesOutput,
} from "../../shared/api-schemas";
import { QuotaExceededError } from "./request/preprocessors/apply-quota-limits";
import { sendErrorToClient } from "./response/error-generator";
const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const OPENAI_RESPONSES_ENDPOINT = "/v1/responses";
const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
@@ -22,6 +27,7 @@ export function isTextGenerationRequest(req: Request) {
req.method === "POST" &&
[
OPENAI_CHAT_COMPLETION_ENDPOINT,
OPENAI_RESPONSES_ENDPOINT,
OPENAI_TEXT_COMPLETION_ENDPOINT,
ANTHROPIC_COMPLETION_ENDPOINT,
ANTHROPIC_MESSAGES_ENDPOINT,
@@ -224,6 +230,8 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
// Can be null if the model wants to invoke tools rather than return a
// completion.
return body.choices[0].message.content || "";
case "openai-responses":
return flattenOpenAIResponsesOutput(body);
case "openai-text":
return body.choices[0].text;
case "anthropic-chat":
@@ -252,7 +260,7 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
if ("choices" in body) {
return body.choices[0].message.content;
}
return body.candidates[0].content.parts[0].text;
return flattenGoogleAIContentParts(body.candidates?.[0]?.content?.parts);
case "openai-image":
return body.data?.map((item: any) => item.url).join("\n");
default:
@@ -267,6 +275,8 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
case "openai-text":
case "mistral-ai":
return body.model;
case "openai-responses":
return body.model || req.body.model;
case "openai-image":
return req.body.model;
case "anthropic-chat":
@@ -47,6 +47,7 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
assignedKey = keyPool.get("dall-e-3", service);
break;
case "openai":
case "openai-responses":
case "google-ai":
case "mistral-ai":
throw new Error(
@@ -109,9 +110,10 @@ export const addKeyForEmbeddingsRequest: HPMRequestCallback = (
throw new Error("Embeddings requests must be from OpenAI");
}
req.body = { input: req.body.input, model: "text-embedding-ada-002" };
const model = req.body.model || "text-embedding-3-small";
req.body = { input: req.body.input, model };
const key = keyPool.get("text-embedding-ada-002", "openai") as OpenAIKey;
const key = keyPool.get(model, "openai") as OpenAIKey;
req.key = key;
req.log.info(
@@ -6,7 +6,7 @@ import {
import { RequestPreprocessor } from "../index";
export const addAzureKey: RequestPreprocessor = (req) => {
const validAPIs: APIFormat[] = ["openai", "openai-image"];
const validAPIs: APIFormat[] = ["openai", "openai-responses", "openai-image"];
const apisValid = [req.outboundApi, req.inboundApi].every((api) =>
validAPIs.includes(api)
);
@@ -50,6 +50,23 @@ export const addAzureKey: RequestPreprocessor = (req) => {
const cred = req.key as AzureOpenAIKey;
const { resourceName, deploymentId, apiKey } = getCredentialsFromKey(cred);
if (req.outboundApi === "openai-responses") {
req.body.model = deploymentId;
req.signedRequest = {
method: "POST",
protocol: "https:",
hostname: `${resourceName}.openai.azure.com`,
path: `/openai/v1/responses?api-version=preview`,
headers: {
["host"]: `${resourceName}.openai.azure.com`,
["content-type"]: "application/json",
["api-key"]: apiKey,
},
body: JSON.stringify(req.body),
};
return;
}
const operation =
req.outboundApi === "openai" ? "/chat/completions" : "/images/generations";
const apiVersion =
@@ -6,6 +6,7 @@ import {
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
flattenOpenAIResponsesInput,
} from "../../../../shared/api-schemas";
/**
@@ -18,11 +19,23 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
switch (service) {
case "openai": {
req.outputTokens = req.body.max_tokens;
req.outputTokens =
req.body.max_completion_tokens ?? req.body.max_tokens ?? 0;
const prompt: OpenAIChatMessage[] = req.body.messages;
result = await countTokens({ req, prompt, service });
break;
}
case "openai-responses": {
req.outputTokens = req.body.max_output_tokens ?? 0;
const prompt = [
flattenOpenAIResponsesInput(req.body.instructions),
flattenOpenAIResponsesInput(req.body.input),
]
.filter(Boolean)
.join("\n\n");
result = await countTokens({ req, prompt, service });
break;
}
case "openai-text": {
req.outputTokens = req.body.max_tokens;
const prompt: string = req.body.prompt;
@@ -4,8 +4,10 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { BadRequestError } from "../../../../shared/errors";
import {
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
flattenOpenAIResponsesInput,
flattenAnthropicMessages,
} from "../../../../shared/api-schemas";
@@ -72,11 +74,27 @@ function getPromptFromRequest(req: Request) {
return `${msg.role}: ${text}`;
})
.join("\n\n");
case "openai-responses":
return [
flattenOpenAIResponsesInput(body.instructions),
flattenOpenAIResponsesInput(body.input),
]
.filter(Boolean)
.join("\n\n");
case "openai-text":
case "openai-image":
return body.prompt;
case "google-ai":
return body.prompt.text;
return body.contents
.map(({ parts, role }: GoogleAIChatMessage) => {
const text = parts
.map((part: any) =>
"text" in part ? part.text : "[image omitted]"
)
.join("\n");
return `${role}: ${text}`;
})
.join("\n\n");
default:
assertNever(service);
}
@@ -6,8 +6,8 @@ import { RequestPreprocessor } from "../index";
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
const GOOGLE_AI_MAX_CONTEXT = 32000;
const MISTRAL_AI_MAX_CONTENT = 32768;
const GOOGLE_AI_MAX_CONTEXT = 1048576;
const MISTRAL_AI_MAX_CONTENT = 256000;
/**
* Assigns `req.promptTokens` and `req.outputTokens` based on the request body
@@ -26,6 +26,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let proxyMax: number;
switch (req.outboundApi) {
case "openai":
case "openai-responses":
case "openai-text":
proxyMax = OPENAI_MAX_CONTEXT;
break;
@@ -54,6 +55,12 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let modelMax: number;
if (model.match(/gpt-3.5-turbo-16k/)) {
modelMax = 16384;
} else if (model.match(/^gpt-5(\.|-|\b)/)) {
modelMax = 1050000;
} else if (model.match(/^o\d/)) {
modelMax = 200000;
} else if (model.match(/^gpt-4\.1/)) {
modelMax = 1047576;
} else if (model.match(/^gpt-4o/)) {
modelMax = 128000;
} else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
@@ -80,12 +87,27 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 200000;
} else if (model.match(/^claude-3/)) {
modelMax = 200000;
} else if (model.match(/^claude-(opus|sonnet|haiku)-4/)) {
modelMax = 200000;
} else if (model.match(/^gemini-\d{3}$/)) {
modelMax = GOOGLE_AI_MAX_CONTEXT;
} else if (model.match(/^gemini-(2\.5|2\.0)/)) {
modelMax = 1048576;
} else if (model.match(/^mistral-(tiny|small|medium)$/)) {
modelMax = MISTRAL_AI_MAX_CONTENT;
} else if (
model.match(
/^(mistral|ministral|magistral|pixtral|codestral|devstral|voxtral)-/
)
) {
modelMax = MISTRAL_AI_MAX_CONTENT;
} else if (model.match(/^anthropic\.claude-3/)) {
modelMax = 200000;
} else if (
model.match(/^anthropic\.claude-(opus|sonnet|haiku)-4/) ||
model.match(/^claude-(opus|sonnet|haiku)-4@/)
) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude-v2:\d/)) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude/)) {
@@ -121,8 +143,8 @@ function assertRequestHasTokenCounts(
req: Request
): asserts req is Request & { promptTokens: number; outputTokens: number } {
z.object({
promptTokens: z.number().int().min(1),
outputTokens: z.number().int().min(1),
promptTokens: z.number().int().min(0),
outputTokens: z.number().int().min(0),
})
.nonstrict()
.parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens });
@@ -3,6 +3,7 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { containsImageContent as containsImageContentOpenAI } from "../../../../shared/api-schemas/openai";
import { containsImageContent as containsImageContentAnthropic } from "../../../../shared/api-schemas/anthropic";
import { containsOpenAIResponsesImageInput } from "../../../../shared/api-schemas";
import { ForbiddenError } from "../../../../shared/errors";
/**
@@ -22,11 +23,20 @@ export const validateVision: RequestPreprocessor = async (req) => {
case "openai":
hasImage = containsImageContentOpenAI(req.body.messages);
break;
case "openai-responses":
hasImage =
containsOpenAIResponsesImageInput(req.body.instructions) ||
containsOpenAIResponsesImageInput(req.body.input);
break;
case "anthropic-chat":
hasImage = containsImageContentAnthropic(req.body.messages);
break;
case "anthropic-text":
case "google-ai":
hasImage = req.body.contents?.some((message: { parts: any[] }) =>
message.parts?.some((part) => "inline_data" in part)
);
break;
case "anthropic-text":
case "mistral-ai":
case "openai-image":
case "openai-text":
@@ -72,7 +72,15 @@ type ErrorGeneratorOptions = {
};
export function tryInferFormat(body: any): APIFormat | "unknown" {
if (typeof body !== "object" || !body.model) {
if (typeof body !== "object") {
return "unknown";
}
if (body.object === "response" || Array.isArray(body.output)) {
return "openai-responses";
}
if (!body.model) {
return "unknown";
}
@@ -158,7 +166,30 @@ export function buildSpoofedCompletion({
switch (format) {
case "openai":
case "openai-responses":
case "mistral-ai":
if (format === "openai-responses") {
return {
id: "error-" + id,
object: "response",
created_at: Math.floor(Date.now() / 1000),
model,
status: "completed",
error: null,
incomplete_details: null,
output_text: content,
output: [
{
id: "msg-error-" + id,
type: "message",
role: "assistant",
status: "completed",
content: [{ type: "output_text", text: content, annotations: [] }],
},
],
usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
};
}
return {
id: "error-" + id,
object: "chat.completion",
@@ -248,7 +279,23 @@ export function buildSpoofedSSE({
switch (format) {
case "openai":
case "openai-responses":
case "mistral-ai":
if (format === "openai-responses") {
return (
`data: ${JSON.stringify({
type: "response.completed",
response: buildSpoofedCompletion({
format,
title,
message,
obj,
reqId,
model,
}),
})}\n\n`
);
}
event = {
id: "chatcmpl-" + id,
object: "chat.completion.chunk",
+27 -1
@@ -11,6 +11,7 @@ import { ProxyResHandlerWithBody } from ".";
import { assertNever } from "../../../shared/utils";
import {
AnthropicChatMessage,
flattenOpenAIResponsesInput,
flattenAnthropicMessages, GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
@@ -62,6 +63,7 @@ const getPromptForRequest = (
):
| string
| OpenAIChatMessage[]
| { instructions?: unknown; input?: unknown }
| { contents: GoogleAIChatMessage[] }
| { system: string; messages: AnthropicChatMessage[] }
| MistralAIChatMessage[]
@@ -73,6 +75,11 @@ const getPromptForRequest = (
case "openai":
case "mistral-ai":
return req.body.messages;
case "openai-responses":
return {
instructions: req.body.instructions,
input: req.body.input,
};
case "anthropic-chat":
return { system: req.body.system, messages: req.body.messages };
case "openai-text":
@@ -99,6 +106,7 @@ const flattenMessages = (
| string
| OaiImageResult
| OpenAIChatMessage[]
| { instructions?: unknown; input?: unknown }
| { contents: GoogleAIChatMessage[] }
| { system: string; messages: AnthropicChatMessage[] }
| MistralAIChatMessage[]
@@ -114,12 +122,20 @@ const flattenMessages = (
return val.contents
.map(({ parts, role }) => {
const text = parts
.map((p) => p.text)
.map((p: any) => ("text" in p ? p.text : "(( Attached Image ))"))
.join("\n");
return `${role}: ${text}`;
})
.join("\n");
}
if (isOpenAIResponsesPrompt(val)) {
return [
flattenOpenAIResponsesInput(val.instructions),
flattenOpenAIResponsesInput(val.input),
]
.filter(Boolean)
.join("\n\n");
}
if (Array.isArray(val)) {
return val
.map(({ content, role }) => {
@@ -140,6 +156,16 @@ const flattenMessages = (
return val.prompt.trim();
};
function isOpenAIResponsesPrompt(
val: unknown
): val is { instructions?: unknown; input?: unknown } {
return (
typeof val === "object" &&
val !== null &&
("instructions" in val || "input" in val)
);
}
function isGoogleAIChatPrompt(
val: unknown
): val is { contents: GoogleAIChatMessage[] } {
@@ -8,6 +8,7 @@ import {
mergeEventsForOpenAIText,
AnthropicV2StreamEvent,
OpenAIChatCompletionStreamEvent,
OpenAIResponsesStreamEvent,
} from "./index";
/**
@@ -17,13 +18,36 @@ import {
export class EventAggregator {
private readonly format: APIFormat;
private readonly events: OpenAIChatCompletionStreamEvent[];
private responseBody: Record<string, any> | null;
private responseEventCount: number;
private responseOutputText: string;
constructor({ format }: { format: APIFormat }) {
this.events = [];
this.format = format;
this.responseBody = null;
this.responseEventCount = 0;
this.responseOutputText = "";
}
addEvent(event: OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent) {
addEvent(
event:
| OpenAIChatCompletionStreamEvent
| AnthropicV2StreamEvent
| OpenAIResponsesStreamEvent
) {
if (eventIsOpenAIResponsesEvent(event)) {
this.responseEventCount++;
if (event.response && typeof event.response === "object") {
this.responseBody = event.response;
}
if (event.type === "response.output_text.delta") {
this.responseOutputText += event.delta || event.text || "";
}
return;
}
if (eventIsOpenAIEvent(event)) {
this.events.push(event);
} else {
@@ -52,8 +76,15 @@ export class EventAggregator {
getFinalResponse() {
switch (this.format) {
case "openai":
case "openai-responses":
case "google-ai":
case "mistral-ai":
if (this.format === "openai-responses") {
if (this.responseBody) {
return this.responseBody;
}
return { output_text: this.responseOutputText };
}
return mergeEventsForOpenAIChat(this.events);
case "openai-text":
return mergeEventsForOpenAIText(this.events);
@@ -69,7 +100,7 @@ export class EventAggregator {
}
hasEvents() {
return this.events.length > 0;
return this.events.length > 0 || this.responseEventCount > 0;
}
}
@@ -78,3 +109,9 @@ function eventIsOpenAIEvent(
): event is OpenAIChatCompletionStreamEvent {
return event?.object === "chat.completion.chunk";
}
function eventIsOpenAIResponsesEvent(
event: any
): event is OpenAIResponsesStreamEvent {
return typeof event?.type === "string" && event.type.startsWith("response.");
}
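For reference, the events this helper detects look roughly like the following (simplified; the Responses API emits many event types, all sharing the `type` discriminator, and the values here are placeholders):

const deltaEvent: OpenAIResponsesStreamEvent = {
  type: "response.output_text.delta",
  delta: "Hello",
};
const completedEvent: OpenAIResponsesStreamEvent = {
  type: "response.completed",
  response: { id: "resp_abc123", status: "completed", output_text: "Hello world" },
};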
@@ -26,6 +26,14 @@ export type OpenAIChatCompletionStreamEvent = {
}[];
};
export type OpenAIResponsesStreamEvent = {
type: string;
response?: Record<string, any>;
delta?: string;
text?: string;
[key: string]: any;
};
export type StreamingCompletionTransformer<
T = OpenAIChatCompletionStreamEvent,
S = any,
@@ -42,6 +50,7 @@ export { anthropicChatToAnthropicV2 } from "./transformers/anthropic-chat-to-ant
export { anthropicChatToOpenAI } from "./transformers/anthropic-chat-to-openai";
export { googleAIToOpenAI } from "./transformers/google-ai-to-openai";
export { passthroughToOpenAI } from "./transformers/passthrough-to-openai";
export { passthroughToOpenAIResponses } from "./transformers/passthrough-to-openai-responses";
export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat";
export { mergeEventsForOpenAIText } from "./aggregators/openai-text";
export { mergeEventsForAnthropicText } from "./aggregators/anthropic-text";
@@ -10,8 +10,10 @@ import {
anthropicV2ToOpenAI,
googleAIToOpenAI,
OpenAIChatCompletionStreamEvent,
OpenAIResponsesStreamEvent,
openAITextToOpenAIChat,
passthroughToOpenAI,
passthroughToOpenAIResponses,
StreamingCompletionTransformer,
} from "./index";
@@ -35,7 +37,9 @@ export class SSEMessageTransformer extends Transform {
private readonly inputFormat: APIFormat;
private readonly transformFn: StreamingCompletionTransformer<
// TODO: Refactor transformers to not assume only OpenAI events as output
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
| OpenAIChatCompletionStreamEvent
| AnthropicV2StreamEvent
| OpenAIResponsesStreamEvent
>;
private readonly log;
private readonly fallbackId: string;
@@ -126,12 +130,14 @@ function getTransformer(
// used for that case.
requestApi: APIFormat = "openai"
): StreamingCompletionTransformer<
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent | OpenAIResponsesStreamEvent
> {
switch (responseApi) {
case "openai":
case "mistral-ai":
return passthroughToOpenAI;
case "openai-responses":
return passthroughToOpenAIResponses;
case "openai-text":
return openAITextToOpenAIChat;
case "anthropic-text":
@@ -0,0 +1,43 @@
import {
OpenAIResponsesStreamEvent,
SSEResponseTransformArgs,
StreamingCompletionTransformer,
} from "../index";
import { parseEvent, ServerSentEvent } from "../parse-sse";
import { logger } from "../../../../../logger";
const log = logger.child({
module: "sse-transformer",
transformer: "openai-responses-to-openai-responses",
});
export const passthroughToOpenAIResponses: StreamingCompletionTransformer<
OpenAIResponsesStreamEvent
> = (
params: SSEResponseTransformArgs
) => {
const { data } = params;
const rawEvent = parseEvent(data);
if (!rawEvent.data || rawEvent.data === "[DONE]") {
return { position: -1 };
}
const responseEvent = asResponseEvent(rawEvent);
if (!responseEvent) {
return { position: -1 };
}
return { position: -1, event: responseEvent };
};
function asResponseEvent(
event: ServerSentEvent
): OpenAIResponsesStreamEvent | null {
try {
return JSON.parse(event.data) as OpenAIResponsesStreamEvent;
} catch (error) {
log.warn({ error: error.stack, event }, "Received invalid event");
}
return null;
}
+20 -16
@@ -24,25 +24,29 @@ import {
// https://docs.mistral.ai/platform/endpoints
export const KNOWN_MISTRAL_AI_MODELS = [
// Mistral 7b (open weight, legacy)
"open-mistral-7b",
"mistral-tiny-2312",
// Mixtral 8x7b (open weight, legacy)
"open-mixtral-8x7b",
"mistral-small-2312",
// Mixtral Small (newer 8x7b, closed weight)
"mistral-small-latest",
"mistral-small-2402",
// Mistral Medium
"mistral-small-2603",
"mistral-small-2506",
"mistral-medium-latest",
"mistral-medium-2312",
// Mistral Large
"mistral-medium-2508",
"mistral-medium-2505",
"magistral-medium-latest",
"magistral-medium-2507",
"magistral-small-2507",
"mistral-large-latest",
"mistral-large-2402",
// Deprecated identifiers (2024-05-01)
"mistral-tiny",
"mistral-small",
"mistral-medium",
"mistral-large-2512",
"ministral-14b-2512",
"ministral-8b-latest",
"ministral-8b-2512",
"ministral-3b-latest",
"ministral-3b-2512",
"pixtral-large-latest",
"pixtral-large-2411",
"codestral-latest",
"codestral-2508",
"devstral-small-latest",
"devstral-small-2507",
"devstral-medium-2507",
];
let modelsCache: any = null;
+7 -1
@@ -18,7 +18,13 @@ import {
import { generateModelList } from "./openai";
import { OpenAIImageGenerationResult } from "../shared/file-storage/mirror-generated-image";
const KNOWN_MODELS = ["dall-e-2", "dall-e-3"];
const KNOWN_MODELS = [
"dall-e-2",
"dall-e-3",
"gpt-image-1.5",
"gpt-image-1",
"gpt-image-1-mini",
];
let modelListCache: any = null;
let modelListValid = 0;
+63 -10
@@ -28,28 +28,57 @@ import {
// https://platform.openai.com/docs/models/overview
export const KNOWN_OPENAI_MODELS = [
"gpt-5.2",
"gpt-5.2-chat",
"gpt-5.2-chat-latest",
"gpt-5.2-pro",
"gpt-5.2-codex",
"gpt-5.1",
"gpt-5.1-chat",
"gpt-5.1-codex",
"gpt-5.1-codex-mini",
"gpt-5.1-codex-max",
"gpt-5",
"gpt-5-chat",
"gpt-5-pro",
"gpt-5-codex",
"gpt-5-mini",
"gpt-5-nano",
"gpt-4.1",
"gpt-4.1-2025-04-14",
"gpt-4.1-mini",
"gpt-4.1-nano",
"o3-pro",
"o3-deep-research",
"computer-use-preview",
"o4-mini",
"o4-mini-deep-research",
"o3",
"o3-mini",
"o1",
"o1-pro",
"gpt-4o",
"gpt-4o-2024-08-06",
"gpt-4o-mini",
"gpt-4o-2024-05-13",
"gpt-4-turbo", // alias for latest gpt4-turbo stable
"gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
"gpt-4-turbo-preview", // alias for latest turbo preview
"gpt-4-0125-preview", // gpt4-turbo preview 2
"gpt-4-1106-preview", // gpt4-turbo preview 1
"gpt-4-vision-preview", // gpt4-turbo preview 1 with vision
"gpt-4",
"gpt-4-0613",
"gpt-4-0314", // EOL 2024-06-13
"gpt-4-32k",
"gpt-4-32k-0314", // EOL 2024-06-13
"gpt-4-32k-0613",
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301", // EOL 2024-06-13
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-instruct",
"gpt-3.5-turbo-instruct-0914",
"text-embedding-3-small",
"text-embedding-3-large",
"text-embedding-ada-002",
"gpt-image-1.5",
"gpt-image-1",
"gpt-image-1-mini",
"dall-e-3",
"dall-e-2",
];
let modelsCache: any = null;
@@ -59,11 +88,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
// Get available families and snapshots
let availableFamilies = new Set<OpenAIModelFamily>();
const availableSnapshots = new Set<string>();
const availableModelIds = new Set<string>();
for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "openai") continue;
const asOpenAIKey = key as OpenAIKey;
asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f));
asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s));
asOpenAIKey.modelIds.forEach((id) => availableModelIds.add(id));
}
// Remove disabled families
@@ -71,8 +102,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
availableFamilies = new Set(
[...availableFamilies].filter((x) => allowed.has(x))
);
const usingExactModelIds = availableModelIds.size > 0;
return models
const sourceModels = usingExactModelIds
? [...new Set([...models, ...availableModelIds])]
: models;
return sourceModels
.map((id) => ({
id,
object: "model",
@@ -92,6 +128,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
parent: null,
}))
.filter((model) => {
if (usingExactModelIds) {
return (
allowed.has(getOpenAIModelFamily(model.id)) &&
availableModelIds.has(model.id)
);
}
// First check if the family is available
const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id));
if (!hasFamily) return false;
@@ -233,6 +276,16 @@ openaiRouter.post(
}),
openaiProxy
);
openaiRouter.post(
"/v1/responses",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai-responses",
outApi: "openai-responses",
service: "openai",
}),
openaiProxy
);
// Embeddings endpoint.
openaiRouter.post(
"/v1/embeddings",
+16 -9
@@ -31,18 +31,24 @@ export const AnthropicV1TextSchema = AnthropicV1BaseSchema.merge(
.int()
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
})
);
).passthrough();
const AnthropicV1MessageMultimodalContentSchema = z.array(
z.union([
z.object({ type: z.literal("text"), text: z.string() }),
z.object({
type: z.literal("image"),
source: z.object({
source: z.union([
z.object({
type: z.literal("base64"),
media_type: z.string().max(100),
data: z.string(),
}),
z.object({
type: z.literal("url"),
url: z.string().url(),
}),
]),
}),
])
);
@@ -65,7 +71,7 @@ export const AnthropicV1MessagesSchema = AnthropicV1BaseSchema.merge(
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
system: z.string().optional(),
})
);
).passthrough();
export type AnthropicChatMessage = z.infer<
typeof AnthropicV1MessagesSchema
>["messages"][0];
@@ -77,7 +83,7 @@ function openAIMessagesToClaudeTextPrompt(messages: OpenAIChatMessage[]) {
let role: string = m.role;
if (role === "assistant") {
role = "Assistant";
} else if (role === "system") {
} else if (role === "system" || role === "developer") {
role = "System";
} else if (role === "user") {
role = "Human";
@@ -115,12 +121,13 @@ export const transformOpenAIToAnthropicChat: APIFormatTransformer<
system,
messages: newMessages,
model: rest.model,
max_tokens: rest.max_tokens,
max_tokens: rest.max_completion_tokens ?? rest.max_tokens,
stream: rest.stream,
temperature: rest.temperature,
top_p: rest.top_p,
stop_sequences:
typeof rest.stop === "string" ? [rest.stop] : rest.stop || undefined,
...(rest.thinking ? { thinking: rest.thinking } : {}),
...(rest.user ? { metadata: { user_id: rest.user } } : {}),
// Anthropic supports top_k, but OpenAI does not
// OpenAI supports frequency_penalty, presence_penalty, logit_bias, n, seed,
@@ -162,7 +169,7 @@ export const transformOpenAIToAnthropicText: APIFormatTransformer<
return {
model: rest.model,
prompt: prompt,
max_tokens_to_sample: rest.max_tokens,
max_tokens_to_sample: rest.max_completion_tokens ?? rest.max_tokens,
stop_sequences: stops,
stream: rest.stream,
temperature: rest.temperature,
@@ -366,7 +373,7 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
// Here we will lose the original name if it was a system message, but that
// is generally okay because the system message is usually a prompt and not
// a character in the chat.
const name = msg.role === "system" ? "System" : msg.name?.trim();
const name = isSystemOpenAIRole(msg.role) ? "System" : msg.name?.trim();
const content = convertOpenAIContent(msg.content);
// Prepend the display name to the first text content in the current message
@@ -396,8 +403,8 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
function isSystemOpenAIRole(
role: OpenAIChatMessage["role"]
): role is "system" | "function" | "tool" {
return ["system", "function", "tool"].includes(role);
): role is "system" | "developer" | "function" | "tool" {
return ["system", "developer", "function", "tool"].includes(role);
}
function getFirstTextContent(content: OpenAIChatMessage["content"]) {
+200 -28
@@ -1,10 +1,21 @@
import { z } from "zod";
import {
flattenOpenAIMessageContent,
OpenAIChatMessage,
OpenAIV1ChatCompletionSchema,
} from "./openai";
import { APIFormatTransformer } from "./index";
const GoogleAIContentPartSchema = z.union([
z.object({ text: z.string() }),
z.object({
inline_data: z.object({
mime_type: z.string().max(100),
data: z.string(),
}),
}),
]);
// https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent
export const GoogleAIV1GenerateContentSchema = z
.object({
@@ -12,31 +23,40 @@ export const GoogleAIV1GenerateContentSchema = z
stream: z.boolean().optional().default(false), // also used for router
contents: z.array(
z.object({
parts: z.array(z.object({ text: z.string() })),
parts: z.array(GoogleAIContentPartSchema),
role: z.enum(["user", "model"]),
})
),
tools: z.array(z.object({})).max(0).optional(),
safetySettings: z.array(z.object({})).max(0).optional(),
tools: z.array(z.any()).optional(),
toolConfig: z.any().optional(),
safetySettings: z.array(z.any()).optional(),
systemInstruction: z.any().optional(),
generationConfig: z.object({
temperature: z.number().optional(),
maxOutputTokens: z.coerce
.number()
.int()
.optional()
.default(16)
.transform((v) => Math.min(v, 1024)), // TODO: Add config
.default(1024)
.transform((v) => Math.min(v, 65536)),
candidateCount: z.literal(1).optional(),
topP: z.number().optional(),
topK: z.number().optional(),
responseMimeType: z.string().optional(),
responseSchema: z.any().optional(),
responseJsonSchema: z.any().optional(),
responseModalities: z.array(z.string()).optional(),
thinkingConfig: z.any().optional(),
stopSequences: z.array(z.string().max(500)).max(5).optional(),
}),
})
.strip();
.passthrough();
export type GoogleAIChatMessage = z.infer<
typeof GoogleAIV1GenerateContentSchema
>["contents"][0];
type GoogleAIPart = GoogleAIChatMessage["parts"][number];
export const transformOpenAIToGoogleAI: APIFormatTransformer<
typeof GoogleAIV1GenerateContentSchema
> = async (req) => {
@@ -54,40 +74,51 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
}
const { messages, ...rest } = result.data;
const systemMessages = messages.filter(
(m) => m.role === "system" || m.role === "developer"
);
const foundNames = new Set<string>();
const model = req.body.model;
const customThinkingConfig =
getObjectField(body, "thinkingConfig") ??
getObjectField(getObjectField(body, "generationConfig"), "thinkingConfig");
const customResponseModalities = getStringArrayField(
getObjectField(body, "generationConfig"),
"responseModalities"
);
const contents = messages
.filter((m) => m.role !== "system" && m.role !== "developer")
.map((m) => {
const role = m.role === "assistant" ? "model" : "user";
// Detects character names so we can set stop sequences for them as Gemini
// is prone to continuing as the next character.
// If names are not available, we'll still try to prefix the message
// with generic names so we can set stops for them but they don't work
// as well as real names.
const text = flattenOpenAIMessageContent(m.content);
const parts = convertOpenAIContent(m.content);
const text = parts
.map((part) => ("text" in part ? part.text : ""))
.join("\n");
const propName = m.name?.trim();
const textName =
m.role === "system" ? "" : text.match(/^(.{0,50}?): /)?.[1]?.trim();
const name =
propName || textName || (role === "model" ? "Character" : "User");
const textName = text.match(/^(.{0,50}?): /)?.[1]?.trim();
const name = propName || textName || (role === "model" ? "Character" : "User");
foundNames.add(name);
// Prefixing messages with their character name seems to help avoid
// Gemini trying to continue as the next character, or at the very least
// ensures it will hit the stop sequence. Otherwise it will start a new
// paragraph and switch perspectives.
// The response will be very likely to include this prefix so frontends
// will need to strip it out.
// Prefixing speaker names helps Gemini avoid continuing as the next
// character in multi-party roleplay/chat prompts.
const textPrefix = textName ? "" : `${name}: `;
const firstTextPart = parts.find(
(part): part is Extract<GoogleAIPart, { text: string }> => "text" in part
);
if (firstTextPart) {
firstTextPart.text = textPrefix + firstTextPart.text;
}
return {
parts: [{ text: textPrefix + text }],
parts,
role: m.role === "assistant" ? ("model" as const) : ("user" as const),
};
})
.reduce<GoogleAIChatMessage[]>((acc, msg) => {
const last = acc[acc.length - 1];
if (last?.role === msg.role) {
last.parts[0].text += "\n\n" + msg.parts[0].text;
last.parts.push(...msg.parts);
} else {
acc.push(msg);
}
@@ -102,17 +133,44 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
stops.push(...Array.from(foundNames).map((name) => `\n${name}:`));
stops = [...new Set(stops)].slice(0, 5);
const responseFormat = rest.response_format as Record<string, any> | undefined;
const maxOutputTokens =
rest.max_completion_tokens ?? rest.max_tokens ?? 1024;
return {
model: req.body.model,
model,
stream: rest.stream,
contents,
tools: [],
tools: Array.isArray(rest.tools) ? rest.tools : undefined,
systemInstruction: systemMessages.length
? {
parts: [
{
text: systemMessages
.map((msg) => flattenOpenAIMessageContent(msg.content))
.join("\n\n"),
},
],
}
: undefined,
generationConfig: {
maxOutputTokens: rest.max_tokens,
maxOutputTokens,
stopSequences: stops,
topP: rest.top_p,
topK: 40, // openai schema doesn't have this, google ai defaults to 40
topK: 40, // OpenAI schema doesn't expose this; Gemini defaults to 40.
temperature: rest.temperature,
responseMimeType:
responseFormat?.type === "json_object" ||
responseFormat?.type === "json_schema"
? "application/json"
: undefined,
responseSchema: responseFormat?.json_schema?.schema,
responseJsonSchema: responseFormat?.json_schema?.schema,
responseModalities:
customResponseModalities ??
(isGoogleAIImageModel(model) ? ["TEXT", "IMAGE"] : undefined),
thinkingConfig:
customThinkingConfig ?? getThinkingConfig(model, rest.reasoning_effort),
},
safetySettings: [
{ category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
@@ -122,3 +180,117 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
],
};
};
function convertOpenAIContent(
content: OpenAIChatMessage["content"]
): GoogleAIPart[] {
if (typeof content === "string") {
return [{ text: content }];
}
return content.map((item) => {
if ("text" in item) {
return { text: item.text };
}
if ("refusal" in item) {
return { text: item.refusal };
}
const url = item.image_url.url;
if (!url.startsWith("data:")) {
return { text: "[ Unsupported image URL ]" };
}
const [meta, data = ""] = url.split(",", 2);
const mimeType = meta.split(";")[0].replace("data:", "");
return { inline_data: { mime_type: mimeType, data } };
});
}
function getThinkingConfig(model: string, reasoningEffort?: string) {
if (model.startsWith("gemini-2.5")) {
switch (reasoningEffort) {
case "none":
case "minimal":
case "low":
return { thinkingBudget: 0 };
default:
return undefined;
}
}
switch (reasoningEffort) {
case "low":
case "minimal":
case "none":
return { thinkingLevel: "LOW" };
case "medium":
case "high":
case "xhigh":
return { thinkingLevel: "HIGH" };
default:
return undefined;
}
}
export function isGoogleAIImageModel(model: string) {
return [
"gemini-2.0-flash-preview-image-generation",
"gemini-2.5-flash-image",
"gemini-3-pro-image-preview",
].includes(model);
}
export function flattenGoogleAIContentParts(
parts: Array<Record<string, any>> | undefined
) {
return (parts ?? [])
.map((part) => {
if (typeof part?.text === "string") {
return part.text;
}
const inlineData = part?.inline_data ?? part?.inlineData;
if (inlineData?.data) {
const mimeType = inlineData.mime_type ?? inlineData.mimeType ?? "image/png";
return `![generated image](data:${mimeType};base64,${inlineData.data})`;
}
return "";
})
.filter(Boolean)
.join("\n\n");
}
function getObjectField(
value: unknown,
key: string
): Record<string, any> | undefined {
if (
value &&
typeof value === "object" &&
!Array.isArray(value) &&
key in value &&
value[key as keyof typeof value] &&
typeof value[key as keyof typeof value] === "object" &&
!Array.isArray(value[key as keyof typeof value])
) {
return value[key as keyof typeof value] as Record<string, any>;
}
return undefined;
}
function getStringArrayField(value: unknown, key: string) {
if (
value &&
typeof value === "object" &&
!Array.isArray(value) &&
key in value &&
Array.isArray(value[key as keyof typeof value])
) {
return (value[key as keyof typeof value] as unknown[]).filter(
(item): item is string => typeof item === "string"
);
}
return undefined;
}
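For illustration, a minimal sketch (not part of the commit) of what convertOpenAIContent above produces for a typical mixed text/image message; the base64 payload is truncated for brevity:
const parts = convertOpenAIContent([
  { type: "text", text: "What is in this image?" },
  { type: "image_url", image_url: { url: "data:image/png;base64,iVBORw0KG..." } },
]);
// -> [
//      { text: "What is in this image?" },
//      { inline_data: { mime_type: "image/png", data: "iVBORw0KG..." } },
//    ]
// Non-data: URLs instead become { text: "[ Unsupported image URL ]" }.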
+12 -1
View File
@@ -17,6 +17,7 @@ import {
OpenAIV1ImagesGenerationSchema,
transformOpenAIToOpenAIImage,
} from "./openai-image";
import { OpenAIResponsesCreateSchema } from "./openai-responses";
import {
GoogleAIV1GenerateContentSchema,
transformOpenAIToGoogleAI,
@@ -24,13 +25,22 @@ import {
import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai";
export { OpenAIChatMessage } from "./openai";
export {
containsOpenAIResponsesImageInput,
flattenOpenAIResponsesInput,
flattenOpenAIResponsesOutput,
} from "./openai-responses";
export {
AnthropicChatMessage,
AnthropicV1TextSchema,
AnthropicV1MessagesSchema,
flattenAnthropicMessages,
} from "./anthropic";
export { GoogleAIChatMessage } from "./google-ai";
export {
GoogleAIChatMessage,
flattenGoogleAIContentParts,
isGoogleAIImageModel,
} from "./google-ai";
export { MistralAIChatMessage } from "./mistral-ai";
type APIPair = `${APIFormat}->${APIFormat}`;
@@ -55,6 +65,7 @@ export const API_REQUEST_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
"anthropic-chat": AnthropicV1MessagesSchema,
"anthropic-text": AnthropicV1TextSchema,
openai: OpenAIV1ChatCompletionSchema,
"openai-responses": OpenAIResponsesCreateSchema,
"openai-text": OpenAIV1TextCompletionSchema,
"openai-image": OpenAIV1ImagesGenerationSchema,
"google-ai": GoogleAIV1GenerateContentSchema,
+1 -1
View File
@@ -20,7 +20,7 @@ export const MistralAIV1ChatCompletionsSchema = z.object({
stream: z.boolean().optional().default(false),
safe_prompt: z.boolean().optional().default(false),
random_seed: z.number().int().optional(),
});
}).passthrough();
export type MistralAIChatMessage = z.infer<
typeof MistralAIV1ChatCompletionsSchema
>["messages"][0];
+33 -9
View File
@@ -5,19 +5,34 @@ import { APIFormatTransformer } from "./index";
// https://platform.openai.com/docs/api-reference/images/create
export const OpenAIV1ImagesGenerationSchema = z
.object({
prompt: z.string().max(4000),
prompt: z.string().max(32000),
model: z.string().max(100).optional(),
quality: z.enum(["standard", "hd"]).optional().default("standard"),
n: z.number().int().min(1).max(4).optional().default(1),
quality: z
.enum(["auto", "low", "medium", "high", "standard", "hd"])
.optional(),
n: z.number().int().min(1).max(10).optional().default(1),
response_format: z.enum(["url", "b64_json"]).optional(),
output_format: z.string().optional(),
output_compression: z.number().int().min(0).max(100).optional(),
size: z
.enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"])
.enum([
"auto",
"256x256",
"512x512",
"1024x1024",
"1024x1536",
"1536x1024",
"1792x1024",
"1024x1792",
])
.optional()
.default("1024x1024"),
style: z.enum(["vivid", "natural"]).optional().default("vivid"),
background: z.string().optional(),
moderation: z.string().optional(),
user: z.string().max(500).optional(),
})
.strip();
.passthrough();
// Takes the last chat message and uses it verbatim as the image prompt.
export const transformOpenAIToOpenAIImage: APIFormatTransformer<
@@ -57,12 +72,21 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
}
// TODO: Add some way to specify parameters via chat message
const transformed = {
model: body.model.includes("dall-e") ? body.model : "dall-e-3",
quality: "standard",
const requestedModel = String(body.model ?? "");
const model =
requestedModel.includes("dall-e") || requestedModel.includes("gpt-image")
? requestedModel
: "gpt-image-1.5";
const transformed: Record<string, any> = {
model,
size: "1024x1024",
response_format: "url",
prompt: prompt.slice(index! + 6).trim(),
};
if (model.includes("dall-e")) {
transformed.quality = "standard";
transformed.response_format = "url";
}
return OpenAIV1ImagesGenerationSchema.parse(transformed);
};
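Sketch of the model fallback above (example inputs illustrative, behavior as written):
// body.model = "dall-e-3"    -> "dall-e-3", with quality/response_format forced for DALL-E
// body.model = "gpt-image-1" -> "gpt-image-1", image params left to the API's defaults
// body.model = "gpt-4o"      -> falls back to "gpt-image-1.5"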
+136
View File
@@ -0,0 +1,136 @@
import { z } from "zod";
import { OPENAI_OUTPUT_MAX } from "./openai";
const OpenAIResponsesReasoningSchema = z
.object({
effort: z.string().optional(),
summary: z.union([z.string(), z.array(z.string())]).optional(),
})
.passthrough();
const OpenAIResponsesTextSchema = z
.object({
format: z.any().optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
})
.passthrough();
export const OpenAIResponsesCreateSchema = z
.object({
model: z.string().max(100),
input: z.union([z.string(), z.array(z.any())]).optional(),
instructions: z.union([z.string(), z.array(z.any())]).optional(),
previous_response_id: z.string().max(100).optional(),
stream: z.boolean().optional().default(false),
max_output_tokens: z.coerce
.number()
.int()
.nullish()
.default(OPENAI_OUTPUT_MAX)
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
temperature: z.number().optional(),
top_p: z.number().optional(),
user: z.string().max(500).optional(),
safety_identifier: z.string().max(500).optional(),
metadata: z.record(z.any()).optional(),
tools: z.array(z.any()).optional(),
tool_choice: z.any().optional(),
parallel_tool_calls: z.boolean().optional(),
include: z.array(z.string()).optional(),
store: z.boolean().optional(),
background: z.boolean().optional(),
reasoning: OpenAIResponsesReasoningSchema.optional(),
text: OpenAIResponsesTextSchema.optional(),
})
.passthrough();
export function flattenOpenAIResponsesInput(input: unknown): string {
return flattenResponseValue(input).trim();
}
export function flattenOpenAIResponsesOutput(body: Record<string, any>): string {
if (typeof body.output_text === "string" && body.output_text.trim()) {
return body.output_text.trim();
}
return flattenResponseValue(body.output ?? body.output_text).trim();
}
export function containsOpenAIResponsesImageInput(input: unknown): boolean {
return containsImage(input);
}
function flattenResponseValue(value: unknown): string {
if (value === null || value === undefined) return "";
if (typeof value === "string") return value;
if (typeof value === "number" || typeof value === "boolean") {
return String(value);
}
if (Array.isArray(value)) {
return value
.map((item) => flattenResponseValue(item))
.filter(Boolean)
.join("\n");
}
if (!isRecord(value)) return "";
const typed = value;
if (hasStringProp(typed, "text")) return typed.text;
if (hasStringProp(typed, "refusal")) return typed.refusal;
if (hasStringProp(typed, "summary")) return typed.summary;
if (hasStringProp(typed, "arguments")) return typed.arguments;
if (hasStringProp(typed, "result")) return typed.result;
const type = String(typed.type ?? "");
if (type.includes("image")) return "[ Uploaded Image Omitted ]";
if (type.includes("file")) return "[ File Omitted ]";
if (typeof typed.role === "string" && typed.content !== undefined) {
const content = flattenResponseValue(typed.content);
return content ? `${typed.role}: ${content}` : typed.role;
}
const nested = [
typed.content,
typed.input,
typed.output,
typed.summary,
typed.results,
typed.item,
typed.items,
];
for (const candidate of nested) {
const flattened = flattenResponseValue(candidate);
if (flattened) return flattened;
}
return "";
}
function containsImage(value: unknown): boolean {
if (value === null || value === undefined) return false;
if (Array.isArray(value)) return value.some((item) => containsImage(item));
if (!isRecord(value)) return false;
const typed = value;
const type = String(typed.type ?? "");
if (type.includes("image")) return true;
if (typed.image_url || typed.image || typed.input_image || typed.inline_data) {
return true;
}
return Object.values(typed).some((item) => containsImage(item));
}
function hasStringProp<T extends string>(
value: Record<string, unknown>,
key: T
): value is Record<string, unknown> & Record<T, string> {
return typeof value[key] === "string";
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null;
}
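A sketch of how the flatteners behave on a typical Responses API body (payload illustrative):
const text = flattenOpenAIResponsesOutput({
  output: [
    { type: "reasoning", summary: "Considered the question." },
    {
      type: "message",
      role: "assistant",
      content: [{ type: "output_text", text: "Hello!" }],
    },
  ],
});
// -> "Considered the question.\nassistant: Hello!"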
+32 -5
View File
@@ -7,6 +7,7 @@ export const OPENAI_OUTPUT_MAX = config.maxOutputTokensOpenAI;
const OpenAIV1ChatContentArraySchema = z.array(
z.union([
z.object({ type: z.literal("text"), text: z.string() }),
z.object({ type: z.literal("refusal"), refusal: z.string() }),
z.object({
type: z.union([z.literal("image"), z.literal("image_url")]),
image_url: z.object({
@@ -21,7 +22,14 @@ export const OpenAIV1ChatCompletionSchema = z
model: z.string().max(100),
messages: z.array(
z.object({
role: z.enum(["system", "user", "assistant", "tool", "function"]),
role: z.enum([
"system",
"developer",
"user",
"assistant",
"tool",
"function",
]),
content: z.union([z.string(), OpenAIV1ChatContentArraySchema]),
name: z.string().optional(),
tool_calls: z.array(z.any()).optional(),
@@ -54,11 +62,20 @@ export const OpenAIV1ChatCompletionSchema = z
.nullish()
.default(Math.min(OPENAI_OUTPUT_MAX, 4096))
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
max_completion_tokens: z.coerce
.number()
.int()
.nullish()
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
frequency_penalty: z.number().optional().default(0),
presence_penalty: z.number().optional().default(0),
logit_bias: z.any().optional(),
metadata: z.record(z.any()).optional(),
user: z.string().max(500).optional(),
safety_identifier: z.string().max(500).optional(),
seed: z.number().int().optional(),
prompt_cache_key: z.string().max(500).optional(),
prompt_cache_retention: z.string().optional(),
// Be warned that Azure OpenAI combines these two into a single field.
// It's the only deviation from the OpenAI API that I'm aware of, so I've
// special-cased it in `addAzureKey` rather than expecting clients to do it.
@@ -70,14 +87,23 @@ export const OpenAIV1ChatCompletionSchema = z
functions: z.array(z.any()).optional(),
tool_choice: z.any().optional(),
function_choice: z.any().optional(),
response_format: z.any(),
response_format: z.any().optional(),
parallel_tool_calls: z.boolean().optional(),
reasoning_effort: z.string().optional(),
stream_options: z.any().optional(),
modalities: z.array(z.string()).optional(),
audio: z.any().optional(),
prediction: z.any().optional(),
web_search_options: z.any().optional(),
service_tier: z.string().optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
})
// Tool usage must be enabled via config because we currently have no way to
// track quota usage for them or enforce limits.
.omit(
Boolean(config.allowOpenAIToolUsage) ? {} : { tools: true, functions: true }
)
.strip();
.passthrough();
export type OpenAIChatMessage = z.infer<
typeof OpenAIV1ChatCompletionSchema
>["messages"][0];
@@ -89,6 +115,7 @@ export function flattenOpenAIMessageContent(
? content
.map((contentItem) => {
if ("text" in contentItem) return contentItem.text;
if ("refusal" in contentItem) return contentItem.refusal;
if ("image_url" in contentItem) return "[ Uploaded Image Omitted ]";
})
.join("\n")
@@ -107,7 +134,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
let role: string = m.role;
if (role === "assistant") {
role = "Assistant";
} else if (role === "system") {
} else if (role === "system" || role === "developer") {
role = "System";
} else if (role === "user") {
role = "User";
@@ -121,7 +148,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
.map((m) => {
// Claude without prefixes (except system) and no Assistant priming
let role: string = "";
if (role === "system") {
if (m.role === "system" || m.role === "developer") {
role = "System: ";
}
return `\n\n${role}${flattenOpenAIMessageContent(m.content)}`;
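Sketch of the Claude-style branch above after the developer-role fix (inputs illustrative):
// { role: "developer", content: "Be terse." } -> "\n\nSystem: Be terse."
// { role: "user", content: "hi" }             -> "\n\nhi"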
+4 -4
View File
@@ -54,10 +54,10 @@ export class AwsKeyChecker extends KeyCheckerBase<AwsBedrockKey> {
if (isInitialCheck) {
checks = [
this.invokeModel("anthropic.claude-v2", key),
this.invokeModel("anthropic.claude-3-sonnet-20240229-v1:0", key),
this.invokeModel("anthropic.claude-3-haiku-20240307-v1:0", key),
this.invokeModel("anthropic.claude-3-opus-20240229-v1:0", key),
this.invokeModel("anthropic.claude-3-5-sonnet-20240620-v1:0", key),
this.invokeModel("anthropic.claude-sonnet-4-5-20250929-v1:0", key),
this.invokeModel("anthropic.claude-haiku-4-5-20251001-v1:0", key),
this.invokeModel("anthropic.claude-opus-4-1-20250805-v1:0", key),
this.invokeModel("anthropic.claude-3-5-haiku-20241022-v1:0", key),
];
}
+29 -8
View File
@@ -35,9 +35,15 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
}
protected async testKeyOrFail(key: AzureOpenAIKey) {
const model = await this.testModel(key);
this.log.info({ key: key.hash, deploymentModel: model }, "Checked key.");
this.updateKey(key.hash, { modelFamilies: [model] });
const result = await this.testModel(key);
this.log.info(
{ key: key.hash, deploymentModel: result.modelIds[0] ?? result.family },
"Checked key."
);
this.updateKey(key.hash, {
modelFamilies: [result.family],
modelIds: result.modelIds,
});
}
protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) {
@@ -107,7 +113,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
this.updateKey(key.hash, { lastChecked: next });
}
private async testModel(key: AzureOpenAIKey) {
private async testModel(key: AzureOpenAIKey): Promise<{
family: ReturnType<typeof getAzureOpenAIModelFamily>;
modelIds: string[];
}> {
const { apiKey, deploymentId, resourceName } =
AzureOpenAIKeyChecker.getCredentialsFromKey(key);
const url = POST_CHAT_COMPLETIONS(resourceName, deploymentId);
@@ -126,7 +135,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
// we try to invoke /chat/completions on dall-e-3. This is expected and
// indicates a DALL-E deployment.
if (response.status === 400) {
if (data.error.code === "OperationNotSupported") return "azure-dall-e";
if (data.error.code === "OperationNotSupported") {
return {
family: "azure-dall-e",
modelIds: ["dall-e-3", "gpt-image-1", "gpt-image-1-mini", "gpt-image-1.5"],
};
}
throw new AxiosError(
`Unexpected error when testing deployment ${deploymentId}`,
"AZURE_TEST_ERROR",
@@ -137,11 +151,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
}
const family = getAzureOpenAIModelFamily(data.model);
const normalizedModel = normalizeAzureModelId(data.model);
// Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks.
// Otherwise we can use the model family Azure returned.
if (family !== "azure-gpt4") {
return family;
return { family, modelIds: [normalizedModel] };
}
// Try to send an oversized prompt. GPT-4 Turbo can handle this but regular
@@ -160,8 +175,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
const code = contextTest.error?.code;
this.log.debug({ code, status }, "Performed Azure GPT4 context size test.");
if (code === "context_length_exceeded") return "azure-gpt4";
return "azure-gpt4-turbo";
if (code === "context_length_exceeded") {
return { family: "azure-gpt4", modelIds: ["gpt-4"] };
}
return { family: "azure-gpt4-turbo", modelIds: ["gpt-4-turbo"] };
}
static errorIsAzureError(error: AxiosError): error is AxiosError<AzureError> {
@@ -179,3 +196,7 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
return { resourceName, deploymentId, apiKey };
}
}
function normalizeAzureModelId(model: string) {
return model.replace("gpt-35-turbo", "gpt-3.5-turbo");
}
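Sketch of the probe outcomes above, for an Azure deployment that reports model "gpt-4":
// oversized prompt rejected with context_length_exceeded
//   -> { family: "azure-gpt4", modelIds: ["gpt-4"] }
// oversized prompt accepted
//   -> { family: "azure-gpt4-turbo", modelIds: ["gpt-4-turbo"] }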
+12 -1
View File
@@ -14,6 +14,8 @@ type AzureOpenAIKeyUsage = {
export interface AzureOpenAIKey extends Key, AzureOpenAIKeyUsage {
readonly service: "azure";
readonly modelFamilies: AzureOpenAIModelFamily[];
/** Exact model IDs or deployment aliases known to be backed by this key. */
modelIds: string[];
/** The time at which this key was last rate limited. */
rateLimitedAt: number;
/** The time until which this key is rate limited. */
@@ -62,6 +64,7 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
rateLimitedAt: 0,
rateLimitedUntil: 0,
contentFiltering: false,
modelIds: [],
hash: `azu-${crypto
.createHash("sha256")
.update(key)
@@ -73,6 +76,8 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
"azure-gpt4-32kTokens": 0,
"azure-gpt4-turboTokens": 0,
"azure-gpt4oTokens": 0,
"azure-gpt5Tokens": 0,
"azure-o-seriesTokens": 0,
"azure-dall-eTokens": 0,
};
this.keys.push(newKey);
@@ -96,8 +101,14 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
public get(model: string) {
const neededFamily = getAzureOpenAIModelFamily(model);
const normalizedModel = model
.replace(/^azure-/, "")
.replace("gpt-35-turbo", "gpt-3.5-turbo");
const availableKeys = this.keys.filter(
(k) => !k.isDisabled && k.modelFamilies.includes(neededFamily)
(k) =>
!k.isDisabled &&
k.modelFamilies.includes(neededFamily) &&
(!k.modelIds.length || k.modelIds.includes(normalizedModel))
);
if (availableKeys.length === 0) {
throw new PaymentRequiredError(
+8 -8
View File
@@ -32,10 +32,10 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
const isInitialCheck = !key.lastChecked;
if (isInitialCheck) {
checks = [
this.invokeModel("claude-3-haiku@20240307", key, true),
this.invokeModel("claude-3-sonnet@20240229", key, true),
this.invokeModel("claude-3-opus@20240229", key, true),
this.invokeModel("claude-3-5-sonnet@20240620", key, true),
this.invokeModel("claude-haiku-4-5@20251001", key, true),
this.invokeModel("claude-sonnet-4-5@20250929", key, true),
this.invokeModel("claude-opus-4-1@20250805", key, true),
this.invokeModel("claude-3-5-haiku@20241022", key, true),
];
const [haiku, sonnet, opus, sonnet35] =
@@ -66,13 +66,13 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
});
} else {
if (key.haikuEnabled) {
await this.invokeModel("claude-3-haiku@20240307", key, false)
await this.invokeModel("claude-haiku-4-5@20251001", key, false)
} else if (key.sonnetEnabled) {
await this.invokeModel("claude-3-sonnet@20240229", key, false)
await this.invokeModel("claude-sonnet-4-5@20250929", key, false)
} else if (key.sonnet35Enabled) {
await this.invokeModel("claude-3-5-sonnet@20240620", key, false)
await this.invokeModel("claude-3-5-haiku@20241022", key, false)
} else {
await this.invokeModel("claude-3-opus@20240229", key, false)
await this.invokeModel("claude-opus-4-1@20250805", key, false)
}
this.updateKey(key.hash, { lastChecked: Date.now() });
+1
View File
@@ -4,6 +4,7 @@ import { KeyPool } from "./key-pool";
/** The request and response format used by a model's API. */
export type APIFormat =
| "openai"
| "openai-responses"
| "openai-text"
| "openai-image"
| "anthropic-chat" // Anthropic's newer messages array format
+4 -1
View File
@@ -111,7 +111,10 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
const familiesArray = [...families];
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
this.updateKey(key.hash, {
modelSnapshots: models.filter((m) => m.match(/-\d{4}(-preview)?$/)),
modelIds: models,
modelSnapshots: models.filter((m) =>
m.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/)
),
modelFamilies: familiesArray,
lastChecked: keyFromPool.lastChecked,
});
+12 -2
View File
@@ -16,6 +16,8 @@ type OpenAIKeyUsage = {
export interface OpenAIKey extends Key, OpenAIKeyUsage {
readonly service: "openai";
modelFamilies: OpenAIModelFamily[];
/** Exact model IDs reported by the models API for this key. */
modelIds: string[];
/**
* Some keys are assigned to multiple organizations, each with their own quota
* limits. We clone the key for each organization and track usage/disabled
@@ -97,6 +99,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4" as const,
"gpt4-turbo" as const,
"gpt4o" as const,
"gpt5" as const,
"o-series" as const,
],
isTrial: false,
isDisabled: false,
@@ -118,8 +122,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4-32kTokens": 0,
"gpt4-turboTokens": 0,
gpt4oTokens: 0,
gpt5Tokens: 0,
"o-seriesTokens": 0,
"dall-eTokens": 0,
gpt4Rpm: 0,
modelIds: [],
modelSnapshots: [],
};
this.keys.push(newKey);
@@ -160,8 +167,10 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
if (model === "gpt-4-32k") model = "gpt-4-32k-0613";
const neededFamily = getOpenAIModelFamily(model);
const excludeTrials = model === "text-embedding-ada-002";
const needsSnapshot = model.match(/-\d{4}(-preview)?$/);
const excludeTrials = /^text-embedding-(?:3-small|3-large|ada-002)$/.test(
model
);
const needsSnapshot = model.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/);
const availableKeys = this.keys.filter(
// Allow keys which
@@ -169,6 +178,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
!key.isDisabled && // are not disabled
key.modelFamilies.includes(neededFamily) && // have access to the model family we need
(!excludeTrials || !key.isTrial) && // and are not trials if we don't want them
(!key.modelIds.length || key.modelIds.includes(model)) && // and have the requested model if exact inventory is available
(!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need
);
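Sketch of the narrowed key selection (inventory values illustrative):
// key.modelIds = ["gpt-5.1", "o3"]
//   get("gpt-5.1")    -> key eligible (family and exact ID both match)
//   get("gpt-5-mini") -> key skipped (family matches, ID not in inventory)
// key.modelIds = [] (key not yet re-checked) -> family-only matching, as before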
+48 -2
View File
@@ -23,6 +23,8 @@ export type OpenAIModelFamily =
| "gpt4-32k"
| "gpt4-turbo"
| "gpt4o"
| "gpt5"
| "o-series"
| "dall-e";
export type AnthropicModelFamily = "claude" | "claude-opus";
export type GoogleAIModelFamily = "gemini-pro";
@@ -51,6 +53,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"gpt4-32k",
"gpt4-turbo",
"gpt4o",
"gpt5",
"o-series",
"dall-e",
"claude",
"claude-opus",
@@ -68,6 +72,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"azure-gpt4-32k",
"azure-gpt4-turbo",
"azure-gpt4o",
"azure-gpt5",
"azure-o-series",
"azure-dall-e",
] as const);
@@ -84,6 +90,10 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
] as const);
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-5(\\.\\d+)?([-.].+)?$": "gpt5",
"^o\\d([-.].+)?$": "o-series",
"^computer-use-preview$": "o-series",
"^gpt-4\\.1([-.].+)?$": "gpt4o",
"^gpt-4o": "gpt4o",
"^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
"^gpt-4-turbo(-preview)?$": "gpt4-turbo",
@@ -94,7 +104,8 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-4-\\d{4}$": "gpt4",
"^gpt-4$": "gpt4",
"^gpt-3.5-turbo": "turbo",
"^text-embedding-ada-002$": "turbo",
"^text-embedding-(ada-002|3-small|3-large)$": "turbo",
"^gpt-image-1([-.].+)?$": "dall-e",
"^dall-e-\\d{1}$": "dall-e",
};
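For reference, how a few IDs resolve under the updated map (sample IDs illustrative):
// "gpt-5.1"      -> "gpt5"      via ^gpt-5(\.\d+)?([-.].+)?$
// "o3-mini"      -> "o-series"  via ^o\d([-.].+)?$
// "gpt-4.1-mini" -> "gpt4o"     via ^gpt-4\.1([-.].+)?$
// "gpt-image-1"  -> "dall-e"    via ^gpt-image-1([-.].+)?$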
@@ -106,6 +117,8 @@ export const MODEL_FAMILY_SERVICE: {
"gpt4-turbo": "openai",
"gpt4-32k": "openai",
"gpt4o": "openai",
gpt5: "openai",
"o-series": "openai",
"dall-e": "openai",
claude: "anthropic",
"claude-opus": "anthropic",
@@ -118,6 +131,8 @@ export const MODEL_FAMILY_SERVICE: {
"azure-gpt4-32k": "azure",
"azure-gpt4-turbo": "azure",
"azure-gpt4o": "azure",
"azure-gpt5": "azure",
"azure-o-series": "azure",
"azure-dall-e": "azure",
"gemini-pro": "google-ai",
"mistral-tiny": "mistral-ai",
@@ -150,7 +165,10 @@ export function getGoogleAIModelFamily(_model: string): ModelFamily {
}
export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
const prunedModel = model.replace(/-(latest|\d{4})$/, "");
const prunedModel = model.replace(
/-(latest|\d{4}|\d{6}|\d+\.\d+|v\d+(:\d+)?)$/,
""
);
switch (prunedModel) {
case "mistral-tiny":
case "mistral-small":
@@ -161,7 +179,34 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
return "mistral-tiny";
case "open-mixtral-8x7b":
return "mistral-small";
case "ministral-3b":
case "ministral-8b":
case "mistral-small-3.1":
case "mistral-small-3.2":
return "mistral-small";
case "magistral-medium":
return "mistral-medium";
case "codestral":
case "devstral":
case "mistral-large-2":
case "mistral-large-3":
case "pixtral-large":
return "mistral-large";
default:
if (model.startsWith("mistral-small") || model.startsWith("ministral")) {
return "mistral-small";
}
if (model.startsWith("mistral-medium") || model.startsWith("magistral")) {
return "mistral-medium";
}
if (
model.startsWith("mistral-large") ||
model.startsWith("pixtral-large") ||
model.startsWith("codestral") ||
model.startsWith("devstral")
) {
return "mistral-large";
}
return "mistral-tiny";
}
}
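Expected resolutions under the updated pruning regex and fallbacks (sample IDs illustrative):
// "mistral-large-2411"   -> pruned to "mistral-large"    -> "mistral-large"
// "ministral-8b-latest"  -> pruned to "ministral-8b"     -> "mistral-small"
// "magistral-medium-1.2" -> pruned to "magistral-medium" -> "mistral-medium"
// "pixtral-large-2502"   -> pruned to "pixtral-large"    -> "mistral-large"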
@@ -225,6 +270,7 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
modelFamily = getClaudeModelFamily(model);
break;
case "openai":
case "openai-responses":
case "openai-text":
case "openai-image":
modelFamily = getOpenAIModelFamily(model);
+8
View File
@@ -10,6 +10,14 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
case "azure-gpt4o":
cost = 0.000005;
break;
case "gpt5":
case "azure-gpt5":
cost = 0.00001;
break;
case "o-series":
case "azure-o-series":
cost = 0.000012;
break;
case "azure-gpt4-turbo":
case "gpt4-turbo":
cost = 0.00001;
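Worked example of the new rates (assuming the function multiplies the per-token rate by the count, as its signature suggests):
// getTokenCostUsd("gpt5", 1_000_000)     -> 1,000,000 * 0.00001  = $10.00
// getTokenCostUsd("o-series", 1_000_000) -> 1,000,000 * 0.000012 = $12.00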
+7
View File
@@ -65,7 +65,14 @@ async function getTokenCountForMessages({
numTokens += encoder.encode(text.normalize("NFKC"), "all").length;
break;
case "image":
if (part.source.type === "base64") {
numTokens += await getImageTokenCount(part.source.data);
} else {
// Remote image URLs reference content we never download, so we cannot
// inspect the image's dimensions locally. Charge the documented
// worst-case token cost instead of undercounting the image as zero.
numTokens += 1600;
}
break;
default:
throw new Error(`Unsupported Anthropic content type.`);
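The 1600-token fallback is an assumption based on Anthropic's published sizing guidance, not a value from this codebase:
// Assumed rule per Anthropic's vision docs: tokens ~= (widthPx * heightPx) / 750,
// which tops out around ~1,590 tokens for a 1092x1092 image; 1600 is therefore
// a conservative worst-case charge for images we cannot measure.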
+36 -8
View File
@@ -179,16 +179,33 @@ export const DALLE_TOKENS_PER_DOLLAR = 100000;
* which we convert to tokens at a rate of 100000 tokens per dollar.
*/
export function getOpenAIImageCost(params: {
model: "dall-e-2" | "dall-e-3";
quality: "standard" | "hd";
resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024";
model:
| "dall-e-2"
| "dall-e-3"
| "gpt-image-1"
| "gpt-image-1-mini"
| "gpt-image-1.5";
quality: "auto" | "low" | "medium" | "high" | "standard" | "hd";
resolution:
| "auto"
| "512x512"
| "256x256"
| "1024x1024"
| "1024x1536"
| "1536x1024"
| "1024x1792"
| "1792x1024";
n: number | null;
}) {
const { model, quality, resolution, n } = params;
const normalizedResolution =
resolution === "auto" ? "1024x1024" : resolution;
const normalizedQuality =
quality === "hd" || quality === "high" ? "hd" : "standard";
const usd = (() => {
switch (model) {
case "dall-e-2":
switch (resolution) {
switch (normalizedResolution) {
case "512x512":
return 0.018;
case "256x256":
@@ -199,12 +216,20 @@ export function getOpenAIImageCost(params: {
throw new Error("Invalid resolution");
}
case "dall-e-3":
switch (resolution) {
case "gpt-image-1.5":
case "gpt-image-1":
case "gpt-image-1-mini":
// GPT Image models have newer parameter ranges, but we still account
// for them using the existing DALL-E 3-style price buckets so the
// proxy can continue to enforce rough quota/cost limits.
switch (normalizedResolution) {
case "1024x1024":
return quality === "standard" ? 0.04 : 0.08;
return normalizedQuality === "standard" ? 0.04 : 0.08;
case "1024x1536":
case "1536x1024":
case "1024x1792":
case "1792x1024":
return quality === "standard" ? 0.08 : 0.12;
return normalizedQuality === "standard" ? 0.08 : 0.12;
default:
throw new Error("Invalid resolution");
}
@@ -233,7 +258,10 @@ export function estimateGoogleAITokenCount(
let numTokens = 0;
for (const message of prompt) {
numTokens += tokensPerMessage;
numTokens += encoder.encode(message.parts[0].text).length;
const text = message.parts
.map((part) => ("text" in part ? part.text : ""))
.join("\n");
numTokens += encoder.encode(text).length;
}
numTokens += 3;
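A hedged usage sketch of the widened image pricing (parameters illustrative; assumes the function returns the USD figure computed above):
const usd = getOpenAIImageCost({
  model: "gpt-image-1.5",
  quality: "high",         // normalized to the "hd" bucket
  resolution: "1024x1536", // billed like the 1024x1792 tier
  n: 1,
});
// hd at 1024x1536 prices at $0.12, i.e. 0.12 * DALLE_TOKENS_PER_DOLLAR
// = 12,000 quota tokens for the single image.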
+2 -1
View File
@@ -55,7 +55,7 @@ type MistralAIChatTokenCountRequest = {
type FlatPromptTokenCountRequest = {
prompt: string;
completion?: never;
service: "openai-text" | "anthropic-text" | "google-ai";
service: "openai-text" | "openai-responses" | "anthropic-text" | "google-ai";
};
type StringCompletionTokenCountRequest = {
@@ -105,6 +105,7 @@ export async function countTokens({
tokenization_duration_ms: getElapsedMs(time),
};
case "openai":
case "openai-responses":
case "openai-text":
return {
...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),
+1
View File
@@ -400,6 +400,7 @@ function getModelFamilyForQuotaUsage(
switch (api) {
case "openai":
case "openai-responses":
case "openai-text":
case "openai-image":
return getOpenAIModelFamily(model);