Update proxy for modern model APIs
+6 -2
@@ -40,11 +40,11 @@ NODE_ENV=production

# Which model types users are allowed to access.
# The following model families are recognized:
-# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-dall-e
+# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | gpt5 | o-series | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-gpt5 | azure-o-series | azure-dall-e
# By default, all models are allowed except for 'dall-e' / 'azure-dall-e'.
# To allow DALL-E image generation, uncomment the line below and add 'dall-e' or
# 'azure-dall-e' to the list of allowed model families.
-# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o
+# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,gpt5,o-series,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o,azure-gpt5,azure-o-series

# Which services can be used to process prompts containing images via multimodal
# models. The following services are recognized:
@@ -115,10 +115,14 @@ NODE_ENV=production
# TOKEN_QUOTA_GPT4=0
# TOKEN_QUOTA_GPT4_32K=0
# TOKEN_QUOTA_GPT4_TURBO=0
+# TOKEN_QUOTA_GPT5=0
+# TOKEN_QUOTA_O_SERIES=0
# TOKEN_QUOTA_CLAUDE=0
# TOKEN_QUOTA_GEMINI_PRO=0
# TOKEN_QUOTA_AWS_CLAUDE=0
# TOKEN_QUOTA_GCP_CLAUDE=0
+# TOKEN_QUOTA_AZURE_GPT5=0
+# TOKEN_QUOTA_AZURE_O_SERIES=0
# "Tokens" for image-generation models are counted at a rate of 100000 tokens
# per US$1.00 generated, which is similar to the cost of GPT-4 Turbo.
# DALL-E 3 costs around US$0.10 per image (10000 tokens).
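The arithmetic above is a straight proportional conversion. A minimal sketch of the documented rate (illustrative only; the helper name is my own, not from this repo):

```ts
// Hypothetical helper restating the documented rate:
// 100,000 "tokens" are charged per US$1.00 of generated images.
const IMAGE_TOKENS_PER_USD = 100_000;

function imageCostToTokens(usd: number): number {
  return Math.round(usd * IMAGE_TOKENS_PER_USD);
}

// DALL-E 3 at ~US$0.10/image => 10,000 tokens, matching the comment above.
console.log(imageCostToTokens(0.1)); // 10000
```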
@@ -45,11 +45,16 @@ You can also request Claude Instant, but support for this isn't fully implemente
### Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
-  - `anthropic.claude-v1` (~18k context, claude 1.3 -- EOL 2024-02-28)
  - `anthropic.claude-v2` (~100k context, claude 2.0)
  - `anthropic.claude-v2:1` (~200k context, claude 2.1)
-- **Claude Instant**
-  - `anthropic.claude-instant-v1` (~100k context, claude instant 1.2)
+  - `anthropic.claude-haiku-4-5-20251001-v1:0`
+  - `anthropic.claude-sonnet-4-5-20250929-v1:0`
+  - `anthropic.claude-opus-4-1-20250805-v1:0`
+  - `anthropic.claude-3-5-haiku-20241022-v1:0`
+  - `anthropic.claude-sonnet-4-20250514-v1:0`
+  - `anthropic.claude-opus-4-20250514-v1:0`

+For OpenAI-compatible callers, the proxy will also remap newer Claude-style names such as `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, `claude-opus-4-1-20250805`, and `claude-3-5-haiku-20241022` to the corresponding Bedrock model IDs.
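To see the remapping from the caller's side, an OpenAI-compatible request can name the Claude-style alias directly. A sketch, assuming a local proxy host and key, and assuming the OpenAI-compatible AWS chat route follows the same prefix as this repo's other examples:

```ts
// Illustrative only: PROXY_HOST, PROXY_KEY, and the exact route prefix are
// assumptions, not values confirmed by this commit.
const PROXY_HOST = "http://localhost:7860";
const PROXY_KEY = "my-proxy-key";

async function chatViaBedrockAlias() {
  const res = await fetch(`${PROXY_HOST}/proxy/aws/claude/v1/chat/completions`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${PROXY_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      // Claude-style alias; per the note above, the proxy should rewrite this
      // to "anthropic.claude-sonnet-4-5-20250929-v1:0" before forwarding.
      model: "claude-sonnet-4-5-20250929",
      max_tokens: 64,
      messages: [{ role: "user", content: "Hello!" }],
    }),
  });
  console.log(await res.json());
}
```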

## Note regarding logging
@@ -20,7 +20,9 @@ AZURE_CREDENTIALS=contoso-ml:gpt4-8k:0123456789abcdef0123456789abcdef,northwind-
Note that each Azure deployment is assigned a model when you create it in the Azure OpenAI Service portal. If you want to use a different model, you'll need to create a new deployment, and therefore a new key to be added to the AZURE_CREDENTIALS environment variable. Each credential only grants access to one model.

### Supported model IDs
-Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. For the most part they work the same with Azure. GPT-3.5 Turbo has an ID of "gpt-35-turbo" because Azure doesn't allow periods in model names, but the proxy should automatically convert this to the correct ID.
+Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. The proxy now understands newer Azure-backed OpenAI model families such as GPT-4o, GPT-4.1, GPT-5 / GPT-5.2, o-series reasoning models, and GPT Image deployments including `gpt-image-1.5`, plus the newer Responses API route at `/proxy/azure/openai/v1/responses`.
+
+GPT-3.5 Turbo still has an Azure-specific ID of `gpt-35-turbo` because Azure doesn't allow periods in model names, but the proxy will automatically normalize that for you.

As noted above, you can only use model IDs for which a deployment has been created and added to the proxy.
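For example, a Responses API call through the new Azure route might look like this sketch (the route comes from this commit's router changes; proxy host and key are placeholders):

```ts
// Sketch of a Responses API call via the proxy's new Azure route.
// PROXY_HOST / PROXY_KEY are placeholders, not values from this repo.
const PROXY_HOST = "http://localhost:7860";
const PROXY_KEY = "my-proxy-key";

async function azureResponsesExample() {
  const res = await fetch(`${PROXY_HOST}/proxy/azure/openai/v1/responses`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${PROXY_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "gpt-5.2", // must correspond to a deployment in AZURE_CREDENTIALS
      reasoning: { effort: "medium" },
      max_output_tokens: 64,
      input: "This is a test prompt.",
    }),
  });
  console.log(await res.json());
}
```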
@@ -29,7 +29,11 @@ GCP_CREDENTIALS=my-first-project:xxx@yyy.com:us-east5:-----BEGIN PRIVATE KEY----
## Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
-  - `claude-3-haiku@20240307`
-  - `claude-3-sonnet@20240229`
-  - `claude-3-opus@20240229`
-  - `claude-3-5-sonnet@20240620`
+  - `claude-haiku-4-5@20251001`
+  - `claude-sonnet-4-5@20250929`
+  - `claude-opus-4-1@20250805`
+  - `claude-3-5-haiku@20241022`
+  - `claude-sonnet-4@20250514`
+  - `claude-opus-4@20250514`

+For OpenAI-compatible callers, the proxy will also remap Claude-style aliases like `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, and `claude-opus-4-1-20250805` to the corresponding Vertex AI model IDs.
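Note the Vertex AI form swaps the final dash before the date stamp for an `@`. A minimal sketch of that conversion (my illustration; the proxy's actual remapping lives in this commit's `maybeReassignModel`):

```ts
// Illustrative only: convert an Anthropic-style dated alias to the
// Vertex AI "@date" form, e.g.
//   "claude-opus-4-1-20250805" -> "claude-opus-4-1@20250805"
function toVertexModelId(alias: string): string {
  return alias.replace(/-(\d{8})$/, "@$1");
}

console.log(toVertexModelId("claude-sonnet-4-5-20250929")); // claude-sonnet-4-5@20250929
```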
@@ -7,8 +7,8 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json

{
-  "model": "gpt-3.5-turbo",
-  "max_tokens": 30,
+  "model": "gpt-4.1-mini",
+  "max_completion_tokens": 30,
  "stream": false,
  "messages": [
    {
@@ -18,6 +18,19 @@ Content-Type: application/json
  ]
}

+###
+# @name OpenAI -- Responses API
+POST https://api.openai.com/v1/responses
+Authorization: Bearer {{oai-key-1}}
+Content-Type: application/json
+
+{
+  "model": "gpt-5.2",
+  "reasoning": { "effort": "medium" },
+  "max_output_tokens": 80,
+  "input": "This is a test prompt."
+}
+
###
# @name OpenAI -- Text Completions
POST https://api.openai.com/v1/completions
@@ -38,7 +51,7 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json

{
-  "model": "text-embedding-ada-002",
+  "model": "text-embedding-3-small",
  "input": "This is a test embedding input."
}
@@ -81,8 +94,8 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json

{
-  "model": "gpt-4-1106-preview",
-  "max_tokens": 20,
+  "model": "gpt-4.1",
+  "max_completion_tokens": 20,
  "stream": true,
  "temperature": 1,
  "seed": 123,
@@ -94,6 +107,20 @@ Content-Type: application/json
  ]
}

+###
+# @name Proxy / OpenAI -- Native Responses API
+POST {{proxy-host}}/proxy/openai/v1/responses
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "gpt-5.2",
+  "reasoning": { "effort": "medium" },
+  "max_output_tokens": 64,
+  "stream": false,
+  "input": "Summarize the purpose of this reverse proxy in one sentence."
+}
+
###
# @name Proxy / OpenAI -- Native Text Completions
POST {{proxy-host}}/proxy/openai/v1/turbo-instruct/chat/completions
@@ -142,7 +169,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json

{
-  "model": "text-embedding-ada-002",
+  "model": "text-embedding-3-small",
  "input": "This is a test embedding input."
}
@@ -185,7 +212,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json

{
-  "model": "gpt-3.5-turbo",
+  "model": "gpt-5.2",
  "max_tokens": 20,
  "stream": false,
  "temperature": 0,
@@ -197,6 +224,23 @@ Content-Type: application/json
  ]
}

+###
+# @name Proxy / Google AI -- OpenAI-Compat Image Generation
+POST {{proxy-host}}/proxy/google-ai/v1/chat/completions
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "gemini-2.5-flash-image",
+  "stream": false,
+  "messages": [
+    {
+      "role": "user",
+      "content": "Generate a flat vector-style illustration of a red fox reading a newspaper at a cafe table."
+    }
+  ]
+}
+
###
# @name Proxy / AWS Claude -- Native Completion
POST {{proxy-host}}/proxy/aws/claude/v1/complete
@@ -434,6 +434,8 @@ export const config: Config = {
    "gpt4-32k",
    "gpt4-turbo",
    "gpt4o",
+    "gpt5",
+    "o-series",
    "claude",
    "claude-opus",
    "gemini-pro",
@@ -450,6 +452,8 @@ export const config: Config = {
    "azure-gpt4-32k",
    "azure-gpt4-turbo",
    "azure-gpt4o",
+    "azure-gpt5",
+    "azure-o-series",
  ]),
  rejectPhrases: parseCsv(getEnvWithDefault("REJECT_PHRASES", "")),
  rejectMessage: getEnvWithDefault(
@@ -17,6 +17,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
  "gpt4-32k": "GPT-4 32k",
  "gpt4-turbo": "GPT-4 Turbo",
  gpt4o: "GPT-4o",
+  gpt5: "GPT-5",
+  "o-series": "o-Series",
  "dall-e": "DALL-E",
  claude: "Claude (Sonnet)",
  "claude-opus": "Claude (Opus)",
@@ -34,6 +36,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
  "azure-gpt4-32k": "Azure GPT-4 32k",
  "azure-gpt4-turbo": "Azure GPT-4 Turbo",
  "azure-gpt4o": "Azure GPT-4o",
+  "azure-gpt5": "Azure GPT-5",
+  "azure-o-series": "Azure o-Series",
  "azure-dall-e": "Azure DALL-E",
};
+28 -21
@@ -29,24 +29,18 @@ const getModelsResponse = () => {
  if (!config.anthropicKey) return { object: "list", data: [] };

  const claudeVariants = [
-    "claude-v1",
-    "claude-v1-100k",
-    "claude-instant-v1",
-    "claude-instant-v1-100k",
-    "claude-v1.3",
-    "claude-v1.3-100k",
-    "claude-v1.2",
-    "claude-v1.0",
-    "claude-instant-v1.1",
-    "claude-instant-v1.1-100k",
-    "claude-instant-v1.0",
-    "claude-2",
-    "claude-2.0",
-    "claude-2.1",
-    "claude-3-haiku-20240307",
-    "claude-3-opus-20240229",
-    "claude-3-sonnet-20240229",
-    "claude-3-5-sonnet-20240620"
+    "claude-sonnet-4-5",
+    "claude-sonnet-4-5-20250929",
+    "claude-haiku-4-5",
+    "claude-haiku-4-5-20251001",
+    "claude-opus-4-1",
+    "claude-opus-4-1-20250805",
+    "claude-opus-4-20250514",
+    "claude-sonnet-4-20250514",
+    "claude-3-5-haiku-20241022",
+    "claude-3-5-haiku-latest",
  ];

  const models = claudeVariants.map((id) => ({
@@ -230,7 +224,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware({
 * (claude-3 based models do not support the old text completion endpoint).
 */
const preprocessAnthropicTextRequest: RequestHandler = (req, res, next) => {
-  if (req.body.model?.startsWith("claude-3")) {
+  if (requiresAnthropicMessagesApi(req.body.model)) {
    textToChatPreprocessor(req, res, next);
  } else {
    nativeTextPreprocessor(req, res, next);
@@ -255,7 +249,7 @@ const oaiToChatPreprocessor = createPreprocessorMiddleware({
 */
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
  maybeReassignModel(req);
-  if (req.body.model?.includes("claude-3")) {
+  if (requiresAnthropicMessagesApi(req.body.model)) {
    oaiToChatPreprocessor(req, res, next);
  } else {
    oaiToTextPreprocessor(req, res, next);
@@ -315,7 +309,8 @@ function handleAnthropicTextCompatRequest(
  const type = req.params.type;
  const action = req.params.action;
  const alreadyInChatFormat = Boolean(req.body.messages);
-  const compatModel = `claude-3-${type}-20240229`;
+  const compatModel =
+    type === "opus" ? "claude-opus-4-1-20250805" : "claude-sonnet-4-5-20250929";
  req.log.info(
    { type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
    "Handling Anthropic compatibility request"
@@ -349,8 +344,20 @@ function handleAnthropicTextCompatRequest(
 */
function maybeReassignModel(req: Request) {
  const model = req.body.model;
-  if (!model.startsWith("gpt-")) return;
-  req.body.model = "claude-3-sonnet-20240229";
+  const lower = String(model).toLowerCase();
+  if (
+    lower.startsWith("gpt-") ||
+    lower.startsWith("o1") ||
+    lower.startsWith("o3") ||
+    lower.startsWith("o4") ||
+    lower === "computer-use-preview"
+  ) {
+    req.body.model = "claude-sonnet-4-5-20250929";
+  }
}

+function requiresAnthropicMessagesApi(model?: string) {
+  return /^claude-(?:3|sonnet|opus)/.test(model ?? "");
+}

export const anthropic = anthropicRouter;
+80 -13
@@ -20,6 +20,12 @@ import { transformAnthropicChatResponseToAnthropicText, transformAnthropicChatRe
import { sendErrorToClient } from "./middleware/response/error-generator";

const LATEST_AWS_V2_MINOR_VERSION = "1";
+const AWS_CLAUDE_SONNET_45 = "anthropic.claude-sonnet-4-5-20250929-v1:0";
+const AWS_CLAUDE_HAIKU_45 = "anthropic.claude-haiku-4-5-20251001-v1:0";
+const AWS_CLAUDE_OPUS_41 = "anthropic.claude-opus-4-1-20250805-v1:0";
+const AWS_CLAUDE_SONNET_4 = "anthropic.claude-sonnet-4-20250514-v1:0";
+const AWS_CLAUDE_OPUS_4 = "anthropic.claude-opus-4-20250514-v1:0";
+const AWS_CLAUDE_35_HAIKU = "anthropic.claude-3-5-haiku-20241022-v1:0";

let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -35,10 +41,12 @@ const getModelsResponse = () => {
  const variants = [
    "anthropic.claude-v2",
    "anthropic.claude-v2:1",
-    "anthropic.claude-3-haiku-20240307-v1:0",
-    "anthropic.claude-3-sonnet-20240229-v1:0",
-    "anthropic.claude-3-5-sonnet-20240620-v1:0",
-    "anthropic.claude-3-opus-20240229-v1:0",
+    AWS_CLAUDE_HAIKU_45,
+    AWS_CLAUDE_SONNET_45,
+    AWS_CLAUDE_OPUS_41,
+    AWS_CLAUDE_35_HAIKU,
+    AWS_CLAUDE_SONNET_4,
+    AWS_CLAUDE_OPUS_4,
  ];

  const models = variants.map((id) => ({
@@ -164,7 +172,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware(
 * (claude-3 based models do not support the old text completion endpoint).
 */
const preprocessAwsTextRequest: RequestHandler = (req, res, next) => {
-  if (req.body.model?.includes("claude-3")) {
+  if (requiresAnthropicMessagesApi(req.body.model)) {
    textToChatPreprocessor(req, res, next);
  } else {
    nativeTextPreprocessor(req, res, next);
@@ -186,7 +194,7 @@ const oaiToAwsChatPreprocessor = createPreprocessorMiddleware(
 * or the new Claude chat completion endpoint, based on the requested model.
 */
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
-  if (req.body.model?.includes("claude-3")) {
+  if (requiresAnthropicMessagesApi(req.body.model)) {
    oaiToAwsChatPreprocessor(req, res, next);
  } else {
    oaiToAwsTextPreprocessor(req, res, next);
@@ -241,12 +249,65 @@ awsRouter.post(
 */
function maybeReassignModel(req: Request) {
  const model = req.body.model;
+  const lower = String(model).toLowerCase();

  // If it looks like an AWS model, use it as-is
  if (model.includes("anthropic.claude")) {
    return;
  }

+  if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
+    req.body.model = AWS_CLAUDE_OPUS_41;
+    return;
+  }
+  if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
+    req.body.model = AWS_CLAUDE_OPUS_41;
+    return;
+  }
+  if (lower.includes("opus-4")) {
+    req.body.model = AWS_CLAUDE_OPUS_4;
+    return;
+  }
+  if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
+    req.body.model = AWS_CLAUDE_HAIKU_45;
+    return;
+  }
+  if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
+    req.body.model = AWS_CLAUDE_SONNET_45;
+    return;
+  }
+  if (lower.includes("sonnet-4")) {
+    req.body.model = AWS_CLAUDE_SONNET_4;
+    return;
+  }
+  if (lower.includes("3-5") && lower.includes("haiku")) {
+    req.body.model = AWS_CLAUDE_35_HAIKU;
+    return;
+  }
+  if (lower.includes("opus")) {
+    req.body.model = AWS_CLAUDE_OPUS_41;
+    return;
+  }
+  if (lower.includes("haiku")) {
+    req.body.model = AWS_CLAUDE_HAIKU_45;
+    return;
+  }
+  if (lower.includes("sonnet")) {
+    req.body.model = AWS_CLAUDE_SONNET_45;
+    return;
+  }
+
+  if (
+    lower.startsWith("gpt-") ||
+    lower.startsWith("o1") ||
+    lower.startsWith("o3") ||
+    lower.startsWith("o4") ||
+    lower === "computer-use-preview"
+  ) {
+    req.body.model = AWS_CLAUDE_SONNET_45;
+    return;
+  }
+
  // Anthropic model names can look like:
  // - claude-v1
  // - claude-2.1
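Because these checks run top-down, the more specific substrings must be tested before the generic ones ("sonnet-4.5" before "sonnet-4" before "sonnet"). Hand-traced expectations for the cascade above (my own trace, not test output from the repo):

```ts
// Hand-traced expectations for the substring cascade:
const expected: Record<string, string> = {
  "claude-sonnet-4-5": "anthropic.claude-sonnet-4-5-20250929-v1:0",
  "claude-sonnet-4-20250514": "anthropic.claude-sonnet-4-20250514-v1:0",
  "claude-3-5-haiku-latest": "anthropic.claude-3-5-haiku-20241022-v1:0",
  // OpenAI-style names fall through to the Sonnet 4.5 default:
  "gpt-4o": "anthropic.claude-sonnet-4-5-20250929-v1:0",
};
```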
@@ -282,20 +343,22 @@ function maybeReassignModel(req: Request) {
    case "3":
    case "3.0":
      if (name.includes("opus")) {
-        req.body.model = "anthropic.claude-3-opus-20240229-v1:0";
+        req.body.model = AWS_CLAUDE_OPUS_41;
      } else if (name.includes("haiku")) {
-        req.body.model = "anthropic.claude-3-haiku-20240307-v1:0";
+        req.body.model = AWS_CLAUDE_HAIKU_45;
      } else {
-        req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0";
+        req.body.model = AWS_CLAUDE_SONNET_45;
      }
      return;
    case "3.5":
-      req.body.model = "anthropic.claude-3-5-sonnet-20240620-v1:0";
+      req.body.model = name.includes("haiku")
+        ? AWS_CLAUDE_35_HAIKU
+        : AWS_CLAUDE_SONNET_45;
      return;
  }

-  // Fallback to Claude 2.1
-  req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;
+  // Fallback to Claude Sonnet 4.5
+  req.body.model = AWS_CLAUDE_SONNET_45;
  return;
}
@@ -306,7 +369,7 @@ export function handleCompatibilityRequest(
) {
  const action = req.params.action;
  const alreadyInChatFormat = Boolean(req.body.messages);
-  const compatModel = "anthropic.claude-3-5-sonnet-20240620-v1:0";
+  const compatModel = AWS_CLAUDE_SONNET_4;
  req.log.info(
    { inputModel: req.body.model, compatModel, alreadyInChatFormat },
    "Handling AWS compatibility request"
@@ -335,3 +398,7 @@ export function handleCompatibilityRequest(
}

export const aws = awsRouter;
+
+function requiresAnthropicMessagesApi(model?: string) {
+  return /claude-(?:3|sonnet|opus)/.test(model ?? "");
+}
+26 -2
@@ -32,16 +32,25 @@ function getModelsResponse() {
  }

  let available = new Set<AzureOpenAIModelFamily>();
+  const availableModelIds = new Set<string>();
  for (const key of keyPool.list()) {
    if (key.isDisabled || key.service !== "azure") continue;
+    const azureKey = key as any;
    key.modelFamilies.forEach((family) =>
      available.add(family as AzureOpenAIModelFamily)
    );
+    azureKey.modelIds?.forEach((id: string) => availableModelIds.add(id));
  }
  const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
  available = new Set([...available].filter((x) => allowed.has(x)));

-  const models = KNOWN_OPENAI_MODELS.map((id) => ({
+  const usingExactModelIds = availableModelIds.size > 0;
+
+  const sourceModels = usingExactModelIds
+    ? [...new Set([...KNOWN_OPENAI_MODELS, ...availableModelIds])]
+    : KNOWN_OPENAI_MODELS;
+
+  const models = sourceModels.map((id) => ({
    id,
    object: "model",
    created: new Date().getTime(),
@@ -58,7 +67,12 @@ function getModelsResponse() {
    ],
    root: id,
    parent: null,
-  })).filter((model) => available.has(getAzureOpenAIModelFamily(model.id)));
+  })).filter((model) => {
+    if (usingExactModelIds) {
+      return availableModelIds.has(model.id);
+    }
+    return available.has(getAzureOpenAIModelFamily(model.id));
+  });

  modelsCache = { object: "list", data: models };
  modelsCacheTime = new Date().getTime();
@@ -115,6 +129,16 @@ azureOpenAIRouter.post(
  }),
  azureOpenAIProxy
);
+azureOpenAIRouter.post(
+  "/v1/responses",
+  ipLimiter,
+  createPreprocessorMiddleware({
+    inApi: "openai-responses",
+    outApi: "openai-responses",
+    service: "azure",
+  }),
+  azureOpenAIProxy
+);
azureOpenAIRouter.post(
  "/v1/images/generations",
  ipLimiter,
+74 -12
@@ -19,7 +19,12 @@ import {
import { transformAnthropicChatResponseToOpenAI } from "./anthropic";
import { sendErrorToClient } from "./middleware/response/error-generator";

-const LATEST_GCP_SONNET_MINOR_VERSION = "20240229";
+const GCP_CLAUDE_SONNET_45 = "claude-sonnet-4-5@20250929";
+const GCP_CLAUDE_HAIKU_45 = "claude-haiku-4-5@20251001";
+const GCP_CLAUDE_OPUS_41 = "claude-opus-4-1@20250805";
+const GCP_CLAUDE_SONNET_4 = "claude-sonnet-4@20250514";
+const GCP_CLAUDE_OPUS_4 = "claude-opus-4@20250514";
+const GCP_CLAUDE_35_HAIKU = "claude-3-5-haiku@20241022";

let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -33,10 +38,12 @@ const getModelsResponse = () => {

  // https://docs.anthropic.com/en/docs/about-claude/models
  const variants = [
-    "claude-3-haiku@20240307",
-    "claude-3-sonnet@20240229",
-    "claude-3-opus@20240229",
-    "claude-3-5-sonnet@20240620",
+    GCP_CLAUDE_HAIKU_45,
+    GCP_CLAUDE_SONNET_45,
+    GCP_CLAUDE_OPUS_41,
+    GCP_CLAUDE_35_HAIKU,
+    GCP_CLAUDE_SONNET_4,
+    GCP_CLAUDE_OPUS_4,
  ];

  const models = variants.map((id) => ({
@@ -147,6 +154,7 @@ gcpRouter.post(
 */
function maybeReassignModel(req: Request) {
  const model = req.body.model;
+  const lower = String(model).toLowerCase();

  // If it looks like a GCP model, use it as-is
  // if (model.includes("anthropic.claude")) {
@@ -154,6 +162,58 @@ function maybeReassignModel(req: Request) {
    return;
  }

+  if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
+    req.body.model = GCP_CLAUDE_OPUS_41;
+    return;
+  }
+  if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
+    req.body.model = GCP_CLAUDE_OPUS_41;
+    return;
+  }
+  if (lower.includes("opus-4")) {
+    req.body.model = GCP_CLAUDE_OPUS_4;
+    return;
+  }
+  if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
+    req.body.model = GCP_CLAUDE_HAIKU_45;
+    return;
+  }
+  if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
+    req.body.model = GCP_CLAUDE_SONNET_45;
+    return;
+  }
+  if (lower.includes("sonnet-4")) {
+    req.body.model = GCP_CLAUDE_SONNET_4;
+    return;
+  }
+  if (lower.includes("3-5") && lower.includes("haiku")) {
+    req.body.model = GCP_CLAUDE_35_HAIKU;
+    return;
+  }
+  if (lower.includes("opus")) {
+    req.body.model = GCP_CLAUDE_OPUS_41;
+    return;
+  }
+  if (lower.includes("haiku")) {
+    req.body.model = GCP_CLAUDE_HAIKU_45;
+    return;
+  }
+  if (lower.includes("sonnet")) {
+    req.body.model = GCP_CLAUDE_SONNET_45;
+    return;
+  }
+
+  if (
+    lower.startsWith("gpt-") ||
+    lower.startsWith("o1") ||
+    lower.startsWith("o3") ||
+    lower.startsWith("o4") ||
+    lower === "computer-use-preview"
+  ) {
+    req.body.model = GCP_CLAUDE_SONNET_45;
+    return;
+  }
+
  // Anthropic model names can look like:
  // - claude-v1
  // - claude-2.1
@@ -165,7 +225,7 @@ function maybeReassignModel(req: Request) {
  // If there's no match, fall back to Claude Sonnet 4 as it is most likely to
  // be available on GCP.
  if (!match) {
-    req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`;
+    req.body.model = GCP_CLAUDE_SONNET_4;
    return;
  }
@@ -176,20 +236,22 @@ function maybeReassignModel(req: Request) {
    case "3":
    case "3.0":
      if (name.includes("opus")) {
-        req.body.model = "claude-3-opus@20240229";
+        req.body.model = GCP_CLAUDE_OPUS_41;
      } else if (name.includes("haiku")) {
-        req.body.model = "claude-3-haiku@20240307";
+        req.body.model = GCP_CLAUDE_HAIKU_45;
      } else {
-        req.body.model = "claude-3-sonnet@20240229";
+        req.body.model = GCP_CLAUDE_SONNET_45;
      }
      return;
    case "3.5":
-      req.body.model = "claude-3-5-sonnet@20240620";
+      req.body.model = name.includes("haiku")
+        ? GCP_CLAUDE_35_HAIKU
+        : GCP_CLAUDE_SONNET_45;
      return;
  }

-  // Fallback to Claude3 Sonnet
-  req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`;
+  // Fallback to Claude Sonnet 4.5
+  req.body.model = GCP_CLAUDE_SONNET_45;
  return;
}
+23 -7
@@ -16,6 +16,11 @@ import {
  ProxyResHandlerWithBody,
} from "./middleware/response";
import { addGoogleAIKey } from "./middleware/request/preprocessors/add-google-ai-key";
+import { BadRequestError } from "../shared/errors";
+import {
+  flattenGoogleAIContentParts,
+  isGoogleAIImageModel,
+} from "../shared/api-schemas";

let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -31,10 +36,15 @@ const getModelsResponse = () => {
  if (!config.googleAIKey) return { object: "list", data: [] };

  const googleAIVariants = [
    "gemini-pro",
    "gemini-1.0-pro",
+    "gemini-2.5-pro",
+    "gemini-2.5-flash",
+    "gemini-2.5-flash-lite",
+    "gemini-2.5-flash-image",
+    "gemini-3-pro-image-preview",
+    "gemini-2.0-flash-preview-image-generation",
+    "gemini-2.0-flash",
    "gemini-1.5-pro",
    "gemini-1.5-pro-latest",
    "gemini-1.5-flash",
  ];

  const models = googleAIVariants.map((id) => ({
@@ -83,7 +93,8 @@ function transformGoogleAIResponse(
): Record<string, any> {
  const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
  const parts = resBody.candidates[0].content?.parts ?? [{ text: "" }];
-  const content = parts[0].text.replace(/^(.{0,50}?): /, () => "");
+  const content = flattenGoogleAIContentParts(parts)
+    .replace(/^(.{0,50}?): /, () => "");
  return {
    id: "goo-" + v4(),
    object: "chat.completion",
@@ -136,14 +147,19 @@ googleAIRouter.post(
  googleAIProxy
);

-/** Replaces requests for non-Google AI models with gemini-pro-1.5-latest. */
+/** Replaces requests for non-Google AI models with Gemini 2.5 Flash. */
function maybeReassignModel(req: Request) {
  const requested = req.body.model;
  if (requested.includes("gemini")) {
+    if (req.body.stream && isGoogleAIImageModel(requested)) {
+      throw new BadRequestError(
+        "Streaming Gemini image-generation models is not currently supported by this proxy. Retry without `stream: true`."
+      );
+    }
    return;
  }
-  req.log.info({ requested }, "Reassigning model to gemini-pro-1.5-latest");
-  req.body.model = "gemini-pro-1.5-latest";
+  req.log.info({ requested }, "Reassigning model to gemini-2.5-flash");
+  req.body.model = "gemini-2.5-flash";
}

export const googleAI = googleAIRouter;
@@ -5,10 +5,15 @@ import { ZodError } from "zod";
import { generateErrorMessage } from "zod-error";
import { HttpError } from "../../shared/errors";
import { assertNever } from "../../shared/utils";
+import {
+  flattenGoogleAIContentParts,
+  flattenOpenAIResponsesOutput,
+} from "../../shared/api-schemas";
import { QuotaExceededError } from "./request/preprocessors/apply-quota-limits";
import { sendErrorToClient } from "./response/error-generator";

const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
+const OPENAI_RESPONSES_ENDPOINT = "/v1/responses";
const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
@@ -22,6 +27,7 @@ export function isTextGenerationRequest(req: Request) {
    req.method === "POST" &&
    [
      OPENAI_CHAT_COMPLETION_ENDPOINT,
+      OPENAI_RESPONSES_ENDPOINT,
      OPENAI_TEXT_COMPLETION_ENDPOINT,
      ANTHROPIC_COMPLETION_ENDPOINT,
      ANTHROPIC_MESSAGES_ENDPOINT,
@@ -224,6 +230,8 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
      // Can be null if the model wants to invoke tools rather than return a
      // completion.
      return body.choices[0].message.content || "";
+    case "openai-responses":
+      return flattenOpenAIResponsesOutput(body);
    case "openai-text":
      return body.choices[0].text;
    case "anthropic-chat":
@@ -252,7 +260,7 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
      if ("choices" in body) {
        return body.choices[0].message.content;
      }
-      return body.candidates[0].content.parts[0].text;
+      return flattenGoogleAIContentParts(body.candidates?.[0]?.content?.parts);
    case "openai-image":
      return body.data?.map((item: any) => item.url).join("\n");
    default:
@@ -267,6 +275,8 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
    case "openai-text":
    case "mistral-ai":
      return body.model;
+    case "openai-responses":
+      return body.model || req.body.model;
    case "openai-image":
      return req.body.model;
    case "anthropic-chat":
@@ -47,6 +47,7 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
      assignedKey = keyPool.get("dall-e-3", service);
      break;
    case "openai":
+    case "openai-responses":
    case "google-ai":
    case "mistral-ai":
      throw new Error(
@@ -109,9 +110,10 @@ export const addKeyForEmbeddingsRequest: HPMRequestCallback = (
    throw new Error("Embeddings requests must be from OpenAI");
  }

-  req.body = { input: req.body.input, model: "text-embedding-ada-002" };
+  const model = req.body.model || "text-embedding-3-small";
+  req.body = { input: req.body.input, model };

-  const key = keyPool.get("text-embedding-ada-002", "openai") as OpenAIKey;
+  const key = keyPool.get(model, "openai") as OpenAIKey;

  req.key = key;
  req.log.info(
@@ -6,7 +6,7 @@ import {
import { RequestPreprocessor } from "../index";

export const addAzureKey: RequestPreprocessor = (req) => {
-  const validAPIs: APIFormat[] = ["openai", "openai-image"];
+  const validAPIs: APIFormat[] = ["openai", "openai-responses", "openai-image"];
  const apisValid = [req.outboundApi, req.inboundApi].every((api) =>
    validAPIs.includes(api)
  );
@@ -50,6 +50,23 @@ export const addAzureKey: RequestPreprocessor = (req) => {
  const cred = req.key as AzureOpenAIKey;
  const { resourceName, deploymentId, apiKey } = getCredentialsFromKey(cred);

+  if (req.outboundApi === "openai-responses") {
+    req.body.model = deploymentId;
+    req.signedRequest = {
+      method: "POST",
+      protocol: "https:",
+      hostname: `${resourceName}.openai.azure.com`,
+      path: `/openai/v1/responses?api-version=preview`,
+      headers: {
+        ["host"]: `${resourceName}.openai.azure.com`,
+        ["content-type"]: "application/json",
+        ["api-key"]: apiKey,
+      },
+      body: JSON.stringify(req.body),
+    };
+    return;
+  }
+
  const operation =
    req.outboundApi === "openai" ? "/chat/completions" : "/images/generations";
  const apiVersion =
@@ -6,6 +6,7 @@ import {
  GoogleAIChatMessage,
  MistralAIChatMessage,
  OpenAIChatMessage,
+  flattenOpenAIResponsesInput,
} from "../../../../shared/api-schemas";

/**
@@ -18,11 +19,23 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {

  switch (service) {
    case "openai": {
-      req.outputTokens = req.body.max_tokens;
+      req.outputTokens =
+        req.body.max_completion_tokens ?? req.body.max_tokens ?? 0;
      const prompt: OpenAIChatMessage[] = req.body.messages;
      result = await countTokens({ req, prompt, service });
      break;
    }
+    case "openai-responses": {
+      req.outputTokens = req.body.max_output_tokens ?? 0;
+      const prompt = [
+        flattenOpenAIResponsesInput(req.body.instructions),
+        flattenOpenAIResponsesInput(req.body.input),
+      ]
+        .filter(Boolean)
+        .join("\n\n");
+      result = await countTokens({ req, prompt, service });
+      break;
+    }
    case "openai-text": {
      req.outputTokens = req.body.max_tokens;
      const prompt: string = req.body.prompt;
@@ -4,8 +4,10 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { BadRequestError } from "../../../../shared/errors";
import {
+  GoogleAIChatMessage,
  MistralAIChatMessage,
  OpenAIChatMessage,
+  flattenOpenAIResponsesInput,
  flattenAnthropicMessages,
} from "../../../../shared/api-schemas";
@@ -72,11 +74,27 @@ function getPromptFromRequest(req: Request) {
        return `${msg.role}: ${text}`;
      })
      .join("\n\n");
+    case "openai-responses":
+      return [
+        flattenOpenAIResponsesInput(body.instructions),
+        flattenOpenAIResponsesInput(body.input),
+      ]
+        .filter(Boolean)
+        .join("\n\n");
    case "openai-text":
    case "openai-image":
      return body.prompt;
    case "google-ai":
-      return body.prompt.text;
+      return body.contents
+        .map(({ parts, role }: GoogleAIChatMessage) => {
+          const text = parts
+            .map((part: any) =>
+              "text" in part ? part.text : "[image omitted]"
+            )
+            .join("\n");
+          return `${role}: ${text}`;
+        })
+        .join("\n\n");
    default:
      assertNever(service);
  }
@@ -6,8 +6,8 @@ import { RequestPreprocessor } from "../index";

const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
-const GOOGLE_AI_MAX_CONTEXT = 32000;
-const MISTRAL_AI_MAX_CONTENT = 32768;
+const GOOGLE_AI_MAX_CONTEXT = 1048576;
+const MISTRAL_AI_MAX_CONTENT = 256000;

/**
 * Assigns `req.promptTokens` and `req.outputTokens` based on the request body
@@ -26,6 +26,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
  let proxyMax: number;
  switch (req.outboundApi) {
    case "openai":
+    case "openai-responses":
    case "openai-text":
      proxyMax = OPENAI_MAX_CONTEXT;
      break;
@@ -54,6 +55,12 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
  let modelMax: number;
  if (model.match(/gpt-3.5-turbo-16k/)) {
    modelMax = 16384;
+  } else if (model.match(/^gpt-5(\.|-|\b)/)) {
+    modelMax = 1050000;
+  } else if (model.match(/^o\d/)) {
+    modelMax = 200000;
+  } else if (model.match(/^gpt-4\.1/)) {
+    modelMax = 1047576;
  } else if (model.match(/^gpt-4o/)) {
    modelMax = 128000;
  } else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
@@ -80,12 +87,27 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
    modelMax = 200000;
  } else if (model.match(/^claude-3/)) {
    modelMax = 200000;
+  } else if (model.match(/^claude-(opus|sonnet|haiku)-4/)) {
+    modelMax = 200000;
  } else if (model.match(/^gemini-\d{3}$/)) {
    modelMax = GOOGLE_AI_MAX_CONTEXT;
+  } else if (model.match(/^gemini-(2\.5|2\.0)/)) {
+    modelMax = 1048576;
  } else if (model.match(/^mistral-(tiny|small|medium)$/)) {
    modelMax = MISTRAL_AI_MAX_CONTENT;
+  } else if (
+    model.match(
+      /^(mistral|ministral|magistral|pixtral|codestral|devstral|voxtral)-/
+    )
+  ) {
+    modelMax = MISTRAL_AI_MAX_CONTENT;
  } else if (model.match(/^anthropic\.claude-3/)) {
    modelMax = 200000;
+  } else if (
+    model.match(/^anthropic\.claude-(opus|sonnet|haiku)-4/) ||
+    model.match(/^claude-(opus|sonnet|haiku)-4@/)
+  ) {
+    modelMax = 200000;
  } else if (model.match(/^anthropic\.claude-v2:\d/)) {
    modelMax = 200000;
  } else if (model.match(/^anthropic\.claude/)) {
@@ -121,8 +143,8 @@ function assertRequestHasTokenCounts(
  req: Request
): asserts req is Request & { promptTokens: number; outputTokens: number } {
  z.object({
-    promptTokens: z.number().int().min(1),
-    outputTokens: z.number().int().min(1),
+    promptTokens: z.number().int().min(0),
+    outputTokens: z.number().int().min(0),
  })
    .nonstrict()
    .parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens });
@@ -3,6 +3,7 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { containsImageContent as containsImageContentOpenAI } from "../../../../shared/api-schemas/openai";
import { containsImageContent as containsImageContentAnthropic } from "../../../../shared/api-schemas/anthropic";
+import { containsOpenAIResponsesImageInput } from "../../../../shared/api-schemas";
import { ForbiddenError } from "../../../../shared/errors";

/**
@@ -22,11 +23,20 @@ export const validateVision: RequestPreprocessor = async (req) => {
    case "openai":
      hasImage = containsImageContentOpenAI(req.body.messages);
      break;
+    case "openai-responses":
+      hasImage =
+        containsOpenAIResponsesImageInput(req.body.instructions) ||
+        containsOpenAIResponsesImageInput(req.body.input);
+      break;
    case "anthropic-chat":
      hasImage = containsImageContentAnthropic(req.body.messages);
      break;
-    case "anthropic-text":
    case "google-ai":
+      hasImage = req.body.contents?.some((message: { parts: any[] }) =>
+        message.parts?.some((part) => "inline_data" in part)
+      );
+      break;
+    case "anthropic-text":
    case "mistral-ai":
    case "openai-image":
    case "openai-text":
@@ -72,7 +72,15 @@ type ErrorGeneratorOptions = {
};

export function tryInferFormat(body: any): APIFormat | "unknown" {
-  if (typeof body !== "object" || !body.model) {
+  if (typeof body !== "object") {
    return "unknown";
  }
+
+  if (body.object === "response" || Array.isArray(body.output)) {
+    return "openai-responses";
+  }
+
+  if (!body.model) {
+    return "unknown";
+  }
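With this ordering, a Responses API body is recognized before the `model` check ever runs. Sample inputs and the format the function above would infer (my illustration of the branch order, not tests from the repo):

```ts
// Illustrative inputs for tryInferFormat:
const samples = [
  { object: "response", output: [] },   // -> "openai-responses"
  { output: [{ type: "message" }] },    // -> "openai-responses" (no model needed)
  { model: "gpt-4.1", messages: [] },   // -> falls through to the model-based checks
  {},                                   // -> "unknown"
];
```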
@@ -158,7 +166,30 @@ export function buildSpoofedCompletion({

  switch (format) {
    case "openai":
+    case "openai-responses":
    case "mistral-ai":
+      if (format === "openai-responses") {
+        return {
+          id: "error-" + id,
+          object: "response",
+          created_at: Math.floor(Date.now() / 1000),
+          model,
+          status: "completed",
+          error: null,
+          incomplete_details: null,
+          output_text: content,
+          output: [
+            {
+              id: "msg-error-" + id,
+              type: "message",
+              role: "assistant",
+              status: "completed",
+              content: [{ type: "output_text", text: content, annotations: [] }],
+            },
+          ],
+          usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
+        };
+      }
      return {
        id: "error-" + id,
        object: "chat.completion",
@@ -248,7 +279,23 @@ export function buildSpoofedSSE({

  switch (format) {
    case "openai":
+    case "openai-responses":
    case "mistral-ai":
+      if (format === "openai-responses") {
+        return (
+          `data: ${JSON.stringify({
+            type: "response.completed",
+            response: buildSpoofedCompletion({
+              format,
+              title,
+              message,
+              obj,
+              reqId,
+              model,
+            }),
+          })}\n\n`
+        );
+      }
      event = {
        id: "chatcmpl-" + id,
        object: "chat.completion.chunk",
@@ -11,6 +11,7 @@ import { ProxyResHandlerWithBody } from ".";
import { assertNever } from "../../../shared/utils";
import {
  AnthropicChatMessage,
+  flattenOpenAIResponsesInput,
  flattenAnthropicMessages, GoogleAIChatMessage,
  MistralAIChatMessage,
  OpenAIChatMessage,
@@ -62,6 +63,7 @@ const getPromptForRequest = (
):
  | string
  | OpenAIChatMessage[]
+  | { instructions?: unknown; input?: unknown }
  | { contents: GoogleAIChatMessage[] }
  | { system: string; messages: AnthropicChatMessage[] }
  | MistralAIChatMessage[]
@@ -73,6 +75,11 @@ const getPromptForRequest = (
    case "openai":
    case "mistral-ai":
      return req.body.messages;
+    case "openai-responses":
+      return {
+        instructions: req.body.instructions,
+        input: req.body.input,
+      };
    case "anthropic-chat":
      return { system: req.body.system, messages: req.body.messages };
    case "openai-text":
@@ -99,6 +106,7 @@ const flattenMessages = (
  | string
  | OaiImageResult
  | OpenAIChatMessage[]
+  | { instructions?: unknown; input?: unknown }
  | { contents: GoogleAIChatMessage[] }
  | { system: string; messages: AnthropicChatMessage[] }
  | MistralAIChatMessage[]
@@ -114,12 +122,20 @@ const flattenMessages = (
    return val.contents
      .map(({ parts, role }) => {
        const text = parts
-          .map((p) => p.text)
+          .map((p: any) => ("text" in p ? p.text : "(( Attached Image ))"))
          .join("\n");
        return `${role}: ${text}`;
      })
      .join("\n");
  }
+  if (isOpenAIResponsesPrompt(val)) {
+    return [
+      flattenOpenAIResponsesInput(val.instructions),
+      flattenOpenAIResponsesInput(val.input),
+    ]
+      .filter(Boolean)
+      .join("\n\n");
+  }
  if (Array.isArray(val)) {
    return val
      .map(({ content, role }) => {
@@ -140,6 +156,16 @@ const flattenMessages = (
  return val.prompt.trim();
};

+function isOpenAIResponsesPrompt(
+  val: unknown
+): val is { instructions?: unknown; input?: unknown } {
+  return (
+    typeof val === "object" &&
+    val !== null &&
+    ("instructions" in val || "input" in val)
+  );
+}
+
function isGoogleAIChatPrompt(
  val: unknown
): val is { contents: GoogleAIChatMessage[] } {
@@ -8,6 +8,7 @@ import {
  mergeEventsForOpenAIText,
  AnthropicV2StreamEvent,
  OpenAIChatCompletionStreamEvent,
+  OpenAIResponsesStreamEvent,
} from "./index";

/**
@@ -17,13 +18,36 @@ import {
export class EventAggregator {
  private readonly format: APIFormat;
  private readonly events: OpenAIChatCompletionStreamEvent[];
+  private responseBody: Record<string, any> | null;
+  private responseEventCount: number;
+  private responseOutputText: string;

  constructor({ format }: { format: APIFormat }) {
    this.events = [];
    this.format = format;
+    this.responseBody = null;
+    this.responseEventCount = 0;
+    this.responseOutputText = "";
  }

-  addEvent(event: OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent) {
+  addEvent(
+    event:
+      | OpenAIChatCompletionStreamEvent
+      | AnthropicV2StreamEvent
+      | OpenAIResponsesStreamEvent
+  ) {
+    if (eventIsOpenAIResponsesEvent(event)) {
+      this.responseEventCount++;
+      if (event.response && typeof event.response === "object") {
+        this.responseBody = event.response;
+      }
+
+      if (event.type === "response.output_text.delta") {
+        this.responseOutputText += event.delta || event.text || "";
+      }
+      return;
+    }
+
    if (eventIsOpenAIEvent(event)) {
      this.events.push(event);
    } else {
@@ -52,8 +76,15 @@ export class EventAggregator {
  getFinalResponse() {
    switch (this.format) {
      case "openai":
+      case "openai-responses":
      case "google-ai":
      case "mistral-ai":
+        if (this.format === "openai-responses") {
+          if (this.responseBody) {
+            return this.responseBody;
+          }
+          return { output_text: this.responseOutputText };
+        }
        return mergeEventsForOpenAIChat(this.events);
      case "openai-text":
        return mergeEventsForOpenAIText(this.events);
@@ -69,7 +100,7 @@ export class EventAggregator {
  }

  hasEvents() {
-    return this.events.length > 0;
+    return this.events.length > 0 || this.responseEventCount > 0;
  }
}
@@ -78,3 +109,9 @@ function eventIsOpenAIEvent(
): event is OpenAIChatCompletionStreamEvent {
  return event?.object === "chat.completion.chunk";
}

+function eventIsOpenAIResponsesEvent(
+  event: any
+): event is OpenAIResponsesStreamEvent {
+  return typeof event?.type === "string" && event.type.startsWith("response.");
+}
@@ -26,6 +26,14 @@ export type OpenAIChatCompletionStreamEvent = {
  }[];
};

+export type OpenAIResponsesStreamEvent = {
+  type: string;
+  response?: Record<string, any>;
+  delta?: string;
+  text?: string;
+  [key: string]: any;
+};
+
export type StreamingCompletionTransformer<
  T = OpenAIChatCompletionStreamEvent,
  S = any,
@@ -42,6 +50,7 @@ export { anthropicChatToAnthropicV2 } from "./transformers/anthropic-chat-to-ant
export { anthropicChatToOpenAI } from "./transformers/anthropic-chat-to-openai";
export { googleAIToOpenAI } from "./transformers/google-ai-to-openai";
export { passthroughToOpenAI } from "./transformers/passthrough-to-openai";
+export { passthroughToOpenAIResponses } from "./transformers/passthrough-to-openai-responses";
export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat";
export { mergeEventsForOpenAIText } from "./aggregators/openai-text";
export { mergeEventsForAnthropicText } from "./aggregators/anthropic-text";
@@ -10,8 +10,10 @@ import {
  anthropicV2ToOpenAI,
  googleAIToOpenAI,
  OpenAIChatCompletionStreamEvent,
+  OpenAIResponsesStreamEvent,
  openAITextToOpenAIChat,
  passthroughToOpenAI,
+  passthroughToOpenAIResponses,
  StreamingCompletionTransformer,
} from "./index";
@@ -35,7 +37,9 @@ export class SSEMessageTransformer extends Transform {
  private readonly inputFormat: APIFormat;
  private readonly transformFn: StreamingCompletionTransformer<
    // TODO: Refactor transformers to not assume only OpenAI events as output
-    OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
+    | OpenAIChatCompletionStreamEvent
+    | AnthropicV2StreamEvent
+    | OpenAIResponsesStreamEvent
  >;
  private readonly log;
  private readonly fallbackId: string;
@@ -126,12 +130,14 @@ function getTransformer(
  // used for that case.
  requestApi: APIFormat = "openai"
): StreamingCompletionTransformer<
-  OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
+  OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent | OpenAIResponsesStreamEvent
> {
  switch (responseApi) {
    case "openai":
    case "mistral-ai":
      return passthroughToOpenAI;
+    case "openai-responses":
+      return passthroughToOpenAIResponses;
    case "openai-text":
      return openAITextToOpenAIChat;
    case "anthropic-text":
+43
@@ -0,0 +1,43 @@
+import {
+  OpenAIResponsesStreamEvent,
+  SSEResponseTransformArgs,
+  StreamingCompletionTransformer,
+} from "../index";
+import { parseEvent, ServerSentEvent } from "../parse-sse";
+import { logger } from "../../../../../logger";
+
+const log = logger.child({
+  module: "sse-transformer",
+  transformer: "openai-responses-to-openai-responses",
+});
+
+export const passthroughToOpenAIResponses: StreamingCompletionTransformer<
+  OpenAIResponsesStreamEvent
+> = (
+  params: SSEResponseTransformArgs
+) => {
+  const { data } = params;
+
+  const rawEvent = parseEvent(data);
+  if (!rawEvent.data || rawEvent.data === "[DONE]") {
+    return { position: -1 };
+  }
+
+  const responseEvent = asResponseEvent(rawEvent);
+  if (!responseEvent) {
+    return { position: -1 };
+  }
+
+  return { position: -1, event: responseEvent };
+};
+
+function asResponseEvent(
+  event: ServerSentEvent
+): OpenAIResponsesStreamEvent | null {
+  try {
+    return JSON.parse(event.data) as OpenAIResponsesStreamEvent;
+  } catch (error) {
+    log.warn({ error: error.stack, event }, "Received invalid event");
+  }
+  return null;
+}
+20 -16
@@ -24,25 +24,29 @@ import {

// https://docs.mistral.ai/platform/endpoints
export const KNOWN_MISTRAL_AI_MODELS = [
-  // Mistral 7b (open weight, legacy)
-  "open-mistral-7b",
-  "mistral-tiny-2312",
-  // Mixtral 8x7b (open weight, legacy)
-  "open-mixtral-8x7b",
-  "mistral-small-2312",
-  // Mixtral Small (newer 8x7b, closed weight)
  "mistral-small-latest",
-  "mistral-small-2402",
-  // Mistral Medium
+  "mistral-small-2603",
+  "mistral-small-2506",
  "mistral-medium-latest",
-  "mistral-medium-2312",
-  // Mistral Large
+  "mistral-medium-2508",
+  "mistral-medium-2505",
+  "magistral-medium-latest",
+  "magistral-medium-2507",
+  "magistral-small-2507",
  "mistral-large-latest",
-  "mistral-large-2402",
-  // Deprecated identifiers (2024-05-01)
-  "mistral-tiny",
-  "mistral-small",
-  "mistral-medium",
+  "mistral-large-2512",
+  "ministral-14b-2512",
+  "ministral-8b-latest",
+  "ministral-8b-2512",
+  "ministral-3b-latest",
+  "ministral-3b-2512",
+  "pixtral-large-latest",
+  "pixtral-large-2411",
+  "codestral-latest",
+  "codestral-2508",
+  "devstral-small-latest",
+  "devstral-small-2507",
+  "devstral-medium-2507",
];

let modelsCache: any = null;
@@ -18,7 +18,13 @@ import {
import { generateModelList } from "./openai";
import { OpenAIImageGenerationResult } from "../shared/file-storage/mirror-generated-image";

-const KNOWN_MODELS = ["dall-e-2", "dall-e-3"];
+const KNOWN_MODELS = [
+  "dall-e-2",
+  "dall-e-3",
+  "gpt-image-1.5",
+  "gpt-image-1",
+  "gpt-image-1-mini",
+];

let modelListCache: any = null;
let modelListValid = 0;
+63 -10
@@ -28,28 +28,57 @@ import {

// https://platform.openai.com/docs/models/overview
export const KNOWN_OPENAI_MODELS = [
  "gpt-5.2",
  "gpt-5.2-chat",
  "gpt-5.2-chat-latest",
  "gpt-5.2-pro",
  "gpt-5.2-codex",
  "gpt-5.1",
  "gpt-5.1-chat",
  "gpt-5.1-codex",
  "gpt-5.1-codex-mini",
  "gpt-5.1-codex-max",
  "gpt-5",
  "gpt-5-chat",
  "gpt-5-pro",
  "gpt-5-codex",
  "gpt-5-mini",
  "gpt-5-nano",
  "gpt-4.1",
  "gpt-4.1-2025-04-14",
  "gpt-4.1-mini",
  "gpt-4.1-nano",
  "o3-pro",
  "o3-deep-research",
  "computer-use-preview",
  "o4-mini",
  "o4-mini-deep-research",
  "o3",
  "o3-mini",
  "o1",
  "o1-pro",
  "gpt-4o",
  "gpt-4o-2024-08-06",
  "gpt-4o-mini",
  "gpt-4o-2024-05-13",
  "gpt-4-turbo", // alias for latest gpt4-turbo stable
  "gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
  "gpt-4-turbo-preview", // alias for latest turbo preview
  "gpt-4-0125-preview", // gpt4-turbo preview 2
  "gpt-4-1106-preview", // gpt4-turbo preview 1
  "gpt-4-vision-preview", // gpt4-turbo preview 1 with vision
  "gpt-4",
  "gpt-4-0613",
  "gpt-4-0314", // EOL 2024-06-13
  "gpt-4-32k",
  "gpt-4-32k-0314", // EOL 2024-06-13
  "gpt-4-32k-0613",
  "gpt-3.5-turbo",
  "gpt-3.5-turbo-0301", // EOL 2024-06-13
  "gpt-3.5-turbo-0613",
  "gpt-3.5-turbo-16k",
  "gpt-3.5-turbo-16k-0613",
  "gpt-3.5-turbo-instruct",
  "gpt-3.5-turbo-instruct-0914",
  "text-embedding-3-small",
  "text-embedding-3-large",
  "text-embedding-ada-002",
  "gpt-image-1.5",
  "gpt-image-1",
  "gpt-image-1-mini",
  "dall-e-3",
  "dall-e-2",
];

let modelsCache: any = null;
@@ -59,11 +88,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
  // Get available families and snapshots
  let availableFamilies = new Set<OpenAIModelFamily>();
  const availableSnapshots = new Set<string>();
+  const availableModelIds = new Set<string>();
  for (const key of keyPool.list()) {
    if (key.isDisabled || key.service !== "openai") continue;
    const asOpenAIKey = key as OpenAIKey;
    asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f));
    asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s));
+    asOpenAIKey.modelIds.forEach((id) => availableModelIds.add(id));
  }

  // Remove disabled families
@@ -71,8 +102,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
  availableFamilies = new Set(
    [...availableFamilies].filter((x) => allowed.has(x))
  );
+  const usingExactModelIds = availableModelIds.size > 0;

-  return models
+  const sourceModels = usingExactModelIds
+    ? [...new Set([...models, ...availableModelIds])]
+    : models;
+
+  return sourceModels
    .map((id) => ({
      id,
      object: "model",
@@ -92,6 +128,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
      parent: null,
    }))
    .filter((model) => {
+      if (usingExactModelIds) {
+        return (
+          allowed.has(getOpenAIModelFamily(model.id)) &&
+          availableModelIds.has(model.id)
+        );
+      }
+
      // First check if the family is available
      const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id));
      if (!hasFamily) return false;
@@ -233,6 +276,16 @@ openaiRouter.post(
  }),
  openaiProxy
);
+openaiRouter.post(
+  "/v1/responses",
+  ipLimiter,
+  createPreprocessorMiddleware({
+    inApi: "openai-responses",
+    outApi: "openai-responses",
+    service: "openai",
+  }),
+  openaiProxy
+);
// Embeddings endpoint.
openaiRouter.post(
  "/v1/embeddings",
@@ -31,18 +31,24 @@ export const AnthropicV1TextSchema = AnthropicV1BaseSchema.merge(
      .int()
      .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
  })
);
).passthrough();

const AnthropicV1MessageMultimodalContentSchema = z.array(
  z.union([
    z.object({ type: z.literal("text"), text: z.string() }),
    z.object({
      type: z.literal("image"),
      source: z.object({
      source: z.union([
        z.object({
          type: z.literal("base64"),
          media_type: z.string().max(100),
          data: z.string(),
        }),
        z.object({
          type: z.literal("url"),
          url: z.string().url(),
        }),
      ]),
    }),
  ])
);
@@ -65,7 +71,7 @@ export const AnthropicV1MessagesSchema = AnthropicV1BaseSchema.merge(
      .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
    system: z.string().optional(),
  })
);
).passthrough();
export type AnthropicChatMessage = z.infer<
  typeof AnthropicV1MessagesSchema
>["messages"][0];
@@ -77,7 +83,7 @@ function openAIMessagesToClaudeTextPrompt(messages: OpenAIChatMessage[]) {
    let role: string = m.role;
    if (role === "assistant") {
      role = "Assistant";
    } else if (role === "system") {
    } else if (role === "system" || role === "developer") {
      role = "System";
    } else if (role === "user") {
      role = "Human";
@@ -115,12 +121,13 @@ export const transformOpenAIToAnthropicChat: APIFormatTransformer<
    system,
    messages: newMessages,
    model: rest.model,
    max_tokens: rest.max_tokens,
    max_tokens: rest.max_completion_tokens ?? rest.max_tokens,
    stream: rest.stream,
    temperature: rest.temperature,
    top_p: rest.top_p,
    stop_sequences:
      typeof rest.stop === "string" ? [rest.stop] : rest.stop || undefined,
    ...(rest.thinking ? { thinking: rest.thinking } : {}),
    ...(rest.user ? { metadata: { user_id: rest.user } } : {}),
    // Anthropic supports top_k, but OpenAI does not
    // OpenAI supports frequency_penalty, presence_penalty, logit_bias, n, seed,
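Both Anthropic transformers now prefer `max_completion_tokens` (sent by newer OpenAI clients) and fall back to the legacy `max_tokens`. A one-line illustration with made-up values:

// Newer clients send max_completion_tokens; older ones send max_tokens.
const rest = { max_completion_tokens: 2048, max_tokens: 4096 };
const budget = rest.max_completion_tokens ?? rest.max_tokens; // 2048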
@@ -162,7 +169,7 @@ export const transformOpenAIToAnthropicText: APIFormatTransformer<
  return {
    model: rest.model,
    prompt: prompt,
    max_tokens_to_sample: rest.max_tokens,
    max_tokens_to_sample: rest.max_completion_tokens ?? rest.max_tokens,
    stop_sequences: stops,
    stream: rest.stream,
    temperature: rest.temperature,
@@ -366,7 +373,7 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
    // Here we will lose the original name if it was a system message, but that
    // is generally okay because the system message is usually a prompt and not
    // a character in the chat.
    const name = msg.role === "system" ? "System" : msg.name?.trim();
    const name = isSystemOpenAIRole(msg.role) ? "System" : msg.name?.trim();
    const content = convertOpenAIContent(msg.content);

    // Prepend the display name to the first text content in the current message
@@ -396,8 +403,8 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {

function isSystemOpenAIRole(
  role: OpenAIChatMessage["role"]
): role is "system" | "function" | "tool" {
  return ["system", "function", "tool"].includes(role);
): role is "system" | "developer" | "function" | "tool" {
  return ["system", "developer", "function", "tool"].includes(role);
}

function getFirstTextContent(content: OpenAIChatMessage["content"]) {
@@ -1,10 +1,21 @@
import { z } from "zod";
import {
  flattenOpenAIMessageContent,
  OpenAIChatMessage,
  OpenAIV1ChatCompletionSchema,
} from "./openai";
import { APIFormatTransformer } from "./index";

const GoogleAIContentPartSchema = z.union([
  z.object({ text: z.string() }),
  z.object({
    inline_data: z.object({
      mime_type: z.string().max(100),
      data: z.string(),
    }),
  }),
]);

// https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent
export const GoogleAIV1GenerateContentSchema = z
  .object({
@@ -12,31 +23,40 @@ export const GoogleAIV1GenerateContentSchema = z
    stream: z.boolean().optional().default(false), // also used for router
    contents: z.array(
      z.object({
        parts: z.array(z.object({ text: z.string() })),
        parts: z.array(GoogleAIContentPartSchema),
        role: z.enum(["user", "model"]),
      })
    ),
    tools: z.array(z.object({})).max(0).optional(),
    safetySettings: z.array(z.object({})).max(0).optional(),
    tools: z.array(z.any()).optional(),
    toolConfig: z.any().optional(),
    safetySettings: z.array(z.any()).optional(),
    systemInstruction: z.any().optional(),
    generationConfig: z.object({
      temperature: z.number().optional(),
      maxOutputTokens: z.coerce
        .number()
        .int()
        .optional()
        .default(16)
        .transform((v) => Math.min(v, 1024)), // TODO: Add config
        .default(1024)
        .transform((v) => Math.min(v, 65536)),
      candidateCount: z.literal(1).optional(),
      topP: z.number().optional(),
      topK: z.number().optional(),
      responseMimeType: z.string().optional(),
      responseSchema: z.any().optional(),
      responseJsonSchema: z.any().optional(),
      responseModalities: z.array(z.string()).optional(),
      thinkingConfig: z.any().optional(),
      stopSequences: z.array(z.string().max(500)).max(5).optional(),
    }),
  })
  .strip();
  .passthrough();
export type GoogleAIChatMessage = z.infer<
  typeof GoogleAIV1GenerateContentSchema
>["contents"][0];

type GoogleAIPart = GoogleAIChatMessage["parts"][number];

export const transformOpenAIToGoogleAI: APIFormatTransformer<
  typeof GoogleAIV1GenerateContentSchema
> = async (req) => {
@@ -54,40 +74,51 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
  }

  const { messages, ...rest } = result.data;
  const systemMessages = messages.filter(
    (m) => m.role === "system" || m.role === "developer"
  );
  const foundNames = new Set<string>();
  const model = req.body.model;
  const customThinkingConfig =
    getObjectField(body, "thinkingConfig") ??
    getObjectField(getObjectField(body, "generationConfig"), "thinkingConfig");
  const customResponseModalities = getStringArrayField(
    getObjectField(body, "generationConfig"),
    "responseModalities"
  );
  const contents = messages
    .filter((m) => m.role !== "system" && m.role !== "developer")
    .map((m) => {
      const role = m.role === "assistant" ? "model" : "user";
      // Detects character names so we can set stop sequences for them as Gemini
      // is prone to continuing as the next character.
      // If names are not available, we'll still try to prefix the message
      // with generic names so we can set stops for them but they don't work
      // as well as real names.
      const text = flattenOpenAIMessageContent(m.content);
      const parts = convertOpenAIContent(m.content);
      const text = parts
        .map((part) => ("text" in part ? part.text : ""))
        .join("\n");
      const propName = m.name?.trim();
      const textName =
        m.role === "system" ? "" : text.match(/^(.{0,50}?): /)?.[1]?.trim();
      const name =
        propName || textName || (role === "model" ? "Character" : "User");
      const textName = text.match(/^(.{0,50}?): /)?.[1]?.trim();
      const name = propName || textName || (role === "model" ? "Character" : "User");

      foundNames.add(name);

      // Prefixing messages with their character name seems to help avoid
      // Gemini trying to continue as the next character, or at the very least
      // ensures it will hit the stop sequence. Otherwise it will start a new
      // paragraph and switch perspectives.
      // The response will be very likely to include this prefix so frontends
      // will need to strip it out.
      // Prefixing speaker names helps Gemini avoid continuing as the next
      // character in multi-party roleplay/chat prompts.
      const textPrefix = textName ? "" : `${name}: `;
      const firstTextPart = parts.find(
        (part): part is Extract<GoogleAIPart, { text: string }> => "text" in part
      );
      if (firstTextPart) {
        firstTextPart.text = textPrefix + firstTextPart.text;
      }

      return {
        parts: [{ text: textPrefix + text }],
        parts,
        role: m.role === "assistant" ? ("model" as const) : ("user" as const),
      };
    })
    .reduce<GoogleAIChatMessage[]>((acc, msg) => {
      const last = acc[acc.length - 1];
      if (last?.role === msg.role) {
        last.parts[0].text += "\n\n" + msg.parts[0].text;
        last.parts.push(...msg.parts);
      } else {
        acc.push(msg);
      }
@@ -102,17 +133,44 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
  stops.push(...Array.from(foundNames).map((name) => `\n${name}:`));
  stops = [...new Set(stops)].slice(0, 5);

  const responseFormat = rest.response_format as Record<string, any> | undefined;
  const maxOutputTokens =
    rest.max_completion_tokens ?? rest.max_tokens ?? 1024;

  return {
    model: req.body.model,
    model,
    stream: rest.stream,
    contents,
    tools: [],
    tools: Array.isArray(rest.tools) ? rest.tools : undefined,
    systemInstruction: systemMessages.length
      ? {
          parts: [
            {
              text: systemMessages
                .map((msg) => flattenOpenAIMessageContent(msg.content))
                .join("\n\n"),
            },
          ],
        }
      : undefined,
    generationConfig: {
      maxOutputTokens: rest.max_tokens,
      maxOutputTokens,
      stopSequences: stops,
      topP: rest.top_p,
      topK: 40, // openai schema doesn't have this, google ai defaults to 40
      topK: 40, // OpenAI schema doesn't expose this; Gemini defaults to 40.
      temperature: rest.temperature,
      responseMimeType:
        responseFormat?.type === "json_object" ||
        responseFormat?.type === "json_schema"
          ? "application/json"
          : undefined,
      responseSchema: responseFormat?.json_schema?.schema,
      responseJsonSchema: responseFormat?.json_schema?.schema,
      responseModalities:
        customResponseModalities ??
        (isGoogleAIImageModel(model) ? ["TEXT", "IMAGE"] : undefined),
      thinkingConfig:
        customThinkingConfig ?? getThinkingConfig(model, rest.reasoning_effort),
    },
    safetySettings: [
      { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
@@ -122,3 +180,117 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
    ],
  };
};
function convertOpenAIContent(
  content: OpenAIChatMessage["content"]
): GoogleAIPart[] {
  if (typeof content === "string") {
    return [{ text: content }];
  }

  return content.map((item) => {
    if ("text" in item) {
      return { text: item.text };
    }
    if ("refusal" in item) {
      return { text: item.refusal };
    }

    const url = item.image_url.url;
    if (!url.startsWith("data:")) {
      return { text: "[ Unsupported image URL ]" };
    }

    const [meta, data = ""] = url.split(",", 2);
    const mimeType = meta.split(";")[0].replace("data:", "");
    return { inline_data: { mime_type: mimeType, data } };
  });
}

function getThinkingConfig(model: string, reasoningEffort?: string) {
  if (model.startsWith("gemini-2.5")) {
    switch (reasoningEffort) {
      case "none":
      case "minimal":
      case "low":
        return { thinkingBudget: 0 };
      default:
        return undefined;
    }
  }

  switch (reasoningEffort) {
    case "low":
    case "minimal":
    case "none":
      return { thinkingLevel: "LOW" };
    case "medium":
    case "high":
    case "xhigh":
      return { thinkingLevel: "HIGH" };
    default:
      return undefined;
  }
}
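Expected outputs of the helper above (the model IDs here are illustrative examples; per the code, Gemini 2.5 models take a numeric `thinkingBudget` while other models take a `thinkingLevel`):

getThinkingConfig("gemini-2.5-pro", "low");           // { thinkingBudget: 0 }
getThinkingConfig("gemini-2.5-pro", "high");          // undefined (model default)
getThinkingConfig("gemini-3-pro-preview", "minimal"); // { thinkingLevel: "LOW" }
getThinkingConfig("gemini-3-pro-preview", "xhigh");   // { thinkingLevel: "HIGH" }
getThinkingConfig("gemini-3-pro-preview", undefined); // undefined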
export function isGoogleAIImageModel(model: string) {
  return [
    "gemini-2.0-flash-preview-image-generation",
    "gemini-2.5-flash-image",
    "gemini-3-pro-image-preview",
  ].includes(model);
}
export function flattenGoogleAIContentParts(
  parts: Array<Record<string, any>> | undefined
) {
  return (parts ?? [])
    .map((part) => {
      if (typeof part?.text === "string") {
        return part.text;
      }

      const inlineData = part?.inline_data ?? part?.inlineData;
      if (inlineData?.data) {
        const mimeType = inlineData.mime_type ?? inlineData.mimeType ?? "image/png";
        return `[ Inline ${mimeType} Data Omitted ]`;
      }

      return "";
    })
    .filter(Boolean)
    .join("\n\n");
}
function getObjectField(
  value: unknown,
  key: string
): Record<string, any> | undefined {
  if (
    value &&
    typeof value === "object" &&
    !Array.isArray(value) &&
    key in value &&
    value[key as keyof typeof value] &&
    typeof value[key as keyof typeof value] === "object" &&
    !Array.isArray(value[key as keyof typeof value])
  ) {
    return value[key as keyof typeof value] as Record<string, any>;
  }
  return undefined;
}

function getStringArrayField(value: unknown, key: string) {
  if (
    value &&
    typeof value === "object" &&
    !Array.isArray(value) &&
    key in value &&
    Array.isArray(value[key as keyof typeof value])
  ) {
    return (value[key as keyof typeof value] as unknown[]).filter(
      (item): item is string => typeof item === "string"
    );
  }
  return undefined;
}
@@ -17,6 +17,7 @@ import {
  OpenAIV1ImagesGenerationSchema,
  transformOpenAIToOpenAIImage,
} from "./openai-image";
import { OpenAIResponsesCreateSchema } from "./openai-responses";
import {
  GoogleAIV1GenerateContentSchema,
  transformOpenAIToGoogleAI,
@@ -24,13 +25,22 @@ import {
import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai";

export { OpenAIChatMessage } from "./openai";
export {
  containsOpenAIResponsesImageInput,
  flattenOpenAIResponsesInput,
  flattenOpenAIResponsesOutput,
} from "./openai-responses";
export {
  AnthropicChatMessage,
  AnthropicV1TextSchema,
  AnthropicV1MessagesSchema,
  flattenAnthropicMessages,
} from "./anthropic";
export { GoogleAIChatMessage } from "./google-ai";
export {
  GoogleAIChatMessage,
  flattenGoogleAIContentParts,
  isGoogleAIImageModel,
} from "./google-ai";
export { MistralAIChatMessage } from "./mistral-ai";

type APIPair = `${APIFormat}->${APIFormat}`;
@@ -55,6 +65,7 @@ export const API_REQUEST_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
  "anthropic-chat": AnthropicV1MessagesSchema,
  "anthropic-text": AnthropicV1TextSchema,
  openai: OpenAIV1ChatCompletionSchema,
  "openai-responses": OpenAIResponsesCreateSchema,
  "openai-text": OpenAIV1TextCompletionSchema,
  "openai-image": OpenAIV1ImagesGenerationSchema,
  "google-ai": GoogleAIV1GenerateContentSchema,

@@ -20,7 +20,7 @@ export const MistralAIV1ChatCompletionsSchema = z.object({
  stream: z.boolean().optional().default(false),
  safe_prompt: z.boolean().optional().default(false),
  random_seed: z.number().int().optional(),
});
}).passthrough();
export type MistralAIChatMessage = z.infer<
  typeof MistralAIV1ChatCompletionsSchema
>["messages"][0];
@@ -5,19 +5,34 @@ import { APIFormatTransformer } from "./index";
// https://platform.openai.com/docs/api-reference/images/create
export const OpenAIV1ImagesGenerationSchema = z
  .object({
    prompt: z.string().max(4000),
    prompt: z.string().max(32000),
    model: z.string().max(100).optional(),
    quality: z.enum(["standard", "hd"]).optional().default("standard"),
    n: z.number().int().min(1).max(4).optional().default(1),
    quality: z
      .enum(["auto", "low", "medium", "high", "standard", "hd"])
      .optional(),
    n: z.number().int().min(1).max(10).optional().default(1),
    response_format: z.enum(["url", "b64_json"]).optional(),
    output_format: z.string().optional(),
    output_compression: z.number().int().min(0).max(100).optional(),
    size: z
      .enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"])
      .enum([
        "auto",
        "256x256",
        "512x512",
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1792x1024",
        "1024x1792",
      ])
      .optional()
      .default("1024x1024"),
    style: z.enum(["vivid", "natural"]).optional().default("vivid"),
    background: z.string().optional(),
    moderation: z.string().optional(),
    user: z.string().max(500).optional(),
  })
  .strip();
  .passthrough();

// Takes the last chat message and uses it verbatim as the image prompt.
export const transformOpenAIToOpenAIImage: APIFormatTransformer<
@@ -57,12 +72,21 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
  }

  // TODO: Add some way to specify parameters via chat message
  const transformed = {
    model: body.model.includes("dall-e") ? body.model : "dall-e-3",
    quality: "standard",
  const requestedModel = String(body.model ?? "");
  const model =
    requestedModel.includes("dall-e") || requestedModel.includes("gpt-image")
      ? requestedModel
      : "gpt-image-1.5";
  const transformed: Record<string, any> = {
    model,
    size: "1024x1024",
    response_format: "url",
    prompt: prompt.slice(index! + 6).trim(),
  };

  if (model.includes("dall-e")) {
    transformed.quality = "standard";
    transformed.response_format = "url";
  }

  return OpenAIV1ImagesGenerationSchema.parse(transformed);
};
@@ -0,0 +1,136 @@
import { z } from "zod";
import { OPENAI_OUTPUT_MAX } from "./openai";

const OpenAIResponsesReasoningSchema = z
  .object({
    effort: z.string().optional(),
    summary: z.union([z.string(), z.array(z.string())]).optional(),
  })
  .passthrough();

const OpenAIResponsesTextSchema = z
  .object({
    format: z.any().optional(),
    verbosity: z.enum(["low", "medium", "high"]).optional(),
  })
  .passthrough();

export const OpenAIResponsesCreateSchema = z
  .object({
    model: z.string().max(100),
    input: z.union([z.string(), z.array(z.any())]).optional(),
    instructions: z.union([z.string(), z.array(z.any())]).optional(),
    previous_response_id: z.string().max(100).optional(),
    stream: z.boolean().optional().default(false),
    max_output_tokens: z.coerce
      .number()
      .int()
      .nullish()
      .default(OPENAI_OUTPUT_MAX)
      .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
    temperature: z.number().optional(),
    top_p: z.number().optional(),
    user: z.string().max(500).optional(),
    safety_identifier: z.string().max(500).optional(),
    metadata: z.record(z.any()).optional(),
    tools: z.array(z.any()).optional(),
    tool_choice: z.any().optional(),
    parallel_tool_calls: z.boolean().optional(),
    include: z.array(z.string()).optional(),
    store: z.boolean().optional(),
    background: z.boolean().optional(),
    reasoning: OpenAIResponsesReasoningSchema.optional(),
    text: OpenAIResponsesTextSchema.optional(),
  })
  .passthrough();

export function flattenOpenAIResponsesInput(input: unknown): string {
  return flattenResponseValue(input).trim();
}

export function flattenOpenAIResponsesOutput(body: Record<string, any>): string {
  if (typeof body.output_text === "string" && body.output_text.trim()) {
    return body.output_text.trim();
  }

  return flattenResponseValue(body.output ?? body.output_text).trim();
}

export function containsOpenAIResponsesImageInput(input: unknown): boolean {
  return containsImage(input);
}

function flattenResponseValue(value: unknown): string {
  if (value === null || value === undefined) return "";
  if (typeof value === "string") return value;
  if (typeof value === "number" || typeof value === "boolean") {
    return String(value);
  }

  if (Array.isArray(value)) {
    return value
      .map((item) => flattenResponseValue(item))
      .filter(Boolean)
      .join("\n");
  }

  if (!isRecord(value)) return "";

  const typed = value;
  if (hasStringProp(typed, "text")) return typed.text;
  if (hasStringProp(typed, "refusal")) return typed.refusal;
  if (hasStringProp(typed, "summary")) return typed.summary;
  if (hasStringProp(typed, "arguments")) return typed.arguments;
  if (hasStringProp(typed, "result")) return typed.result;

  const type = String(typed.type ?? "");
  if (type.includes("image")) return "[ Uploaded Image Omitted ]";
  if (type.includes("file")) return "[ File Omitted ]";

  if (typeof typed.role === "string" && typed.content !== undefined) {
    const content = flattenResponseValue(typed.content);
    return content ? `${typed.role}: ${content}` : typed.role;
  }

  const nested = [
    typed.content,
    typed.input,
    typed.output,
    typed.summary,
    typed.results,
    typed.item,
    typed.items,
  ];
  for (const candidate of nested) {
    const flattened = flattenResponseValue(candidate);
    if (flattened) return flattened;
  }

  return "";
}

function containsImage(value: unknown): boolean {
  if (value === null || value === undefined) return false;
  if (Array.isArray(value)) return value.some((item) => containsImage(item));
  if (!isRecord(value)) return false;

  const typed = value;
  const type = String(typed.type ?? "");
  if (type.includes("image")) return true;
  if (typed.image_url || typed.image || typed.input_image || typed.inline_data) {
    return true;
  }

  return Object.values(typed).some((item) => containsImage(item));
}

function hasStringProp<T extends string>(
  value: Record<string, unknown>,
  key: T
): value is Record<string, unknown> & Record<T, string> {
  return typeof value[key] === "string";
}

function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}
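A worked example of the new flattener on a Responses-style input array (the message content is made up; the item shapes follow OpenAI's Responses API):

const input = [
  {
    role: "user",
    content: [
      { type: "input_text", text: "Describe this image." },
      { type: "input_image", image_url: "https://example.com/cat.png" },
    ],
  },
];
flattenOpenAIResponsesInput(input);
// => "user: Describe this image.\n[ Uploaded Image Omitted ]"
containsOpenAIResponsesImageInput(input); // => true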
@@ -7,6 +7,7 @@ export const OPENAI_OUTPUT_MAX = config.maxOutputTokensOpenAI;
const OpenAIV1ChatContentArraySchema = z.array(
  z.union([
    z.object({ type: z.literal("text"), text: z.string() }),
    z.object({ type: z.literal("refusal"), refusal: z.string() }),
    z.object({
      type: z.union([z.literal("image"), z.literal("image_url")]),
      image_url: z.object({
@@ -21,7 +22,14 @@ export const OpenAIV1ChatCompletionSchema = z
    model: z.string().max(100),
    messages: z.array(
      z.object({
        role: z.enum(["system", "user", "assistant", "tool", "function"]),
        role: z.enum([
          "system",
          "developer",
          "user",
          "assistant",
          "tool",
          "function",
        ]),
        content: z.union([z.string(), OpenAIV1ChatContentArraySchema]),
        name: z.string().optional(),
        tool_calls: z.array(z.any()).optional(),
@@ -54,11 +62,20 @@ export const OpenAIV1ChatCompletionSchema = z
      .nullish()
      .default(Math.min(OPENAI_OUTPUT_MAX, 4096))
      .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
    max_completion_tokens: z.coerce
      .number()
      .int()
      .nullish()
      .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
    frequency_penalty: z.number().optional().default(0),
    presence_penalty: z.number().optional().default(0),
    logit_bias: z.any().optional(),
    metadata: z.record(z.any()).optional(),
    user: z.string().max(500).optional(),
    safety_identifier: z.string().max(500).optional(),
    seed: z.number().int().optional(),
    prompt_cache_key: z.string().max(500).optional(),
    prompt_cache_retention: z.string().optional(),
    // Be warned that Azure OpenAI combines these two into a single field.
    // It's the only deviation from the OpenAI API that I'm aware of so I have
    // special cased it in `addAzureKey` rather than expecting clients to do it.
@@ -70,14 +87,23 @@ export const OpenAIV1ChatCompletionSchema = z
    functions: z.array(z.any()).optional(),
    tool_choice: z.any().optional(),
    function_choice: z.any().optional(),
    response_format: z.any(),
    response_format: z.any().optional(),
    parallel_tool_calls: z.boolean().optional(),
    reasoning_effort: z.string().optional(),
    stream_options: z.any().optional(),
    modalities: z.array(z.string()).optional(),
    audio: z.any().optional(),
    prediction: z.any().optional(),
    web_search_options: z.any().optional(),
    service_tier: z.string().optional(),
    verbosity: z.enum(["low", "medium", "high"]).optional(),
  })
  // Tool usage must be enabled via config because we currently have no way to
  // track quota usage for them or enforce limits.
  .omit(
    Boolean(config.allowOpenAIToolUsage) ? {} : { tools: true, functions: true }
  )
  .strip();
  .passthrough();
export type OpenAIChatMessage = z.infer<
  typeof OpenAIV1ChatCompletionSchema
>["messages"][0];
@@ -89,6 +115,7 @@ export function flattenOpenAIMessageContent(
    ? content
        .map((contentItem) => {
          if ("text" in contentItem) return contentItem.text;
          if ("refusal" in contentItem) return contentItem.refusal;
          if ("image_url" in contentItem) return "[ Uploaded Image Omitted ]";
        })
        .join("\n")
@@ -107,7 +134,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
      let role: string = m.role;
      if (role === "assistant") {
        role = "Assistant";
      } else if (role === "system") {
      } else if (role === "system" || role === "developer") {
        role = "System";
      } else if (role === "user") {
        role = "User";
@@ -121,7 +148,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
      .map((m) => {
        // Claude without prefixes (except system) and no Assistant priming
        let role: string = "";
        if (role === "system") {
        if (m.role === "system" || m.role === "developer") {
          role = "System: ";
        }
        return `\n\n${role}${flattenOpenAIMessageContent(m.content)}`;
@@ -54,10 +54,10 @@ export class AwsKeyChecker extends KeyCheckerBase<AwsBedrockKey> {
    if (isInitialCheck) {
      checks = [
        this.invokeModel("anthropic.claude-v2", key),
        this.invokeModel("anthropic.claude-3-sonnet-20240229-v1:0", key),
        this.invokeModel("anthropic.claude-3-haiku-20240307-v1:0", key),
        this.invokeModel("anthropic.claude-3-opus-20240229-v1:0", key),
        this.invokeModel("anthropic.claude-3-5-sonnet-20240620-v1:0", key),
        this.invokeModel("anthropic.claude-sonnet-4-5-20250929-v1:0", key),
        this.invokeModel("anthropic.claude-haiku-4-5-20251001-v1:0", key),
        this.invokeModel("anthropic.claude-opus-4-1-20250805-v1:0", key),
        this.invokeModel("anthropic.claude-3-5-haiku-20241022-v1:0", key),
      ];
    }

@@ -35,9 +35,15 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
  }

  protected async testKeyOrFail(key: AzureOpenAIKey) {
    const model = await this.testModel(key);
    this.log.info({ key: key.hash, deploymentModel: model }, "Checked key.");
    this.updateKey(key.hash, { modelFamilies: [model] });
    const result = await this.testModel(key);
    this.log.info(
      { key: key.hash, deploymentModel: result.modelIds[0] ?? result.family },
      "Checked key."
    );
    this.updateKey(key.hash, {
      modelFamilies: [result.family],
      modelIds: result.modelIds,
    });
  }

  protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) {
@@ -107,7 +113,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
    this.updateKey(key.hash, { lastChecked: next });
  }

  private async testModel(key: AzureOpenAIKey) {
  private async testModel(key: AzureOpenAIKey): Promise<{
    family: ReturnType<typeof getAzureOpenAIModelFamily>;
    modelIds: string[];
  }> {
    const { apiKey, deploymentId, resourceName } =
      AzureOpenAIKeyChecker.getCredentialsFromKey(key);
    const url = POST_CHAT_COMPLETIONS(resourceName, deploymentId);
@@ -126,7 +135,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
    // we try to invoke /chat/completions on dall-e-3. This is expected and
    // indicates a DALL-E deployment.
    if (response.status === 400) {
      if (data.error.code === "OperationNotSupported") return "azure-dall-e";
      if (data.error.code === "OperationNotSupported") {
        return {
          family: "azure-dall-e",
          modelIds: ["dall-e-3", "gpt-image-1", "gpt-image-1-mini", "gpt-image-1.5"],
        };
      }
      throw new AxiosError(
        `Unexpected error when testing deployment ${deploymentId}`,
        "AZURE_TEST_ERROR",
@@ -137,11 +151,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
    }

    const family = getAzureOpenAIModelFamily(data.model);
    const normalizedModel = normalizeAzureModelId(data.model);

    // Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks.
    // Otherwise we can use the model family Azure returned.
    if (family !== "azure-gpt4") {
      return family;
      return { family, modelIds: [normalizedModel] };
    }

    // Try to send an oversized prompt. GPT-4 Turbo can handle this but regular
@@ -160,8 +175,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
    const code = contextTest.error?.code;
    this.log.debug({ code, status }, "Performed Azure GPT4 context size test.");

    if (code === "context_length_exceeded") return "azure-gpt4";
    return "azure-gpt4-turbo";
    if (code === "context_length_exceeded") {
      return { family: "azure-gpt4", modelIds: ["gpt-4"] };
    }
    return { family: "azure-gpt4-turbo", modelIds: ["gpt-4-turbo"] };
  }

  static errorIsAzureError(error: AxiosError): error is AxiosError<AzureError> {
@@ -179,3 +196,7 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
    return { resourceName, deploymentId, apiKey };
  }
}

function normalizeAzureModelId(model: string) {
  return model.replace("gpt-35-turbo", "gpt-3.5-turbo");
}
@@ -14,6 +14,8 @@ type AzureOpenAIKeyUsage = {
export interface AzureOpenAIKey extends Key, AzureOpenAIKeyUsage {
  readonly service: "azure";
  readonly modelFamilies: AzureOpenAIModelFamily[];
  /** Exact model IDs or deployment aliases known to be backed by this key. */
  modelIds: string[];
  /** The time at which this key was last rate limited. */
  rateLimitedAt: number;
  /** The time until which this key is rate limited. */
@@ -62,6 +64,7 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
      rateLimitedAt: 0,
      rateLimitedUntil: 0,
      contentFiltering: false,
      modelIds: [],
      hash: `azu-${crypto
        .createHash("sha256")
        .update(key)
@@ -73,6 +76,8 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
      "azure-gpt4-32kTokens": 0,
      "azure-gpt4-turboTokens": 0,
      "azure-gpt4oTokens": 0,
      "azure-gpt5Tokens": 0,
      "azure-o-seriesTokens": 0,
      "azure-dall-eTokens": 0,
    };
    this.keys.push(newKey);
@@ -96,8 +101,14 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {

  public get(model: string) {
    const neededFamily = getAzureOpenAIModelFamily(model);
    const normalizedModel = model
      .replace(/^azure-/, "")
      .replace("gpt-35-turbo", "gpt-3.5-turbo");
    const availableKeys = this.keys.filter(
      (k) => !k.isDisabled && k.modelFamilies.includes(neededFamily)
      (k) =>
        !k.isDisabled &&
        k.modelFamilies.includes(neededFamily) &&
        (!k.modelIds.length || k.modelIds.includes(normalizedModel))
    );
    if (availableKeys.length === 0) {
      throw new PaymentRequiredError(
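A sketch of the new per-deployment matching, assuming a key whose checker recorded `modelIds: ["gpt-3.5-turbo"]`: the incoming model name is normalized before the lookup, and keys with an empty `modelIds` list (not yet checked) keep the old family-only behavior.

const incoming = "azure-gpt-35-turbo"; // hypothetical client request
const normalizedModel = incoming
  .replace(/^azure-/, "")
  .replace("gpt-35-turbo", "gpt-3.5-turbo"); // "gpt-3.5-turbo"
// A key with modelIds ["gpt-3.5-turbo"] now matches this request;
// a key with modelIds [] still matches on family alone.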
@@ -32,10 +32,10 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
    const isInitialCheck = !key.lastChecked;
    if (isInitialCheck) {
      checks = [
        this.invokeModel("claude-3-haiku@20240307", key, true),
        this.invokeModel("claude-3-sonnet@20240229", key, true),
        this.invokeModel("claude-3-opus@20240229", key, true),
        this.invokeModel("claude-3-5-sonnet@20240620", key, true),
        this.invokeModel("claude-haiku-4-5@20251001", key, true),
        this.invokeModel("claude-sonnet-4-5@20250929", key, true),
        this.invokeModel("claude-opus-4-1@20250805", key, true),
        this.invokeModel("claude-3-5-haiku@20241022", key, true),
      ];

      const [sonnet, haiku, opus, sonnet35] =
@@ -66,13 +66,13 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
      });
    } else {
      if (key.haikuEnabled) {
        await this.invokeModel("claude-3-haiku@20240307", key, false)
        await this.invokeModel("claude-haiku-4-5@20251001", key, false)
      } else if (key.sonnetEnabled) {
        await this.invokeModel("claude-3-sonnet@20240229", key, false)
        await this.invokeModel("claude-sonnet-4-5@20250929", key, false)
      } else if (key.sonnet35Enabled) {
        await this.invokeModel("claude-3-5-sonnet@20240620", key, false)
        await this.invokeModel("claude-3-5-haiku@20241022", key, false)
      } else {
        await this.invokeModel("claude-3-opus@20240229", key, false)
        await this.invokeModel("claude-opus-4-1@20250805", key, false)
      }

      this.updateKey(key.hash, { lastChecked: Date.now() });
@@ -4,6 +4,7 @@ import { KeyPool } from "./key-pool";
/** The request and response format used by a model's API. */
export type APIFormat =
  | "openai"
  | "openai-responses"
  | "openai-text"
  | "openai-image"
  | "anthropic-chat" // Anthropic's newer messages array format

@@ -111,7 +111,10 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
    const familiesArray = [...families];
    const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
    this.updateKey(key.hash, {
      modelSnapshots: models.filter((m) => m.match(/-\d{4}(-preview)?$/)),
      modelIds: models,
      modelSnapshots: models.filter((m) =>
        m.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/)
      ),
      modelFamilies: familiesArray,
      lastChecked: keyFromPool.lastChecked,
    });
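The widened snapshot regex now recognizes date-stamped model IDs in addition to the legacy four-digit suffixes:

const snapshotRe = /-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/;
snapshotRe.test("gpt-4-0613");          // true  (legacy 4-digit)
snapshotRe.test("gpt-4-0125-preview");  // true
snapshotRe.test("gpt-4o-2024-08-06");   // true  (full date)
snapshotRe.test("gpt-4.1-2025-04-14");  // true
snapshotRe.test("gpt-4o");              // false (not a snapshot)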
@@ -16,6 +16,8 @@ type OpenAIKeyUsage = {
export interface OpenAIKey extends Key, OpenAIKeyUsage {
  readonly service: "openai";
  modelFamilies: OpenAIModelFamily[];
  /** Exact model IDs reported by the models API for this key. */
  modelIds: string[];
  /**
   * Some keys are assigned to multiple organizations, each with their own quota
   * limits. We clone the key for each organization and track usage/disabled
@@ -97,6 +99,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
        "gpt4" as const,
        "gpt4-turbo" as const,
        "gpt4o" as const,
        "gpt5" as const,
        "o-series" as const,
      ],
      isTrial: false,
      isDisabled: false,
@@ -118,8 +122,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
      "gpt4-32kTokens": 0,
      "gpt4-turboTokens": 0,
      gpt4oTokens: 0,
      gpt5Tokens: 0,
      "o-seriesTokens": 0,
      "dall-eTokens": 0,
      gpt4Rpm: 0,
      modelIds: [],
      modelSnapshots: [],
    };
    this.keys.push(newKey);
@@ -160,8 +167,10 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
    if (model === "gpt-4-32k") model = "gpt-4-32k-0613";

    const neededFamily = getOpenAIModelFamily(model);
    const excludeTrials = model === "text-embedding-ada-002";
    const needsSnapshot = model.match(/-\d{4}(-preview)?$/);
    const excludeTrials = /^text-embedding-(?:3-small|3-large|ada-002)$/.test(
      model
    );
    const needsSnapshot = model.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/);

    const availableKeys = this.keys.filter(
      // Allow keys which
@@ -169,6 +178,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
        !key.isDisabled && // are not disabled
        key.modelFamilies.includes(neededFamily) && // have access to the model family we need
        (!excludeTrials || !key.isTrial) && // and are not trials if we don't want them
        (!key.modelIds.length || key.modelIds.includes(model)) && // and have the requested model if exact inventory is available
        (!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need
    );
+48
-2
@@ -23,6 +23,8 @@ export type OpenAIModelFamily =
  | "gpt4-32k"
  | "gpt4-turbo"
  | "gpt4o"
  | "gpt5"
  | "o-series"
  | "dall-e";
export type AnthropicModelFamily = "claude" | "claude-opus";
export type GoogleAIModelFamily = "gemini-pro";
@@ -51,6 +53,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
  "gpt4-32k",
  "gpt4-turbo",
  "gpt4o",
  "gpt5",
  "o-series",
  "dall-e",
  "claude",
  "claude-opus",
@@ -68,6 +72,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
  "azure-gpt4-32k",
  "azure-gpt4-turbo",
  "azure-gpt4o",
  "azure-gpt5",
  "azure-o-series",
  "azure-dall-e",
] as const);

@@ -84,6 +90,10 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
] as const);

export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
  "^gpt-5(\\.\\d+)?([-.].+)?$": "gpt5",
  "^o\\d([-.].+)?$": "o-series",
  "^computer-use-preview$": "o-series",
  "^gpt-4\\.1([-.].+)?$": "gpt4o",
  "^gpt-4o": "gpt4o",
  "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
  "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
@@ -94,7 +104,8 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
  "^gpt-4-\\d{4}$": "gpt4",
  "^gpt-4$": "gpt4",
  "^gpt-3.5-turbo": "turbo",
  "^text-embedding-ada-002$": "turbo",
  "^text-embedding-(ada-002|3-small|3-large)$": "turbo",
  "^gpt-image-1([-.].+)?$": "dall-e",
  "^dall-e-\\d{1}$": "dall-e",
};
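A sketch of how the updated map resolves IDs, assuming `getOpenAIModelFamily` (not shown in this diff) tests the patterns in insertion order and returns the first match:

// Assumption: first-match-wins iteration over the map's keys.
const resolveFamily = (model: string) =>
  Object.entries(OPENAI_MODEL_FAMILY_MAP).find(([re]) =>
    new RegExp(re).test(model)
  )?.[1];
resolveFamily("gpt-5.1-codex-max"); // "gpt5"
resolveFamily("o4-mini");           // "o-series"
resolveFamily("gpt-4.1-nano");      // "gpt4o"
resolveFamily("gpt-image-1.5");     // "dall-e"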
@@ -106,6 +117,8 @@ export const MODEL_FAMILY_SERVICE: {
  "gpt4-turbo": "openai",
  "gpt4-32k": "openai",
  "gpt4o": "openai",
  gpt5: "openai",
  "o-series": "openai",
  "dall-e": "openai",
  claude: "anthropic",
  "claude-opus": "anthropic",
@@ -118,6 +131,8 @@ export const MODEL_FAMILY_SERVICE: {
  "azure-gpt4-32k": "azure",
  "azure-gpt4-turbo": "azure",
  "azure-gpt4o": "azure",
  "azure-gpt5": "azure",
  "azure-o-series": "azure",
  "azure-dall-e": "azure",
  "gemini-pro": "google-ai",
  "mistral-tiny": "mistral-ai",
@@ -150,7 +165,10 @@ export function getGoogleAIModelFamily(_model: string): ModelFamily {
}

export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
  const prunedModel = model.replace(/-(latest|\d{4})$/, "");
  const prunedModel = model.replace(
    /-(latest|\d{4}|\d{6}|\d+\.\d+|v\d+(:\d+)?)$/,
    ""
  );
  switch (prunedModel) {
    case "mistral-tiny":
    case "mistral-small":
@@ -161,7 +179,34 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
      return "mistral-tiny";
    case "open-mixtral-8x7b":
      return "mistral-small";
    case "ministral-3b":
    case "ministral-8b":
    case "mistral-small-3.1":
    case "mistral-small-3.2":
      return "mistral-small";
    case "magistral-medium":
      return "mistral-medium";
    case "codestral":
    case "devstral":
    case "mistral-large-2":
    case "mistral-large-3":
    case "pixtral-large":
      return "mistral-large";
    default:
      if (model.startsWith("mistral-small") || model.startsWith("ministral")) {
        return "mistral-small";
      }
      if (model.startsWith("mistral-medium") || model.startsWith("magistral")) {
        return "mistral-medium";
      }
      if (
        model.startsWith("mistral-large") ||
        model.startsWith("pixtral-large") ||
        model.startsWith("codestral") ||
        model.startsWith("devstral")
      ) {
        return "mistral-large";
      }
      return "mistral-tiny";
  }
}
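The broader suffix regex strips more version forms before the switch runs. Some example prunings (the model IDs are illustrative; family arrows follow the cases visible above):

const prune = (m: string) =>
  m.replace(/-(latest|\d{4}|\d{6}|\d+\.\d+|v\d+(:\d+)?)$/, "");
prune("mistral-large-latest");  // "mistral-large"    -> "mistral-large"
prune("codestral-2405");        // "codestral"        -> "mistral-large"
prune("ministral-8b-latest");   // "ministral-8b"     -> "mistral-small"
prune("magistral-medium-2509"); // "magistral-medium" -> "mistral-medium"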
@@ -225,6 +270,7 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
      modelFamily = getClaudeModelFamily(model);
      break;
    case "openai":
    case "openai-responses":
    case "openai-text":
    case "openai-image":
      modelFamily = getOpenAIModelFamily(model);

@@ -10,6 +10,14 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
    case "azure-gpt4o":
      cost = 0.000005;
      break;
    case "gpt5":
    case "azure-gpt5":
      cost = 0.00001;
      break;
    case "o-series":
    case "azure-o-series":
      cost = 0.000012;
      break;
    case "azure-gpt4-turbo":
    case "gpt4-turbo":
      cost = 0.00001;
@@ -65,7 +65,14 @@ async function getTokenCountForMessages({
        numTokens += encoder.encode(text.normalize("NFKC"), "all").length;
        break;
      case "image":
        if (part.source.type === "base64") {
          numTokens += await getImageTokenCount(part.source.data);
        } else {
          // Remote image URLs are already hosted elsewhere, so we cannot
          // inspect dimensions locally. Charge the documented worst-case
          // token cost instead of undercounting them as zero.
          numTokens += 1600;
        }
        break;
      default:
        throw new Error(`Unsupported Anthropic content type.`);
@@ -179,16 +179,33 @@ export const DALLE_TOKENS_PER_DOLLAR = 100000;
 * which we convert to tokens at a rate of 100000 tokens per dollar.
 */
export function getOpenAIImageCost(params: {
  model: "dall-e-2" | "dall-e-3";
  quality: "standard" | "hd";
  resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024";
  model:
    | "dall-e-2"
    | "dall-e-3"
    | "gpt-image-1"
    | "gpt-image-1-mini"
    | "gpt-image-1.5";
  quality: "auto" | "low" | "medium" | "high" | "standard" | "hd";
  resolution:
    | "auto"
    | "512x512"
    | "256x256"
    | "1024x1024"
    | "1024x1536"
    | "1536x1024"
    | "1024x1792"
    | "1792x1024";
  n: number | null;
}) {
  const { model, quality, resolution, n } = params;
  const normalizedResolution =
    resolution === "auto" ? "1024x1024" : resolution;
  const normalizedQuality =
    quality === "hd" || quality === "high" ? "hd" : "standard";
  const usd = (() => {
    switch (model) {
      case "dall-e-2":
        switch (resolution) {
        switch (normalizedResolution) {
          case "512x512":
            return 0.018;
          case "256x256":
@@ -199,12 +216,20 @@ export function getOpenAIImageCost(params: {
            throw new Error("Invalid resolution");
        }
      case "dall-e-3":
        switch (resolution) {
      case "gpt-image-1.5":
      case "gpt-image-1":
      case "gpt-image-1-mini":
        // GPT Image models have newer parameter ranges, but we still account
        // for them using the existing DALL-E 3-style price buckets so the
        // proxy can continue to enforce rough quota/cost limits.
        switch (normalizedResolution) {
          case "1024x1024":
            return quality === "standard" ? 0.04 : 0.08;
            return normalizedQuality === "standard" ? 0.04 : 0.08;
          case "1024x1536":
          case "1536x1024":
          case "1024x1792":
          case "1792x1024":
            return quality === "standard" ? 0.08 : 0.12;
            return normalizedQuality === "standard" ? 0.08 : 0.12;
          default:
            throw new Error("Invalid resolution");
        }
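An example of the normalized buckets in use. Per the doc comment above, dollar costs are converted to tokens at DALLE_TOKENS_PER_DOLLAR (100000 tokens per dollar):

// A gpt-image-1 request with "auto"/"high" folds into the DALL-E 3 buckets:
// resolution "auto" -> "1024x1024", quality "high" -> "hd" -> US$0.08/image,
// i.e. 8000 tokens at 100000 tokens per dollar (assuming n = 1).
getOpenAIImageCost({
  model: "gpt-image-1",
  quality: "high",
  resolution: "auto",
  n: 1,
});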
@@ -233,7 +258,10 @@ export function estimateGoogleAITokenCount(
  let numTokens = 0;
  for (const message of prompt) {
    numTokens += tokensPerMessage;
    numTokens += encoder.encode(message.parts[0].text).length;
    const text = message.parts
      .map((part) => ("text" in part ? part.text : ""))
      .join("\n");
    numTokens += encoder.encode(text).length;
  }

  numTokens += 3;
@@ -55,7 +55,7 @@ type MistralAIChatTokenCountRequest = {
type FlatPromptTokenCountRequest = {
  prompt: string;
  completion?: never;
  service: "openai-text" | "anthropic-text" | "google-ai";
  service: "openai-text" | "openai-responses" | "anthropic-text" | "google-ai";
};

type StringCompletionTokenCountRequest = {
@@ -105,6 +105,7 @@ export async function countTokens({
        tokenization_duration_ms: getElapsedMs(time),
      };
    case "openai":
    case "openai-responses":
    case "openai-text":
      return {
        ...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),

@@ -400,6 +400,7 @@ function getModelFamilyForQuotaUsage(

  switch (api) {
    case "openai":
    case "openai-responses":
    case "openai-text":
    case "openai-image":
      return getOpenAIModelFamily(model);