commit c8dab8786a
parent 9cc86c2d68
Author: reanon
Date: 2025-08-07 21:03:03 +02:00

8 changed files with 131 additions and 28 deletions
+8
@@ -40,6 +40,10 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
gpt41: "GPT-4.1",
"gpt41-mini": "GPT-4.1 Mini",
"gpt41-nano": "GPT-4.1 Nano",
gpt5: "GPT-5",
"gpt5-mini": "GPT-5 Mini",
"gpt5-nano": "GPT-5 Nano",
"gpt5-chat-latest": "GPT-5 Chat Latest",
gpt45: "GPT-4.5",
o1: "OpenAI o1",
"o1-mini": "OpenAI o1 mini",
@@ -77,6 +81,10 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"azure-gpt41": "Azure GPT-4.1",
"azure-gpt41-mini": "Azure GPT-4.1 Mini",
"azure-gpt41-nano": "Azure GPT-4.1 Nano",
"azure-gpt5": "Azure GPT-5",
"azure-gpt5-mini": "Azure GPT-5 Mini",
"azure-gpt5-nano": "Azure GPT-5 Nano",
"azure-gpt5-chat-latest": "Azure GPT-5 Chat Latest",
"azure-o1": "Azure o1",
"azure-o1-mini": "Azure o1 mini",
"azure-o1-pro": "Azure o1 pro",
@@ -67,6 +67,14 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 1000000;
} else if (model.match(/^gpt-4\.1-nano(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 1000000;
} else if (model.match(/^gpt-5(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 400000;
} else if (model.match(/^gpt-5-mini(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 400000;
} else if (model.match(/^gpt-5-nano(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 400000;
} else if (model.match(/^gpt-5-chat-latest$/)) {
modelMax = 400000;
} else if (model.match(/^chatgpt-4o/)) {
modelMax = 128000;
} else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
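Reviewer note: all four new branches assign the same 400k limit, so they collapse to a single predicate. A minimal standalone sketch (hypothetical helper, not part of this diff) of how the new patterns resolve:

```ts
// Hypothetical helper, not in the diff: every GPT-5 variant gets 400k,
// so the four regex branches above reduce to one check.
function gpt5ContextLimit(model: string): number | undefined {
  const gpt5 = /^gpt-5(-mini|-nano)?(-\d{4}-\d{2}-\d{2})?$/;
  if (gpt5.test(model) || model === "gpt-5-chat-latest") return 400000;
  return undefined;
}

gpt5ContextLimit("gpt-5-2025-08-07");  // 400000
gpt5ContextLimit("gpt-5-chat-latest"); // 400000
gpt5ContextLimit("gpt-4o");            // undefined
```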
+22 -22
@@ -1,5 +1,6 @@
import { Request, RequestHandler, Router } from "express";
import { config } from "../config";
import { BadRequestError } from "../shared/errors";
import { AzureOpenAIKey, keyPool, OpenAIKey } from "../shared/key-management";
import { getOpenAIModelFamily } from "../shared/models";
import { ipLimiter } from "./rate-limit";
@@ -378,8 +379,8 @@ openaiRouter.post(
{ inApi: "openai", outApi: "openai", service: "openai" },
{
afterTransform: [
-  fixupMaxTokens,
-  setO1ReasoningEffort,
+  fixupMaxTokens,
+  filterGPT5UnsupportedParams,
routeToResponsesApi
]
}
@@ -412,7 +413,7 @@ openaiRouter.post(
ipLimiter,
createPreprocessorMiddleware(
{ inApi: "openai", outApi: "openai-responses", service: "openai" },
-  { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] }
+  { afterTransform: [fixupMaxTokens, filterGPT5UnsupportedParams] }
),
// Add final check to ensure the body is in the correct format for Responses API
(req, _res, next) => {
@@ -472,27 +473,26 @@ function fixupMaxTokens(req: Request) {
}
}
-// Models that support 'reasoning_effort'
-function isO1Model(model: string): boolean {
-  // Match any o-series model:
-  // - starts with 'o' followed by number (o1, o3, o4, etc.)
-  // - optionally followed by suffix like -mini or -preview
-  // - optionally followed by a date stamp
-  // Also match codex-mini models which support reasoning
-  return /^o\d+(-mini|-pro|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model) ||
-    /^codex-mini(-latest|-\d{4}-\d{2}-\d{2})?$/.test(model);
-}
-
-// most frontends don't currently support custom reasoning effort for o1
-// so we do this to overwrite the default (medium)
-function setO1ReasoningEffort(req: Request) {
-  const effort = process.env.O1_REASONING_EFFORT?.toLowerCase();
-  if (!effort || !isO1Model(req.body.model) || req.body.reasoning_effort) return;
-  if (['low', 'medium', 'high'].includes(effort)) {
-    req.body.reasoning_effort = effort;
-  }
-}
+// GPT-5, GPT-5-mini, and GPT-5-nano don't support certain parameters
+// Remove them if present to prevent API errors
+function filterGPT5UnsupportedParams(req: Request) {
+  const model = req.body.model;
+  // Only apply filtering to these specific models (gpt5-chat-latest supports all params)
+  const restrictedModels = /^gpt-5(-mini|-nano)?(-\d{4}-\d{2}-\d{2})?$/;
+  if (!restrictedModels.test(model)) {
+    return; // Not a restricted model, no filtering needed
+  }
+  // Remove unsupported parameters if they exist
+  const unsupportedParams = ['temperature', 'top_p', 'presence_penalty', 'frequency_penalty'];
+  for (const param of unsupportedParams) {
+    if (req.body[param] !== undefined) {
+      delete req.body[param];
+    }
+  }
+}
export const openai = openaiRouter;
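For reviewers, a quick sketch of the filter's effect on a request body. The req shape here is assumed (the real argument is an Express Request; only body is touched):

```ts
// Assumed request shape; the real handler passes an Express Request and
// filterGPT5UnsupportedParams mutates req.body in place.
const req = {
  body: {
    model: "gpt-5-mini",
    temperature: 0.7,
    top_p: 0.9,
    max_tokens: 1024,
    messages: [{ role: "user", content: "hi" }],
  },
} as any;

filterGPT5UnsupportedParams(req);
// temperature and top_p are now deleted; max_tokens and messages survive.
// A "gpt-5-chat-latest" body would pass through untouched.
```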
+52 -5
@@ -37,6 +37,51 @@ import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue";
const CACHE_TTL = 2000;
// Define the preferred order for model families in the service info display
// This ensures logical grouping (GPT-4 models together, then GPT-4.1, then GPT-5, etc.)
const MODEL_FAMILY_ORDER: ModelFamily[] = [
// OpenAI models in logical order
"turbo",
"gpt4",
"gpt4-32k",
"gpt4-turbo",
"gpt4o",
"gpt41",
"gpt41-mini",
"gpt41-nano",
"gpt45",
"gpt5",
"gpt5-mini",
"gpt5-nano",
"gpt5-chat-latest",
"o1",
"o1-mini",
"o1-pro",
"o3",
"o3-mini",
"o3-pro",
"o4-mini",
"codex-mini",
"dall-e",
"gpt-image",
// Other services
"claude",
"claude-opus",
"gemini-pro",
"gemini-flash",
"mistral-tiny",
"mistral-small",
"mistral-medium",
"mistral-large",
"aws-claude",
"aws-claude-opus",
"deepseek",
"xai",
"cohere",
"qwen",
"moonshot"
];
type KeyPoolKey = ReturnType<typeof keyPool.list>[0];
const keyIsOpenAIKey = (k: KeyPoolKey): k is OpenAIKey =>
k.service === "openai";
@@ -323,16 +368,18 @@ function getServiceModelStats(accessibleFamilies: Set<ModelFamily>) {
if (!hasKeys) continue;
serviceInfo[`${service}Keys`] = hasKeys;
-accessibleFamilies.forEach((f) => {
-  if (MODEL_FAMILY_SERVICE[f] === service) {
-    modelFamilyInfo[f] = getInfoForFamily(f);
-  }
-});
if (service === "openai" && config.checkKeys) {
serviceInfo.openaiOrgs = getUniqueOpenAIOrgs(keyPool.list());
}
}
+// Build model family info in the defined order for logical grouping
+for (const family of MODEL_FAMILY_ORDER) {
+  if (accessibleFamilies.has(family)) {
+    modelFamilyInfo[family] = getInfoForFamily(family);
+  }
+}
return { serviceInfo, modelFamilyInfo };
}
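Why the new loop fixes ordering: JavaScript objects preserve insertion order for non-numeric string keys, so inserting families in MODEL_FAMILY_ORDER pins the display order regardless of the Set's iteration order. An illustrative sketch:

```ts
// Illustrative only: which families render, and in what order, for a user
// whose keys grant an unordered set of families.
const accessibleFamilies = new Set<ModelFamily>(["gpt5", "turbo", "gpt4o"]);
const ordered = MODEL_FAMILY_ORDER.filter((f) => accessibleFamilies.has(f));
console.log(ordered); // ["turbo", "gpt4o", "gpt5"]
```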
+2 -1
@@ -77,7 +77,8 @@ export const OpenAIV1ChatCompletionSchema = z
functions: z.array(z.any()).optional(),
tool_choice: z.any().optional(),
function_choice: z.any().optional(),
reasoning_effort: z.enum(["low", "medium", "high"]).optional(),
reasoning_effort: z.enum(["minimal", "low", "medium", "high"]).optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
response_format: z.any(),
})
// Tool usage must be enabled via config because we currently have no way to
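A hedged example of what the widened schema now accepts, assuming model and messages are the only other fields a minimal parse needs (the full schema is not shown here):

```ts
const result = OpenAIV1ChatCompletionSchema.safeParse({
  model: "gpt-5",
  messages: [{ role: "user", content: "hello" }],
  reasoning_effort: "minimal", // "minimal" is the newly allowed value
  verbosity: "low",            // verbosity is the newly added optional field
});
console.log(result.success); // true, assuming no other required fields
```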
@@ -88,6 +88,13 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4-turbo" as const,
"gpt4o" as const,
"gpt45" as const,
"gpt41" as const,
"gpt41-mini" as const,
"gpt41-nano" as const,
"gpt5" as const,
"gpt5-mini" as const,
"gpt5-nano" as const,
"gpt5-chat-latest" as const,
],
isTrial: false,
isDisabled: false,
+24
@@ -31,6 +31,10 @@ export type OpenAIModelFamily =
| "gpt41-mini"
| "gpt41-nano"
| "gpt45"
| "gpt5"
| "gpt5-mini"
| "gpt5-nano"
| "gpt5-chat-latest"
| "o1"
| "o1-mini"
| "o1-pro"
@@ -92,6 +96,10 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"gpt41",
"gpt41-mini",
"gpt41-nano",
"gpt5",
"gpt5-mini",
"gpt5-nano",
"gpt5-chat-latest",
"o1",
"o1-mini",
"o1-pro",
@@ -128,6 +136,10 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"azure-gpt41",
"azure-gpt41-mini",
"azure-gpt41-nano",
"azure-gpt5",
"azure-gpt5-mini",
"azure-gpt5-nano",
"azure-gpt5-chat-latest",
"azure-dall-e",
"azure-o1",
"azure-o1-mini",
@@ -174,6 +186,10 @@ export const MODEL_FAMILY_SERVICE: {
gpt41: "openai",
"gpt41-mini": "openai",
"gpt41-nano": "openai",
gpt5: "openai",
"gpt5-mini": "openai",
"gpt5-nano": "openai",
"gpt5-chat-latest": "openai",
"o1": "openai",
"o1-mini": "openai",
"o1-pro": "openai",
@@ -203,6 +219,10 @@ export const MODEL_FAMILY_SERVICE: {
"azure-gpt41": "azure",
"azure-gpt41-mini": "azure",
"azure-gpt41-nano": "azure",
"azure-gpt5": "azure",
"azure-gpt5-mini": "azure",
"azure-gpt5-nano": "azure",
"azure-gpt5-chat-latest": "azure",
"azure-dall-e": "azure",
"azure-o1": "azure",
"azure-o1-mini": "azure",
@@ -226,6 +246,10 @@ export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e", "gpt-i
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-image(-\\d+)?(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt-image",
"^gpt-5(-\\d{4}-\\d{2}-\\d{2})?$": "gpt5",
"^gpt-5-mini(-\\d{4}-\\d{2}-\\d{2})?$": "gpt5-mini",
"^gpt-5-nano(-\\d{4}-\\d{2}-\\d{2})?$": "gpt5-nano",
"^gpt-5-chat-latest$": "gpt5-chat-latest",
"^gpt-4\\.5(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt45",
"^gpt-4\\.1(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41",
"^gpt-4\\.1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41-mini",
+8
@@ -11,6 +11,14 @@ const MODEL_PRICING: Record<ModelFamily, { input: number; output: number } | und
"azure-gpt41-mini": { input: 0.40, output: 1.60 },
"gpt41-nano": { input: 0.10, output: 0.40 },
"azure-gpt41-nano": { input: 0.10, output: 0.40 },
"gpt5": { input: 1.25, output: 10.00 },
"azure-gpt5": { input: 1.25, output: 10.00 },
"gpt5-mini": { input: 0.25, output: 2.00 },
"azure-gpt5-mini": { input: 0.25, output: 2.00 },
"gpt5-nano": { input: 0.05, output: 0.40 },
"azure-gpt5-nano": { input: 0.05, output: 0.40 },
"gpt5-chat-latest": { input: 1.25, output: 10.00 },
"azure-gpt5-chat-latest": { input: 1.25, output: 10.00 },
"gpt45": { input: 75.00, output: 150.00 }, // Example, needs verification if this model family is still current with this pricing
"azure-gpt45": { input: 75.00, output: 150.00 }, // Example, needs verification
"gpt4o": { input: 2.50, output: 10.00 },