diff --git a/src/info-page.ts b/src/info-page.ts index 11afed4..7e0443f 100644 --- a/src/info-page.ts +++ b/src/info-page.ts @@ -40,6 +40,10 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = { gpt41: "GPT-4.1", "gpt41-mini": "GPT-4.1 Mini", "gpt41-nano": "GPT-4.1 Nano", + gpt5: "GPT-5", + "gpt5-mini": "GPT-5 Mini", + "gpt5-nano": "GPT-5 Nano", + "gpt5-chat-latest": "GPT-5 Chat Latest", gpt45: "GPT-4.5", o1: "OpenAI o1", "o1-mini": "OpenAI o1 mini", @@ -77,6 +81,10 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = { "azure-gpt41": "Azure GPT-4.1", "azure-gpt41-mini": "Azure GPT-4.1 Mini", "azure-gpt41-nano": "Azure GPT-4.1 Nano", + "azure-gpt5": "Azure GPT-5", + "azure-gpt5-mini": "Azure GPT-5 Mini", + "azure-gpt5-nano": "Azure GPT-5 Nano", + "azure-gpt5-chat-latest": "Azure GPT-5 Chat Latest", "azure-o1": "Azure o1", "azure-o1-mini": "Azure o1 mini", "azure-o1-pro": "Azure o1 pro", diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts index cb6812f..b65d2a1 100644 --- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts +++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts @@ -67,6 +67,14 @@ export const validateContextSize: RequestPreprocessor = async (req) => { modelMax = 1000000; } else if (model.match(/^gpt-4\.1-nano(-\d{4}-\d{2}-\d{2})?$/)) { modelMax = 1000000; + } else if (model.match(/^gpt-5(-\d{4}-\d{2}-\d{2})?$/)) { + modelMax = 400000; + } else if (model.match(/^gpt-5-mini(-\d{4}-\d{2}-\d{2})?$/)) { + modelMax = 400000; + } else if (model.match(/^gpt-5-nano(-\d{4}-\d{2}-\d{2})?$/)) { + modelMax = 400000; + } else if (model.match(/^gpt-5-chat-latest$/)) { + modelMax = 400000; } else if (model.match(/^chatgpt-4o/)) { modelMax = 128000; } else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) { diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts index 1ec586e..14168eb 100644 --- 
a/src/proxy/openai.ts +++ b/src/proxy/openai.ts @@ -378,8 +378,8 @@ openaiRouter.post( { inApi: "openai", outApi: "openai", service: "openai" }, { afterTransform: [ - fixupMaxTokens, - setO1ReasoningEffort, + fixupMaxTokens, + filterGPT5UnsupportedParams, routeToResponsesApi ] } @@ -412,7 +412,7 @@ openaiRouter.post( ipLimiter, createPreprocessorMiddleware( { inApi: "openai", outApi: "openai-responses", service: "openai" }, - { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] } + { afterTransform: [fixupMaxTokens, filterGPT5UnsupportedParams] } ), // Add final check to ensure the body is in the correct format for Responses API (req, _res, next) => { @@ -472,27 +472,26 @@ function fixupMaxTokens(req: Request) { } } -// Models that support 'reasoning_effort' -function isO1Model(model: string): boolean { - // Match any o-series model: - // - starts with 'o' followed by number (o1, o3, o4, etc.)
- // - optionally followed by suffix like -mini or -preview - // - optionally followed by a date stamp - // Also match codex-mini models which support reasoning - return /^o\d+(-mini|-pro|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model) || - /^codex-mini(-latest|-\d{4}-\d{2}-\d{2})?$/.test(model); -} - -// most frontends don't currently support custom reasoning effort for o1 -// so we do this to overwrite the default (medium) -function setO1ReasoningEffort(req: Request) { - const effort = process.env.O1_REASONING_EFFORT?.toLowerCase(); - if (!effort || !isO1Model(req.body.model) || req.body.reasoning_effort) return; +// GPT-5, GPT-5-mini, and GPT-5-nano don't support certain parameters +// Remove them if present to prevent API errors +function filterGPT5UnsupportedParams(req: Request) { + const model = req.body.model; - if (['low', 'medium', 'high'].includes(effort)) { - req.body.reasoning_effort = effort; + // Only apply filtering to these specific models (gpt5-chat-latest supports all params) + const restrictedModels = /^gpt-5(-mini|-nano)?(-\d{4}-\d{2}-\d{2})?$/; + + if (!restrictedModels.test(model)) { + return; // Not a restricted model, no filtering needed + } + + // Remove unsupported parameters if they exist + const unsupportedParams = ['temperature', 'top_p', 'presence_penalty', 'frequency_penalty']; + + for (const param of unsupportedParams) { + if (req.body[param] !== undefined) { + delete req.body[param]; + } } } - export const openai = openaiRouter; diff --git a/src/service-info.ts b/src/service-info.ts index 19f1dfa..ca79b60 100644 --- a/src/service-info.ts +++ b/src/service-info.ts @@ -37,6 +37,51 @@ import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue"; const CACHE_TTL = 2000; +// Define the preferred order for model families in the service info display +// This ensures logical grouping (GPT-4 models together, then GPT-4.1, then GPT-5, etc.) 
+const MODEL_FAMILY_ORDER: ModelFamily[] = [ + // OpenAI models in logical order + "turbo", + "gpt4", + "gpt4-32k", + "gpt4-turbo", + "gpt4o", + "gpt41", + "gpt41-mini", + "gpt41-nano", + "gpt45", + "gpt5", + "gpt5-mini", + "gpt5-nano", + "gpt5-chat-latest", + "o1", + "o1-mini", + "o1-pro", + "o3", + "o3-mini", + "o3-pro", + "o4-mini", + "codex-mini", + "dall-e", + "gpt-image", + // Other services + "claude", + "claude-opus", + "gemini-pro", + "gemini-flash", + "mistral-tiny", + "mistral-small", + "mistral-medium", + "mistral-large", + "aws-claude", + "aws-claude-opus", + "deepseek", + "xai", + "cohere", + "qwen", + "moonshot" +]; + type KeyPoolKey = ReturnType[0]; const keyIsOpenAIKey = (k: KeyPoolKey): k is OpenAIKey => k.service === "openai"; @@ -323,16 +368,26 @@ function getServiceModelStats(accessibleFamilies: Set) { if (!hasKeys) continue; serviceInfo[`${service}Keys`] = hasKeys; - accessibleFamilies.forEach((f) => { - if (MODEL_FAMILY_SERVICE[f] === service) { - modelFamilyInfo[f] = getInfoForFamily(f); - } - }); if (service === "openai" && config.checkKeys) { serviceInfo.openaiOrgs = getUniqueOpenAIOrgs(keyPool.list()); } } + + // Build model family info in the defined order for logical grouping + for (const family of MODEL_FAMILY_ORDER) { + if (accessibleFamilies.has(family)) { + modelFamilyInfo[family] = getInfoForFamily(family); + } + } + + // Fallback: include accessible families missing from MODEL_FAMILY_ORDER + // (e.g. the azure-* families) so they are not silently dropped + for (const family of accessibleFamilies) { + if (modelFamilyInfo[family] === undefined) { + modelFamilyInfo[family] = getInfoForFamily(family); + } + } return { serviceInfo, modelFamilyInfo }; } diff --git a/src/shared/api-schemas/openai.ts b/src/shared/api-schemas/openai.ts index 6d267cb..a232684 100644 --- a/src/shared/api-schemas/openai.ts +++ b/src/shared/api-schemas/openai.ts @@ -77,7 +77,8 @@ export const OpenAIV1ChatCompletionSchema = z functions: z.array(z.any()).optional(), tool_choice: z.any().optional(), function_choice: z.any().optional(), - reasoning_effort: z.enum(["low", "medium", "high"]).optional(), + reasoning_effort: z.enum(["minimal", "low", "medium", "high"]).optional(), + verbosity: z.enum(["low", "medium", "high"]).optional(), response_format:
z.any(), }) // Tool usage must be enabled via config because we currently have no way to diff --git a/src/shared/key-management/openai/provider.ts b/src/shared/key-management/openai/provider.ts index 76bc858..c15d6fc 100644 --- a/src/shared/key-management/openai/provider.ts +++ b/src/shared/key-management/openai/provider.ts @@ -88,6 +88,13 @@ export class OpenAIKeyProvider implements KeyProvider { "gpt4-turbo" as const, "gpt4o" as const, "gpt45" as const, + "gpt41" as const, + "gpt41-mini" as const, + "gpt41-nano" as const, + "gpt5" as const, + "gpt5-mini" as const, + "gpt5-nano" as const, + "gpt5-chat-latest" as const, ], isTrial: false, isDisabled: false, diff --git a/src/shared/models.ts b/src/shared/models.ts index 15cd23c..2c35244 100644 --- a/src/shared/models.ts +++ b/src/shared/models.ts @@ -31,6 +31,10 @@ export type OpenAIModelFamily = | "gpt41-mini" | "gpt41-nano" | "gpt45" + | "gpt5" + | "gpt5-mini" + | "gpt5-nano" + | "gpt5-chat-latest" | "o1" | "o1-mini" | "o1-pro" @@ -92,6 +96,10 @@ export const MODEL_FAMILIES = (( "gpt41", "gpt41-mini", "gpt41-nano", + "gpt5", + "gpt5-mini", + "gpt5-nano", + "gpt5-chat-latest", "o1", "o1-mini", "o1-pro", @@ -128,6 +136,10 @@ export const MODEL_FAMILIES = (( "azure-gpt41", "azure-gpt41-mini", "azure-gpt41-nano", + "azure-gpt5", + "azure-gpt5-mini", + "azure-gpt5-nano", + "azure-gpt5-chat-latest", "azure-dall-e", "azure-o1", "azure-o1-mini", @@ -174,6 +186,10 @@ export const MODEL_FAMILY_SERVICE: { gpt41: "openai", "gpt41-mini": "openai", "gpt41-nano": "openai", + gpt5: "openai", + "gpt5-mini": "openai", + "gpt5-nano": "openai", + "gpt5-chat-latest": "openai", "o1": "openai", "o1-mini": "openai", "o1-pro": "openai", @@ -203,6 +219,10 @@ export const MODEL_FAMILY_SERVICE: { "azure-gpt41": "azure", "azure-gpt41-mini": "azure", "azure-gpt41-nano": "azure", + "azure-gpt5": "azure", + "azure-gpt5-mini": "azure", + "azure-gpt5-nano": "azure", + "azure-gpt5-chat-latest": "azure", "azure-dall-e": "azure", "azure-o1": "azure", 
"azure-o1-mini": "azure", @@ -226,6 +246,10 @@ export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e", "gpt-i export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = { "^gpt-image(-\\d+)?(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt-image", + "^gpt-5(-\\d{4}-\\d{2}-\\d{2})?$": "gpt5", + "^gpt-5-mini(-\\d{4}-\\d{2}-\\d{2})?$": "gpt5-mini", + "^gpt-5-nano(-\\d{4}-\\d{2}-\\d{2})?$": "gpt5-nano", + "^gpt-5-chat-latest$": "gpt5-chat-latest", "^gpt-4\\.5(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt45", "^gpt-4\\.1(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41", "^gpt-4\\.1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41-mini", diff --git a/src/shared/stats.ts b/src/shared/stats.ts index b2d2195..ec86df5 100644 --- a/src/shared/stats.ts +++ b/src/shared/stats.ts @@ -11,6 +11,14 @@ const MODEL_PRICING: Record