commit c8dab8786a
parent 9cc86c2d68
Author: reanon
Date: 2025-08-07 21:03:03 +02:00

8 changed files with 131 additions and 28 deletions
+8
@@ -40,6 +40,10 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
gpt41: "GPT-4.1",
"gpt41-mini": "GPT-4.1 Mini",
"gpt41-nano": "GPT-4.1 Nano",
gpt5: "GPT-5",
"gpt5-mini": "GPT-5 Mini",
"gpt5-nano": "GPT-5 Nano",
"gpt5-chat-latest": "GPT-5 Chat Latest",
gpt45: "GPT-4.5",
o1: "OpenAI o1",
"o1-mini": "OpenAI o1 mini",
@@ -77,6 +81,10 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"azure-gpt41": "Azure GPT-4.1",
"azure-gpt41-mini": "Azure GPT-4.1 Mini",
"azure-gpt41-nano": "Azure GPT-4.1 Nano",
"azure-gpt5": "Azure GPT-5",
"azure-gpt5-mini": "Azure GPT-5 Mini",
"azure-gpt5-nano": "Azure GPT-5 Nano",
"azure-gpt5-chat-latest": "Azure GPT-5 Chat Latest",
"azure-o1": "Azure o1",
"azure-o1-mini": "Azure o1 mini",
"azure-o1-pro": "Azure o1 pro",
@@ -67,6 +67,14 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 1000000;
} else if (model.match(/^gpt-4\.1-nano(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 1000000;
} else if (model.match(/^gpt-5(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 400000;
} else if (model.match(/^gpt-5-mini(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 400000;
} else if (model.match(/^gpt-5-nano(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 400000;
} else if (model.match(/^gpt-5-chat-latest$/)) {
modelMax = 400000;
} else if (model.match(/^chatgpt-4o/)) {
modelMax = 128000;
} else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
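Reviewer note: all four new branches assign the same 400k limit, so they collapse to a single predicate. A minimal standalone sketch (hypothetical helper, not part of this diff) of how the new patterns resolve:

```ts
// Hypothetical helper, not in the diff: every GPT-5 variant gets 400k,
// so the four regex branches above reduce to one check.
function gpt5ContextLimit(model: string): number | undefined {
  const gpt5 = /^gpt-5(-mini|-nano)?(-\d{4}-\d{2}-\d{2})?$/;
  if (gpt5.test(model) || model === "gpt-5-chat-latest") return 400000;
  return undefined;
}

gpt5ContextLimit("gpt-5-2025-08-07");  // 400000
gpt5ContextLimit("gpt-5-chat-latest"); // 400000
gpt5ContextLimit("gpt-4o");            // undefined
```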
+22 -22
@@ -1,5 +1,6 @@
import { Request, RequestHandler, Router } from "express";
import { config } from "../config";
import { BadRequestError } from "../shared/errors";
import { AzureOpenAIKey, keyPool, OpenAIKey } from "../shared/key-management";
import { getOpenAIModelFamily } from "../shared/models";
import { ipLimiter } from "./rate-limit";
@@ -378,8 +379,8 @@ openaiRouter.post(
{ inApi: "openai", outApi: "openai", service: "openai" },
{
afterTransform: [
-  fixupMaxTokens,
-  setO1ReasoningEffort,
+  fixupMaxTokens,
+  filterGPT5UnsupportedParams,
routeToResponsesApi
]
}
@@ -412,7 +413,7 @@ openaiRouter.post(
ipLimiter,
createPreprocessorMiddleware(
{ inApi: "openai", outApi: "openai-responses", service: "openai" },
-  { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] }
+  { afterTransform: [fixupMaxTokens, filterGPT5UnsupportedParams] }
),
// Add final check to ensure the body is in the correct format for Responses API
(req, _res, next) => {
@@ -472,27 +473,26 @@ function fixupMaxTokens(req: Request) {
}
}
-// Models that support 'reasoning_effort'
-function isO1Model(model: string): boolean {
-  // Match any o-series model:
-  // - starts with 'o' followed by number (o1, o3, o4, etc.)
-  // - optionally followed by suffix like -mini or -preview
-  // - optionally followed by a date stamp
-  // Also match codex-mini models which support reasoning
-  return /^o\d+(-mini|-pro|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model) ||
-    /^codex-mini(-latest|-\d{4}-\d{2}-\d{2})?$/.test(model);
-}
-
-// most frontends don't currently support custom reasoning effort for o1
-// so we do this to overwrite the default (medium)
-function setO1ReasoningEffort(req: Request) {
-  const effort = process.env.O1_REASONING_EFFORT?.toLowerCase();
-  if (!effort || !isO1Model(req.body.model) || req.body.reasoning_effort) return;
-  if (['low', 'medium', 'high'].includes(effort)) {
-    req.body.reasoning_effort = effort;
-  }
-}
+// GPT-5, GPT-5-mini, and GPT-5-nano don't support certain parameters
+// Remove them if present to prevent API errors
+function filterGPT5UnsupportedParams(req: Request) {
+  const model = req.body.model;
+  // Only apply filtering to these specific models (gpt5-chat-latest supports all params)
+  const restrictedModels = /^gpt-5(-mini|-nano)?(-\d{4}-\d{2}-\d{2})?$/;
+  if (!restrictedModels.test(model)) {
+    return; // Not a restricted model, no filtering needed
+  }
+  // Remove unsupported parameters if they exist
+  const unsupportedParams = ['temperature', 'top_p', 'presence_penalty', 'frequency_penalty'];
+  for (const param of unsupportedParams) {
+    if (req.body[param] !== undefined) {
+      delete req.body[param];
+    }
+  }
+}
export const openai = openaiRouter;
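For reviewers, a quick sketch of the filter's effect on a request body. The req shape here is assumed (the real argument is an Express Request; only body is touched):

```ts
// Assumed request shape; the real handler passes an Express Request and
// filterGPT5UnsupportedParams mutates req.body in place.
const req = {
  body: {
    model: "gpt-5-mini",
    temperature: 0.7,
    top_p: 0.9,
    max_tokens: 1024,
    messages: [{ role: "user", content: "hi" }],
  },
} as any;

filterGPT5UnsupportedParams(req);
// temperature and top_p are now deleted; max_tokens and messages survive.
// A "gpt-5-chat-latest" body would pass through untouched.
```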
+52 -5
@@ -37,6 +37,51 @@ import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue";
const CACHE_TTL = 2000;
// Define the preferred order for model families in the service info display
// This ensures logical grouping (GPT-4 models together, then GPT-4.1, then GPT-5, etc.)
const MODEL_FAMILY_ORDER: ModelFamily[] = [
// OpenAI models in logical order
"turbo",
"gpt4",
"gpt4-32k",
"gpt4-turbo",
"gpt4o",
"gpt41",
"gpt41-mini",
"gpt41-nano",
"gpt45",
"gpt5",
"gpt5-mini",
"gpt5-nano",
"gpt5-chat-latest",
"o1",
"o1-mini",
"o1-pro",
"o3",
"o3-mini",
"o3-pro",
"o4-mini",
"codex-mini",
"dall-e",
"gpt-image",
// Other services
"claude",
"claude-opus",
"gemini-pro",
"gemini-flash",
"mistral-tiny",
"mistral-small",
"mistral-medium",
"mistral-large",
"aws-claude",
"aws-claude-opus",
"deepseek",
"xai",
"cohere",
"qwen",
"moonshot"
];
type KeyPoolKey = ReturnType<typeof keyPool.list>[0];
const keyIsOpenAIKey = (k: KeyPoolKey): k is OpenAIKey =>
k.service === "openai";
@@ -323,16 +368,18 @@ function getServiceModelStats(accessibleFamilies: Set<ModelFamily>) {
if (!hasKeys) continue;
serviceInfo[`${service}Keys`] = hasKeys;
-accessibleFamilies.forEach((f) => {
-  if (MODEL_FAMILY_SERVICE[f] === service) {
-    modelFamilyInfo[f] = getInfoForFamily(f);
-  }
-});
if (service === "openai" && config.checkKeys) {
serviceInfo.openaiOrgs = getUniqueOpenAIOrgs(keyPool.list());
}
}
+// Build model family info in the defined order for logical grouping
+for (const family of MODEL_FAMILY_ORDER) {
+  if (accessibleFamilies.has(family)) {
+    modelFamilyInfo[family] = getInfoForFamily(family);
+  }
+}
return { serviceInfo, modelFamilyInfo };
}
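Why the new loop fixes ordering: JavaScript objects preserve insertion order for non-numeric string keys, so inserting families in MODEL_FAMILY_ORDER pins the display order regardless of the Set's iteration order. An illustrative sketch:

```ts
// Illustrative only: which families render, and in what order, for a user
// whose keys grant an unordered set of families.
const accessibleFamilies = new Set<ModelFamily>(["gpt5", "turbo", "gpt4o"]);
const ordered = MODEL_FAMILY_ORDER.filter((f) => accessibleFamilies.has(f));
console.log(ordered); // ["turbo", "gpt4o", "gpt5"]
```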
+2 -1
@@ -77,7 +77,8 @@ export const OpenAIV1ChatCompletionSchema = z
functions: z.array(z.any()).optional(),
tool_choice: z.any().optional(),
function_choice: z.any().optional(),
reasoning_effort: z.enum(["low", "medium", "high"]).optional(),
reasoning_effort: z.enum(["minimal", "low", "medium", "high"]).optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
response_format: z.any(),
})
// Tool usage must be enabled via config because we currently have no way to
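A hedged example of what the widened schema now accepts, assuming model and messages are the only other fields a minimal parse needs (the full schema is not shown here):

```ts
const result = OpenAIV1ChatCompletionSchema.safeParse({
  model: "gpt-5",
  messages: [{ role: "user", content: "hello" }],
  reasoning_effort: "minimal", // "minimal" is the newly allowed value
  verbosity: "low",            // verbosity is the newly added optional field
});
console.log(result.success); // true, assuming no other required fields
```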
@@ -88,6 +88,13 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4-turbo" as const,
"gpt4o" as const,
"gpt45" as const,
"gpt41" as const,
"gpt41-mini" as const,
"gpt41-nano" as const,
"gpt5" as const,
"gpt5-mini" as const,
"gpt5-nano" as const,
"gpt5-chat-latest" as const,
],
isTrial: false,
isDisabled: false,
+24
@@ -31,6 +31,10 @@ export type OpenAIModelFamily =
| "gpt41-mini"
| "gpt41-nano"
| "gpt45"
| "gpt5"
| "gpt5-mini"
| "gpt5-nano"
| "gpt5-chat-latest"
| "o1"
| "o1-mini"
| "o1-pro"
@@ -92,6 +96,10 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"gpt41",
"gpt41-mini",
"gpt41-nano",
"gpt5",
"gpt5-mini",
"gpt5-nano",
"gpt5-chat-latest",
"o1",
"o1-mini",
"o1-pro",
@@ -128,6 +136,10 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"azure-gpt41",
"azure-gpt41-mini",
"azure-gpt41-nano",
"azure-gpt5",
"azure-gpt5-mini",
"azure-gpt5-nano",
"azure-gpt5-chat-latest",
"azure-dall-e",
"azure-o1",
"azure-o1-mini",
@@ -174,6 +186,10 @@ export const MODEL_FAMILY_SERVICE: {
gpt41: "openai",
"gpt41-mini": "openai",
"gpt41-nano": "openai",
gpt5: "openai",
"gpt5-mini": "openai",
"gpt5-nano": "openai",
"gpt5-chat-latest": "openai",
"o1": "openai",
"o1-mini": "openai",
"o1-pro": "openai",
@@ -203,6 +219,10 @@ export const MODEL_FAMILY_SERVICE: {
"azure-gpt41": "azure",
"azure-gpt41-mini": "azure",
"azure-gpt41-nano": "azure",
"azure-gpt5": "azure",
"azure-gpt5-mini": "azure",
"azure-gpt5-nano": "azure",
"azure-gpt5-chat-latest": "azure",
"azure-dall-e": "azure",
"azure-o1": "azure",
"azure-o1-mini": "azure",
@@ -226,6 +246,10 @@ export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e", "gpt-i
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-image(-\\d+)?(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt-image",
"^gpt-5(-\\d{4}-\\d{2}-\\d{2})?$": "gpt5",
"^gpt-5-mini(-\\d{4}-\\d{2}-\\d{2})?$": "gpt5-mini",
"^gpt-5-nano(-\\d{4}-\\d{2}-\\d{2})?$": "gpt5-nano",
"^gpt-5-chat-latest$": "gpt5-chat-latest",
"^gpt-4\\.5(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt45",
"^gpt-4\\.1(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41",
"^gpt-4\\.1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41-mini",
+8
@@ -11,6 +11,14 @@ const MODEL_PRICING: Record<ModelFamily, { input: number; output: number } | und
"azure-gpt41-mini": { input: 0.40, output: 1.60 },
"gpt41-nano": { input: 0.10, output: 0.40 },
"azure-gpt41-nano": { input: 0.10, output: 0.40 },
"gpt5": { input: 1.25, output: 10.00 },
"azure-gpt5": { input: 1.25, output: 10.00 },
"gpt5-mini": { input: 0.25, output: 2.00 },
"azure-gpt5-mini": { input: 0.25, output: 2.00 },
"gpt5-nano": { input: 0.05, output: 0.40 },
"azure-gpt5-nano": { input: 0.05, output: 0.40 },
"gpt5-chat-latest": { input: 1.25, output: 10.00 },
"azure-gpt5-chat-latest": { input: 1.25, output: 10.00 },
"gpt45": { input: 75.00, output: 150.00 }, // Example, needs verification if this model family is still current with this pricing
"azure-gpt45": { input: 75.00, output: 150.00 }, // Example, needs verification
"gpt4o": { input: 2.50, output: 10.00 },