Update proxy for modern model APIs

Your Name
2026-04-06 03:59:37 -07:00
parent 824adfbbb2
commit 8662eadea7
48 changed files with 1294 additions and 214 deletions
+6 -2
@@ -40,11 +40,11 @@ NODE_ENV=production
# Which model types users are allowed to access.
# The following model families are recognized:
# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-dall-e
# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | gpt5 | o-series | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-gpt5 | azure-o-series | azure-dall-e
# By default, all models are allowed except for 'dall-e' / 'azure-dall-e'.
# To allow DALL-E image generation, uncomment the line below and add 'dall-e' or
# 'azure-dall-e' to the list of allowed model families.
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,gpt5,o-series,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o,azure-gpt5,azure-o-series
# Which services can be used to process prompts containing images via multimodal
# models. The following services are recognized:
@@ -115,10 +115,14 @@ NODE_ENV=production
# TOKEN_QUOTA_GPT4=0
# TOKEN_QUOTA_GPT4_32K=0
# TOKEN_QUOTA_GPT4_TURBO=0
# TOKEN_QUOTA_GPT5=0
# TOKEN_QUOTA_O_SERIES=0
# TOKEN_QUOTA_CLAUDE=0
# TOKEN_QUOTA_GEMINI_PRO=0
# TOKEN_QUOTA_AWS_CLAUDE=0
# TOKEN_QUOTA_GCP_CLAUDE=0
# TOKEN_QUOTA_AZURE_GPT5=0
# TOKEN_QUOTA_AZURE_O_SERIES=0
# "Tokens" for image-generation models are counted at a rate of 100000 tokens
# per US$1.00 generated, which is similar to the cost of GPT-4 Turbo.
# DALL-E 3 costs around US$0.10 per image (10000 tokens).
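As a quick sanity check of that rate, here is the arithmetic as a sketch in TypeScript. Note that TOKEN_QUOTA_DALL_E is a hypothetical variable name used only to illustrate the math; use whichever quota variable your deployment defines.

const TOKENS_PER_USD = 100_000; // image "tokens" credited per US$1.00 generated
const DALL_E_3_COST_USD = 0.1;  // approximate cost of one DALL-E 3 image
const tokensPerImage = DALL_E_3_COST_USD * TOKENS_PER_USD; // 10,000 tokens
// A hypothetical TOKEN_QUOTA_DALL_E=100000 would therefore cover roughly 10 images.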
+8 -3
@@ -45,11 +45,16 @@ You can also request Claude Instant, but support for this isn't fully implemente
### Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
- `anthropic.claude-v1` (~18k context, claude 1.3 -- EOL 2024-02-28)
- `anthropic.claude-v2` (~100k context, claude 2.0)
- `anthropic.claude-v2:1` (~200k context, claude 2.1)
- **Claude Instant**
- `anthropic.claude-instant-v1` (~100k context, claude instant 1.2)
- `anthropic.claude-haiku-4-5-20251001-v1:0`
- `anthropic.claude-sonnet-4-5-20250929-v1:0`
- `anthropic.claude-opus-4-1-20250805-v1:0`
- `anthropic.claude-3-5-haiku-20241022-v1:0`
- `anthropic.claude-sonnet-4-20250514-v1:0`
- `anthropic.claude-opus-4-20250514-v1:0`
For OpenAI-compatible callers, the proxy will also remap newer Claude-style names such as `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, `claude-opus-4-1-20250805`, and `claude-3-5-haiku-20241022` to the corresponding Bedrock model IDs.
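For reference, the remapping amounts to a table like the following. This is illustrative only; the proxy actually resolves aliases fuzzily in maybeReassignModel rather than through a literal lookup.

// Illustrative alias table; see maybeReassignModel for the real resolution logic.
const BEDROCK_ALIASES: Record<string, string> = {
  "claude-sonnet-4-5-20250929": "anthropic.claude-sonnet-4-5-20250929-v1:0",
  "claude-haiku-4-5-20251001": "anthropic.claude-haiku-4-5-20251001-v1:0",
  "claude-opus-4-1-20250805": "anthropic.claude-opus-4-1-20250805-v1:0",
  "claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
};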
## Note regarding logging
+3 -1
@@ -20,7 +20,9 @@ AZURE_CREDENTIALS=contoso-ml:gpt4-8k:0123456789abcdef0123456789abcdef,northwind-
Note that each Azure deployment is assigned a model when you create it in the Azure OpenAI Service portal. If you want to use a different model, you'll need to create a new deployment, and therefore a new key to be added to the AZURE_CREDENTIALS environment variable. Each credential only grants access to one model.
### Supported model IDs
Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. For the most part they work the same with Azure. GPT-3.5 Turbo has an ID of "gpt-35-turbo" because Azure doesn't allow periods in model names, but the proxy should automatically convert this to the correct ID.
Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. The proxy now understands newer Azure-backed OpenAI model families such as GPT-4o, GPT-4.1, GPT-5 / GPT-5.2, o-series reasoning models, and GPT Image deployments including `gpt-image-1.5`, plus the newer Responses API route at `/proxy/azure/openai/v1/responses`.
GPT-3.5 Turbo still has an Azure-specific ID of `gpt-35-turbo` because Azure doesn't allow periods in model names, but the proxy will automatically normalize that for you.
As noted above, you can only use model IDs for which a deployment has been created and added to the proxy.
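A minimal sketch of that normalization, assuming a simple prefix swap (the proxy's actual logic may differ):

function toAzureModelId(model: string): string {
  // Azure deployment names cannot contain periods, so "gpt-3.5-turbo"
  // becomes "gpt-35-turbo"; other IDs pass through unchanged.
  return model.replace(/^gpt-3\.5/, "gpt-35");
}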
+8 -4
@@ -29,7 +29,11 @@ GCP_CREDENTIALS=my-first-project:xxx@yyy.com:us-east5:-----BEGIN PRIVATE KEY----
## Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
- `claude-3-haiku@20240307`
- `claude-3-sonnet@20240229`
- `claude-3-opus@20240229`
- `claude-3-5-sonnet@20240620`
- `claude-haiku-4-5@20251001`
- `claude-sonnet-4-5@20250929`
- `claude-opus-4-1@20250805`
- `claude-3-5-haiku@20241022`
- `claude-sonnet-4@20250514`
- `claude-opus-4@20250514`
For OpenAI-compatible callers, the proxy will also remap Claude-style aliases like `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, and `claude-opus-4-1-20250805` to the corresponding Vertex AI model IDs.
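Since Vertex AI separates the date stamp with "@" rather than "-", the remapping is essentially a suffix rewrite. A sketch under that assumption (toVertexId is illustrative, not the proxy's actual code):

function toVertexId(alias: string): string {
  // "claude-sonnet-4-5-20250929" -> "claude-sonnet-4-5@20250929"
  return alias.replace(/-(\d{8})$/, "@$1");
}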
+51 -7
@@ -7,8 +7,8 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
"model": "gpt-3.5-turbo",
"max_tokens": 30,
"model": "gpt-4.1-mini",
"max_completion_tokens": 30,
"stream": false,
"messages": [
{
@@ -18,6 +18,19 @@ Content-Type: application/json
]
}
###
# @name OpenAI -- Responses API
POST https://api.openai.com/v1/responses
Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
"model": "gpt-5.2",
"reasoning": { "effort": "medium" },
"max_output_tokens": 80,
"input": "This is a test prompt."
}
###
# @name OpenAI -- Text Completions
POST https://api.openai.com/v1/completions
@@ -38,7 +51,7 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
"model": "text-embedding-ada-002",
"model": "text-embedding-3-small",
"input": "This is a test embedding input."
}
@@ -81,8 +94,8 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gpt-4-1106-preview",
"max_tokens": 20,
"model": "gpt-4.1",
"max_completion_tokens": 20,
"stream": true,
"temperature": 1,
"seed": 123,
@@ -94,6 +107,20 @@ Content-Type: application/json
]
}
###
# @name Proxy / OpenAI -- Native Responses API
POST {{proxy-host}}/proxy/openai/v1/responses
Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gpt-5.2",
"reasoning": { "effort": "medium" },
"max_output_tokens": 64,
"stream": false,
"input": "Summarize the purpose of this reverse proxy in one sentence."
}
###
# @name Proxy / OpenAI -- Native Text Completions
POST {{proxy-host}}/proxy/openai/v1/turbo-instruct/chat/completions
@@ -142,7 +169,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "text-embedding-ada-002",
"model": "text-embedding-3-small",
"input": "This is a test embedding input."
}
@@ -185,7 +212,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gpt-3.5-turbo",
"model": "gpt-5.2",
"max_tokens": 20,
"stream": false,
"temperature": 0,
@@ -197,6 +224,23 @@ Content-Type: application/json
]
}
###
# @name Proxy / Google AI -- OpenAI-Compat Image Generation
POST {{proxy-host}}/proxy/google-ai/v1/chat/completions
Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gemini-2.5-flash-image",
"stream": false,
"messages": [
{
"role": "user",
"content": "Generate a flat vector-style illustration of a red fox reading a newspaper at a cafe table."
}
]
}
###
# @name Proxy / AWS Claude -- Native Completion
POST {{proxy-host}}/proxy/aws/claude/v1/complete
+4
@@ -434,6 +434,8 @@ export const config: Config = {
"gpt4-32k",
"gpt4-turbo",
"gpt4o",
"gpt5",
"o-series",
"claude",
"claude-opus",
"gemini-pro",
@@ -450,6 +452,8 @@ export const config: Config = {
"azure-gpt4-32k",
"azure-gpt4-turbo",
"azure-gpt4o",
"azure-gpt5",
"azure-o-series",
]),
rejectPhrases: parseCsv(getEnvWithDefault("REJECT_PHRASES", "")),
rejectMessage: getEnvWithDefault(
+4
@@ -17,6 +17,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"gpt4-32k": "GPT-4 32k",
"gpt4-turbo": "GPT-4 Turbo",
gpt4o: "GPT-4o",
gpt5: "GPT-5",
"o-series": "o-Series",
"dall-e": "DALL-E",
claude: "Claude (Sonnet)",
"claude-opus": "Claude (Opus)",
@@ -34,6 +36,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"azure-gpt4-32k": "Azure GPT-4 32k",
"azure-gpt4-turbo": "Azure GPT-4 Turbo",
"azure-gpt4o": "Azure GPT-4o",
"azure-gpt5": "Azure GPT-5",
"azure-o-series": "Azure o-Series",
"azure-dall-e": "Azure DALL-E",
};
+28 -21
@@ -29,24 +29,18 @@ const getModelsResponse = () => {
if (!config.anthropicKey) return { object: "list", data: [] };
const claudeVariants = [
"claude-v1",
"claude-v1-100k",
"claude-instant-v1",
"claude-instant-v1-100k",
"claude-v1.3",
"claude-v1.3-100k",
"claude-v1.2",
"claude-v1.0",
"claude-instant-v1.1",
"claude-instant-v1.1-100k",
"claude-instant-v1.0",
"claude-2",
"claude-2.0",
"claude-2.1",
"claude-3-haiku-20240307",
"claude-3-opus-20240229",
"claude-3-sonnet-20240229",
"claude-3-5-sonnet-20240620"
"claude-sonnet-4-5",
"claude-sonnet-4-5-20250929",
"claude-haiku-4-5",
"claude-haiku-4-5-20251001",
"claude-opus-4-1",
"claude-opus-4-1-20250805",
"claude-opus-4-20250514",
"claude-sonnet-4-20250514",
"claude-3-5-haiku-20241022",
"claude-3-5-haiku-latest",
];
const models = claudeVariants.map((id) => ({
@@ -230,7 +224,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware({
* (claude-3 based models do not support the old text completion endpoint).
*/
const preprocessAnthropicTextRequest: RequestHandler = (req, res, next) => {
if (req.body.model?.startsWith("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
@@ -255,7 +249,7 @@ const oaiToChatPreprocessor = createPreprocessorMiddleware({
*/
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
maybeReassignModel(req);
if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
oaiToChatPreprocessor(req, res, next);
} else {
oaiToTextPreprocessor(req, res, next);
@@ -315,7 +309,8 @@ function handleAnthropicTextCompatRequest(
const type = req.params.type;
const action = req.params.action;
const alreadyInChatFormat = Boolean(req.body.messages);
const compatModel = `claude-3-${type}-20240229`;
const compatModel =
type === "opus" ? "claude-opus-4-1-20250805" : "claude-sonnet-4-5-20250929";
req.log.info(
{ type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
"Handling Anthropic compatibility request"
@@ -349,8 +344,20 @@ function handleAnthropicTextCompatRequest(
*/
function maybeReassignModel(req: Request) {
const model = req.body.model;
if (!model.startsWith("gpt-")) return;
req.body.model = "claude-3-sonnet-20240229";
const lower = String(model).toLowerCase();
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = "claude-sonnet-4-5-20250929";
}
}
function requiresAnthropicMessagesApi(model?: string) {
// Claude 3.x and all 4.x-series models (sonnet, opus, and haiku) only support
// the Messages API, so haiku must be matched here as well.
return /^claude-(?:3|sonnet|opus|haiku)/.test(model ?? "");
}
export const anthropic = anthropicRouter;
+80 -13
@@ -20,6 +20,12 @@ import { transformAnthropicChatResponseToAnthropicText, transformAnthropicChatRe
import { sendErrorToClient } from "./middleware/response/error-generator";
const LATEST_AWS_V2_MINOR_VERSION = "1";
const AWS_CLAUDE_SONNET_45 = "anthropic.claude-sonnet-4-5-20250929-v1:0";
const AWS_CLAUDE_HAIKU_45 = "anthropic.claude-haiku-4-5-20251001-v1:0";
const AWS_CLAUDE_OPUS_41 = "anthropic.claude-opus-4-1-20250805-v1:0";
const AWS_CLAUDE_SONNET_4 = "anthropic.claude-sonnet-4-20250514-v1:0";
const AWS_CLAUDE_OPUS_4 = "anthropic.claude-opus-4-20250514-v1:0";
const AWS_CLAUDE_35_HAIKU = "anthropic.claude-3-5-haiku-20241022-v1:0";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -35,10 +41,12 @@ const getModelsResponse = () => {
const variants = [
"anthropic.claude-v2",
"anthropic.claude-v2:1",
"anthropic.claude-3-haiku-20240307-v1:0",
"anthropic.claude-3-sonnet-20240229-v1:0",
"anthropic.claude-3-5-sonnet-20240620-v1:0",
"anthropic.claude-3-opus-20240229-v1:0",
AWS_CLAUDE_HAIKU_45,
AWS_CLAUDE_SONNET_45,
AWS_CLAUDE_OPUS_41,
AWS_CLAUDE_35_HAIKU,
AWS_CLAUDE_SONNET_4,
AWS_CLAUDE_OPUS_4,
];
const models = variants.map((id) => ({
@@ -164,7 +172,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware(
* (claude-3 based models do not support the old text completion endpoint).
*/
const preprocessAwsTextRequest: RequestHandler = (req, res, next) => {
if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
@@ -186,7 +194,7 @@ const oaiToAwsChatPreprocessor = createPreprocessorMiddleware(
* or the new Claude chat completion endpoint, based on the requested model.
*/
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
oaiToAwsChatPreprocessor(req, res, next);
} else {
oaiToAwsTextPreprocessor(req, res, next);
@@ -241,12 +249,65 @@ awsRouter.post(
*/
function maybeReassignModel(req: Request) {
const model = req.body.model;
const lower = String(model).toLowerCase();
// If it looks like an AWS model, use it as-is
if (model.includes("anthropic.claude")) {
return;
}
if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4")) {
req.body.model = AWS_CLAUDE_OPUS_4;
return;
}
if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
req.body.model = AWS_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
if (lower.includes("sonnet-4")) {
req.body.model = AWS_CLAUDE_SONNET_4;
return;
}
if (lower.includes("3-5") && lower.includes("haiku")) {
req.body.model = AWS_CLAUDE_35_HAIKU;
return;
}
if (lower.includes("opus")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("haiku")) {
req.body.model = AWS_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet")) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
// Anthropic model names can look like:
// - claude-v1
// - claude-2.1
@@ -282,20 +343,22 @@ function maybeReassignModel(req: Request) {
case "3":
case "3.0":
if (name.includes("opus")) {
req.body.model = "anthropic.claude-3-opus-20240229-v1:0";
req.body.model = AWS_CLAUDE_OPUS_41;
} else if (name.includes("haiku")) {
req.body.model = "anthropic.claude-3-haiku-20240307-v1:0";
req.body.model = AWS_CLAUDE_HAIKU_45;
} else {
req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0";
req.body.model = AWS_CLAUDE_SONNET_45;
}
return;
case "3.5":
req.body.model = "anthropic.claude-3-5-sonnet-20240620-v1:0";
req.body.model = name.includes("haiku")
? AWS_CLAUDE_35_HAIKU
: AWS_CLAUDE_SONNET_45;
return;
}
// Fallback to Claude 2.1
req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;
// Fallback to Claude Sonnet 4.5
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
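For example, the fuzzy matcher above resolves a few representative inputs like so:

// "claude-sonnet-4.5"   -> AWS_CLAUDE_SONNET_45
// "claude-3-5-haiku"    -> AWS_CLAUDE_35_HAIKU (the "3-5" + "haiku" branch)
// "gpt-4o" / "o3-mini"  -> AWS_CLAUDE_SONNET_45 (OpenAI-style IDs fall through
//                          to the gpt-/o1/o3/o4 branch)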
@@ -306,7 +369,7 @@ export function handleCompatibilityRequest(
) {
const action = req.params.action;
const alreadyInChatFormat = Boolean(req.body.messages);
const compatModel = "anthropic.claude-3-5-sonnet-20240620-v1:0";
const compatModel = AWS_CLAUDE_SONNET_4;
req.log.info(
{ inputModel: req.body.model, compatModel, alreadyInChatFormat },
"Handling AWS compatibility request"
@@ -335,3 +398,7 @@ export function handleCompatibilityRequest(
}
export const aws = awsRouter;
function requiresAnthropicMessagesApi(model?: string) {
// Covers Claude 3.x plus the 4.x sonnet/opus/haiku Bedrock IDs, all of which
// require the Messages API.
return /claude-(?:3|sonnet|opus|haiku)/.test(model ?? "");
}
+26 -2
@@ -32,16 +32,25 @@ function getModelsResponse() {
}
let available = new Set<AzureOpenAIModelFamily>();
const availableModelIds = new Set<string>();
for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "azure") continue;
const azureKey = key as any;
key.modelFamilies.forEach((family) =>
available.add(family as AzureOpenAIModelFamily)
);
azureKey.modelIds?.forEach((id: string) => availableModelIds.add(id));
}
const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
available = new Set([...available].filter((x) => allowed.has(x)));
const models = KNOWN_OPENAI_MODELS.map((id) => ({
const usingExactModelIds = availableModelIds.size > 0;
const sourceModels = usingExactModelIds
? [...new Set([...KNOWN_OPENAI_MODELS, ...availableModelIds])]
: KNOWN_OPENAI_MODELS;
const models = sourceModels.map((id) => ({
id,
object: "model",
created: new Date().getTime(),
@@ -58,7 +67,12 @@ function getModelsResponse() {
],
root: id,
parent: null,
})).filter((model) => available.has(getAzureOpenAIModelFamily(model.id)));
})).filter((model) => {
if (usingExactModelIds) {
return availableModelIds.has(model.id);
}
return available.has(getAzureOpenAIModelFamily(model.id));
});
modelsCache = { object: "list", data: models };
modelsCacheTime = new Date().getTime();
@@ -115,6 +129,16 @@ azureOpenAIRouter.post(
}),
azureOpenAIProxy
);
azureOpenAIRouter.post(
"/v1/responses",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai-responses",
outApi: "openai-responses",
service: "azure",
}),
azureOpenAIProxy
);
azureOpenAIRouter.post(
"/v1/images/generations",
ipLimiter,
+74 -12
@@ -19,7 +19,12 @@ import {
import { transformAnthropicChatResponseToOpenAI } from "./anthropic";
import { sendErrorToClient } from "./middleware/response/error-generator";
const LATEST_GCP_SONNET_MINOR_VERSION = "20240229";
const GCP_CLAUDE_SONNET_45 = "claude-sonnet-4-5@20250929";
const GCP_CLAUDE_HAIKU_45 = "claude-haiku-4-5@20251001";
const GCP_CLAUDE_OPUS_41 = "claude-opus-4-1@20250805";
const GCP_CLAUDE_SONNET_4 = "claude-sonnet-4@20250514";
const GCP_CLAUDE_OPUS_4 = "claude-opus-4@20250514";
const GCP_CLAUDE_35_HAIKU = "claude-3-5-haiku@20241022";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -33,10 +38,12 @@ const getModelsResponse = () => {
// https://docs.anthropic.com/en/docs/about-claude/models
const variants = [
"claude-3-haiku@20240307",
"claude-3-sonnet@20240229",
"claude-3-opus@20240229",
"claude-3-5-sonnet@20240620",
GCP_CLAUDE_HAIKU_45,
GCP_CLAUDE_SONNET_45,
GCP_CLAUDE_OPUS_41,
GCP_CLAUDE_35_HAIKU,
GCP_CLAUDE_SONNET_4,
GCP_CLAUDE_OPUS_4,
];
const models = variants.map((id) => ({
@@ -147,6 +154,7 @@ gcpRouter.post(
*/
function maybeReassignModel(req: Request) {
const model = req.body.model;
const lower = String(model).toLowerCase();
// If it looks like a GCP model, use it as-is
// if (model.includes("anthropic.claude")) {
@@ -154,6 +162,58 @@ function maybeReassignModel(req: Request) {
return;
}
if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4")) {
req.body.model = GCP_CLAUDE_OPUS_4;
return;
}
if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
req.body.model = GCP_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
if (lower.includes("sonnet-4")) {
req.body.model = GCP_CLAUDE_SONNET_4;
return;
}
if (lower.includes("3-5") && lower.includes("haiku")) {
req.body.model = GCP_CLAUDE_35_HAIKU;
return;
}
if (lower.includes("opus")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("haiku")) {
req.body.model = GCP_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet")) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
// Anthropic model names can look like:
// - claude-v1
// - claude-2.1
@@ -165,7 +225,7 @@ function maybeReassignModel(req: Request) {
// If there's no match, fall back to Claude Sonnet 4 as it is most likely to be
// available on GCP.
if (!match) {
req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`;
req.body.model = GCP_CLAUDE_SONNET_4;
return;
}
@@ -176,20 +236,22 @@ function maybeReassignModel(req: Request) {
case "3":
case "3.0":
if (name.includes("opus")) {
req.body.model = "claude-3-opus@20240229";
req.body.model = GCP_CLAUDE_OPUS_41;
} else if (name.includes("haiku")) {
req.body.model = "claude-3-haiku@20240307";
req.body.model = GCP_CLAUDE_HAIKU_45;
} else {
req.body.model = "claude-3-sonnet@20240229";
req.body.model = GCP_CLAUDE_SONNET_45;
}
return;
case "3.5":
req.body.model = "claude-3-5-sonnet@20240620";
req.body.model = name.includes("haiku")
? GCP_CLAUDE_35_HAIKU
: GCP_CLAUDE_SONNET_45;
return;
}
// Fallback to Claude3 Sonnet
req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`;
// Fallback to Claude Sonnet 4.5
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
+23 -7
@@ -16,6 +16,11 @@ import {
ProxyResHandlerWithBody,
} from "./middleware/response";
import { addGoogleAIKey } from "./middleware/request/preprocessors/add-google-ai-key";
import { BadRequestError } from "../shared/errors";
import {
flattenGoogleAIContentParts,
isGoogleAIImageModel,
} from "../shared/api-schemas";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -31,10 +36,15 @@ const getModelsResponse = () => {
if (!config.googleAIKey) return { object: "list", data: [] };
const googleAIVariants = [
"gemini-pro",
"gemini-1.0-pro",
"gemini-2.5-pro",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
"gemini-2.5-flash-image",
"gemini-3-pro-image-preview",
"gemini-2.0-flash-preview-image-generation",
"gemini-2.0-flash",
"gemini-1.5-pro",
"gemini-1.5-pro-latest",
"gemini-1.5-flash",
];
const models = googleAIVariants.map((id) => ({
@@ -83,7 +93,8 @@ function transformGoogleAIResponse(
): Record<string, any> {
const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
const parts = resBody.candidates[0].content?.parts ?? [{ text: "" }];
const content = parts[0].text.replace(/^(.{0,50}?): /, () => "");
const content = flattenGoogleAIContentParts(parts)
.replace(/^(.{0,50}?): /, () => "");
return {
id: "goo-" + v4(),
object: "chat.completion",
@@ -136,14 +147,19 @@ googleAIRouter.post(
googleAIProxy
);
/** Replaces requests for non-Google AI models with gemini-pro-1.5-latest. */
/** Replaces requests for non-Google AI models with Gemini 2.5 Flash. */
function maybeReassignModel(req: Request) {
const requested = req.body.model;
if (requested.includes("gemini")) {
if (req.body.stream && isGoogleAIImageModel(requested)) {
throw new BadRequestError(
"Streaming Gemini image-generation models is not currently supported by this proxy. Retry without `stream: true`."
);
}
return;
}
req.log.info({ requested }, "Reassigning model to gemini-pro-1.5-latest");
req.body.model = "gemini-pro-1.5-latest";
req.log.info({ requested }, "Reassigning model to gemini-2.5-flash");
req.body.model = "gemini-2.5-flash";
}
export const googleAI = googleAIRouter;
+11 -1
@@ -5,10 +5,15 @@ import { ZodError } from "zod";
import { generateErrorMessage } from "zod-error";
import { HttpError } from "../../shared/errors";
import { assertNever } from "../../shared/utils";
import {
flattenGoogleAIContentParts,
flattenOpenAIResponsesOutput,
} from "../../shared/api-schemas";
import { QuotaExceededError } from "./request/preprocessors/apply-quota-limits";
import { sendErrorToClient } from "./response/error-generator";
const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const OPENAI_RESPONSES_ENDPOINT = "/v1/responses";
const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
@@ -22,6 +27,7 @@ export function isTextGenerationRequest(req: Request) {
req.method === "POST" &&
[
OPENAI_CHAT_COMPLETION_ENDPOINT,
OPENAI_RESPONSES_ENDPOINT,
OPENAI_TEXT_COMPLETION_ENDPOINT,
ANTHROPIC_COMPLETION_ENDPOINT,
ANTHROPIC_MESSAGES_ENDPOINT,
@@ -224,6 +230,8 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
// Can be null if the model wants to invoke tools rather than return a
// completion.
return body.choices[0].message.content || "";
case "openai-responses":
return flattenOpenAIResponsesOutput(body);
case "openai-text":
return body.choices[0].text;
case "anthropic-chat":
@@ -252,7 +260,7 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
if ("choices" in body) {
return body.choices[0].message.content;
}
return body.candidates[0].content.parts[0].text;
return flattenGoogleAIContentParts(body.candidates?.[0]?.content?.parts);
case "openai-image":
return body.data?.map((item: any) => item.url).join("\n");
default:
@@ -267,6 +275,8 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
case "openai-text":
case "mistral-ai":
return body.model;
case "openai-responses":
return body.model || req.body.model;
case "openai-image":
return req.body.model;
case "anthropic-chat":
@@ -47,6 +47,7 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
assignedKey = keyPool.get("dall-e-3", service);
break;
case "openai":
case "openai-responses":
case "google-ai":
case "mistral-ai":
throw new Error(
@@ -109,9 +110,10 @@ export const addKeyForEmbeddingsRequest: HPMRequestCallback = (
throw new Error("Embeddings requests must be from OpenAI");
}
req.body = { input: req.body.input, model: "text-embedding-ada-002" };
const model = req.body.model || "text-embedding-3-small";
req.body = { input: req.body.input, model };
const key = keyPool.get("text-embedding-ada-002", "openai") as OpenAIKey;
const key = keyPool.get(model, "openai") as OpenAIKey;
req.key = key;
req.log.info(
@@ -6,7 +6,7 @@ import {
import { RequestPreprocessor } from "../index";
export const addAzureKey: RequestPreprocessor = (req) => {
const validAPIs: APIFormat[] = ["openai", "openai-image"];
const validAPIs: APIFormat[] = ["openai", "openai-responses", "openai-image"];
const apisValid = [req.outboundApi, req.inboundApi].every((api) =>
validAPIs.includes(api)
);
@@ -50,6 +50,23 @@ export const addAzureKey: RequestPreprocessor = (req) => {
const cred = req.key as AzureOpenAIKey;
const { resourceName, deploymentId, apiKey } = getCredentialsFromKey(cred);
if (req.outboundApi === "openai-responses") {
req.body.model = deploymentId;
req.signedRequest = {
method: "POST",
protocol: "https:",
hostname: `${resourceName}.openai.azure.com`,
path: `/openai/v1/responses?api-version=preview`,
headers: {
["host"]: `${resourceName}.openai.azure.com`,
["content-type"]: "application/json",
["api-key"]: apiKey,
},
body: JSON.stringify(req.body),
};
return;
}
const operation =
req.outboundApi === "openai" ? "/chat/completions" : "/images/generations";
const apiVersion =
@@ -6,6 +6,7 @@ import {
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
flattenOpenAIResponsesInput,
} from "../../../../shared/api-schemas";
/**
@@ -18,11 +19,23 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
switch (service) {
case "openai": {
req.outputTokens = req.body.max_tokens;
req.outputTokens =
req.body.max_completion_tokens ?? req.body.max_tokens ?? 0;
const prompt: OpenAIChatMessage[] = req.body.messages;
result = await countTokens({ req, prompt, service });
break;
}
case "openai-responses": {
req.outputTokens = req.body.max_output_tokens ?? 0;
const prompt = [
flattenOpenAIResponsesInput(req.body.instructions),
flattenOpenAIResponsesInput(req.body.input),
]
.filter(Boolean)
.join("\n\n");
result = await countTokens({ req, prompt, service });
break;
}
case "openai-text": {
req.outputTokens = req.body.max_tokens;
const prompt: string = req.body.prompt;
@@ -4,8 +4,10 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { BadRequestError } from "../../../../shared/errors";
import {
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
flattenOpenAIResponsesInput,
flattenAnthropicMessages,
} from "../../../../shared/api-schemas";
@@ -72,11 +74,27 @@ function getPromptFromRequest(req: Request) {
return `${msg.role}: ${text}`;
})
.join("\n\n");
case "openai-responses":
return [
flattenOpenAIResponsesInput(body.instructions),
flattenOpenAIResponsesInput(body.input),
]
.filter(Boolean)
.join("\n\n");
case "openai-text":
case "openai-image":
return body.prompt;
case "google-ai":
return body.prompt.text;
return body.contents
.map(({ parts, role }: GoogleAIChatMessage) => {
const text = parts
.map((part: any) =>
"text" in part ? part.text : "[image omitted]"
)
.join("\n");
return `${role}: ${text}`;
})
.join("\n\n");
default:
assertNever(service);
}
@@ -6,8 +6,8 @@ import { RequestPreprocessor } from "../index";
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
const GOOGLE_AI_MAX_CONTEXT = 32000;
const MISTRAL_AI_MAX_CONTENT = 32768;
const GOOGLE_AI_MAX_CONTEXT = 1048576;
const MISTRAL_AI_MAX_CONTENT = 256000;
/**
* Assigns `req.promptTokens` and `req.outputTokens` based on the request body
@@ -26,6 +26,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let proxyMax: number;
switch (req.outboundApi) {
case "openai":
case "openai-responses":
case "openai-text":
proxyMax = OPENAI_MAX_CONTEXT;
break;
@@ -54,6 +55,12 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let modelMax: number;
if (model.match(/gpt-3.5-turbo-16k/)) {
modelMax = 16384;
} else if (model.match(/^gpt-5(\.|-|\b)/)) {
modelMax = 1050000;
} else if (model.match(/^o\d/)) {
modelMax = 200000;
} else if (model.match(/^gpt-4\.1/)) {
modelMax = 1047576;
} else if (model.match(/^gpt-4o/)) {
modelMax = 128000;
} else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
@@ -80,12 +87,27 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 200000;
} else if (model.match(/^claude-3/)) {
modelMax = 200000;
} else if (model.match(/^claude-(opus|sonnet|haiku)-4/)) {
modelMax = 200000;
} else if (model.match(/^gemini-\d{3}$/)) {
modelMax = GOOGLE_AI_MAX_CONTEXT;
} else if (model.match(/^gemini-(2\.5|2\.0)/)) {
modelMax = 1048576;
} else if (model.match(/^mistral-(tiny|small|medium)$/)) {
modelMax = MISTRAL_AI_MAX_CONTENT;
} else if (
model.match(
/^(mistral|ministral|magistral|pixtral|codestral|devstral|voxtral)-/
)
) {
modelMax = MISTRAL_AI_MAX_CONTENT;
} else if (model.match(/^anthropic\.claude-3/)) {
modelMax = 200000;
} else if (
model.match(/^anthropic\.claude-(opus|sonnet|haiku)-4/) ||
model.match(/^claude-(opus|sonnet|haiku)-4@/)
) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude-v2:\d/)) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude/)) {
@@ -121,8 +143,8 @@ function assertRequestHasTokenCounts(
req: Request
): asserts req is Request & { promptTokens: number; outputTokens: number } {
z.object({
promptTokens: z.number().int().min(1),
outputTokens: z.number().int().min(1),
promptTokens: z.number().int().min(0),
outputTokens: z.number().int().min(0),
})
.nonstrict()
.parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens });
@@ -3,6 +3,7 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { containsImageContent as containsImageContentOpenAI } from "../../../../shared/api-schemas/openai";
import { containsImageContent as containsImageContentAnthropic } from "../../../../shared/api-schemas/anthropic";
import { containsOpenAIResponsesImageInput } from "../../../../shared/api-schemas";
import { ForbiddenError } from "../../../../shared/errors";
/**
@@ -22,11 +23,20 @@ export const validateVision: RequestPreprocessor = async (req) => {
case "openai":
hasImage = containsImageContentOpenAI(req.body.messages);
break;
case "openai-responses":
hasImage =
containsOpenAIResponsesImageInput(req.body.instructions) ||
containsOpenAIResponsesImageInput(req.body.input);
break;
case "anthropic-chat":
hasImage = containsImageContentAnthropic(req.body.messages);
break;
case "anthropic-text":
case "google-ai":
hasImage = req.body.contents?.some((message: { parts: any[] }) =>
message.parts?.some((part) => "inline_data" in part)
);
break;
case "anthropic-text":
case "mistral-ai":
case "openai-image":
case "openai-text":
@@ -72,7 +72,15 @@ type ErrorGeneratorOptions = {
};
export function tryInferFormat(body: any): APIFormat | "unknown" {
if (typeof body !== "object" || !body.model) {
if (typeof body !== "object") {
return "unknown";
}
if (body.object === "response" || Array.isArray(body.output)) {
return "openai-responses";
}
if (!body.model) {
return "unknown";
}
@@ -158,7 +166,30 @@ export function buildSpoofedCompletion({
switch (format) {
case "openai":
case "openai-responses":
case "mistral-ai":
if (format === "openai-responses") {
return {
id: "error-" + id,
object: "response",
created_at: Math.floor(Date.now() / 1000),
model,
status: "completed",
error: null,
incomplete_details: null,
output_text: content,
output: [
{
id: "msg-error-" + id,
type: "message",
role: "assistant",
status: "completed",
content: [{ type: "output_text", text: content, annotations: [] }],
},
],
usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
};
}
return {
id: "error-" + id,
object: "chat.completion",
@@ -248,7 +279,23 @@ export function buildSpoofedSSE({
switch (format) {
case "openai":
case "openai-responses":
case "mistral-ai":
if (format === "openai-responses") {
return (
`data: ${JSON.stringify({
type: "response.completed",
response: buildSpoofedCompletion({
format,
title,
message,
obj,
reqId,
model,
}),
})}\n\n`
);
}
event = {
id: "chatcmpl-" + id,
object: "chat.completion.chunk",
+27 -1
@@ -11,6 +11,7 @@ import { ProxyResHandlerWithBody } from ".";
import { assertNever } from "../../../shared/utils";
import {
AnthropicChatMessage,
flattenOpenAIResponsesInput,
flattenAnthropicMessages, GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
@@ -62,6 +63,7 @@ const getPromptForRequest = (
):
| string
| OpenAIChatMessage[]
| { instructions?: unknown; input?: unknown }
| { contents: GoogleAIChatMessage[] }
| { system: string; messages: AnthropicChatMessage[] }
| MistralAIChatMessage[]
@@ -73,6 +75,11 @@ const getPromptForRequest = (
case "openai":
case "mistral-ai":
return req.body.messages;
case "openai-responses":
return {
instructions: req.body.instructions,
input: req.body.input,
};
case "anthropic-chat":
return { system: req.body.system, messages: req.body.messages };
case "openai-text":
@@ -99,6 +106,7 @@ const flattenMessages = (
| string
| OaiImageResult
| OpenAIChatMessage[]
| { instructions?: unknown; input?: unknown }
| { contents: GoogleAIChatMessage[] }
| { system: string; messages: AnthropicChatMessage[] }
| MistralAIChatMessage[]
@@ -114,12 +122,20 @@ const flattenMessages = (
return val.contents
.map(({ parts, role }) => {
const text = parts
.map((p) => p.text)
.map((p: any) => ("text" in p ? p.text : "(( Attached Image ))"))
.join("\n");
return `${role}: ${text}`;
})
.join("\n");
}
if (isOpenAIResponsesPrompt(val)) {
return [
flattenOpenAIResponsesInput(val.instructions),
flattenOpenAIResponsesInput(val.input),
]
.filter(Boolean)
.join("\n\n");
}
if (Array.isArray(val)) {
return val
.map(({ content, role }) => {
@@ -140,6 +156,16 @@ const flattenMessages = (
return val.prompt.trim();
};
function isOpenAIResponsesPrompt(
val: unknown
): val is { instructions?: unknown; input?: unknown } {
return (
typeof val === "object" &&
val !== null &&
("instructions" in val || "input" in val)
);
}
function isGoogleAIChatPrompt(
val: unknown
): val is { contents: GoogleAIChatMessage[] } {
@@ -8,6 +8,7 @@ import {
mergeEventsForOpenAIText,
AnthropicV2StreamEvent,
OpenAIChatCompletionStreamEvent,
OpenAIResponsesStreamEvent,
} from "./index";
/**
@@ -17,13 +18,36 @@ import {
export class EventAggregator {
private readonly format: APIFormat;
private readonly events: OpenAIChatCompletionStreamEvent[];
private responseBody: Record<string, any> | null;
private responseEventCount: number;
private responseOutputText: string;
constructor({ format }: { format: APIFormat }) {
this.events = [];
this.format = format;
this.responseBody = null;
this.responseEventCount = 0;
this.responseOutputText = "";
}
addEvent(event: OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent) {
addEvent(
event:
| OpenAIChatCompletionStreamEvent
| AnthropicV2StreamEvent
| OpenAIResponsesStreamEvent
) {
if (eventIsOpenAIResponsesEvent(event)) {
this.responseEventCount++;
if (event.response && typeof event.response === "object") {
this.responseBody = event.response;
}
if (event.type === "response.output_text.delta") {
this.responseOutputText += event.delta || event.text || "";
}
return;
}
if (eventIsOpenAIEvent(event)) {
this.events.push(event);
} else {
@@ -52,8 +76,15 @@ export class EventAggregator {
getFinalResponse() {
switch (this.format) {
case "openai":
case "openai-responses":
case "google-ai":
case "mistral-ai":
if (this.format === "openai-responses") {
if (this.responseBody) {
return this.responseBody;
}
return { output_text: this.responseOutputText };
}
return mergeEventsForOpenAIChat(this.events);
case "openai-text":
return mergeEventsForOpenAIText(this.events);
@@ -69,7 +100,7 @@ export class EventAggregator {
}
hasEvents() {
return this.events.length > 0;
return this.events.length > 0 || this.responseEventCount > 0;
}
}
@@ -78,3 +109,9 @@ function eventIsOpenAIEvent(
): event is OpenAIChatCompletionStreamEvent {
return event?.object === "chat.completion.chunk";
}
function eventIsOpenAIResponsesEvent(
event: any
): event is OpenAIResponsesStreamEvent {
return typeof event?.type === "string" && event.type.startsWith("response.");
}
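For reference, the events this helper detects look roughly like the following (simplified; the Responses API emits many event types, all sharing the `type` discriminator, and the values here are placeholders):

const deltaEvent: OpenAIResponsesStreamEvent = {
  type: "response.output_text.delta",
  delta: "Hello",
};
const completedEvent: OpenAIResponsesStreamEvent = {
  type: "response.completed",
  response: { id: "resp_abc123", status: "completed", output_text: "Hello world" },
};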
@@ -26,6 +26,14 @@ export type OpenAIChatCompletionStreamEvent = {
}[];
};
export type OpenAIResponsesStreamEvent = {
type: string;
response?: Record<string, any>;
delta?: string;
text?: string;
[key: string]: any;
};
export type StreamingCompletionTransformer<
T = OpenAIChatCompletionStreamEvent,
S = any,
@@ -42,6 +50,7 @@ export { anthropicChatToAnthropicV2 } from "./transformers/anthropic-chat-to-ant
export { anthropicChatToOpenAI } from "./transformers/anthropic-chat-to-openai";
export { googleAIToOpenAI } from "./transformers/google-ai-to-openai";
export { passthroughToOpenAI } from "./transformers/passthrough-to-openai";
export { passthroughToOpenAIResponses } from "./transformers/passthrough-to-openai-responses";
export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat";
export { mergeEventsForOpenAIText } from "./aggregators/openai-text";
export { mergeEventsForAnthropicText } from "./aggregators/anthropic-text";
@@ -10,8 +10,10 @@ import {
anthropicV2ToOpenAI,
googleAIToOpenAI,
OpenAIChatCompletionStreamEvent,
OpenAIResponsesStreamEvent,
openAITextToOpenAIChat,
passthroughToOpenAI,
passthroughToOpenAIResponses,
StreamingCompletionTransformer,
} from "./index";
@@ -35,7 +37,9 @@ export class SSEMessageTransformer extends Transform {
private readonly inputFormat: APIFormat;
private readonly transformFn: StreamingCompletionTransformer<
// TODO: Refactor transformers to not assume only OpenAI events as output
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
| OpenAIChatCompletionStreamEvent
| AnthropicV2StreamEvent
| OpenAIResponsesStreamEvent
>;
private readonly log;
private readonly fallbackId: string;
@@ -126,12 +130,14 @@ function getTransformer(
// used for that case.
requestApi: APIFormat = "openai"
): StreamingCompletionTransformer<
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent | OpenAIResponsesStreamEvent
> {
switch (responseApi) {
case "openai":
case "mistral-ai":
return passthroughToOpenAI;
case "openai-responses":
return passthroughToOpenAIResponses;
case "openai-text":
return openAITextToOpenAIChat;
case "anthropic-text":
@@ -0,0 +1,43 @@
import {
OpenAIResponsesStreamEvent,
SSEResponseTransformArgs,
StreamingCompletionTransformer,
} from "../index";
import { parseEvent, ServerSentEvent } from "../parse-sse";
import { logger } from "../../../../../logger";
const log = logger.child({
module: "sse-transformer",
transformer: "openai-responses-to-openai-responses",
});
export const passthroughToOpenAIResponses: StreamingCompletionTransformer<
OpenAIResponsesStreamEvent
> = (
params: SSEResponseTransformArgs
) => {
const { data } = params;
const rawEvent = parseEvent(data);
if (!rawEvent.data || rawEvent.data === "[DONE]") {
return { position: -1 };
}
const responseEvent = asResponseEvent(rawEvent);
if (!responseEvent) {
return { position: -1 };
}
return { position: -1, event: responseEvent };
};
function asResponseEvent(
event: ServerSentEvent
): OpenAIResponsesStreamEvent | null {
try {
return JSON.parse(event.data) as OpenAIResponsesStreamEvent;
} catch (error) {
log.warn({ error: error.stack, event }, "Received invalid event");
}
return null;
}
+20 -16
@@ -24,25 +24,29 @@ import {
// https://docs.mistral.ai/platform/endpoints
export const KNOWN_MISTRAL_AI_MODELS = [
// Mistral 7b (open weight, legacy)
"open-mistral-7b",
"mistral-tiny-2312",
// Mixtral 8x7b (open weight, legacy)
"open-mixtral-8x7b",
"mistral-small-2312",
// Mixtral Small (newer 8x7b, closed weight)
"mistral-small-latest",
"mistral-small-2402",
// Mistral Medium
"mistral-small-2603",
"mistral-small-2506",
"mistral-medium-latest",
"mistral-medium-2312",
// Mistral Large
"mistral-medium-2508",
"mistral-medium-2505",
"magistral-medium-latest",
"magistral-medium-2507",
"magistral-small-2507",
"mistral-large-latest",
"mistral-large-2402",
// Deprecated identifiers (2024-05-01)
"mistral-tiny",
"mistral-small",
"mistral-medium",
"mistral-large-2512",
"ministral-14b-2512",
"ministral-8b-latest",
"ministral-8b-2512",
"ministral-3b-latest",
"ministral-3b-2512",
"pixtral-large-latest",
"pixtral-large-2411",
"codestral-latest",
"codestral-2508",
"devstral-small-latest",
"devstral-small-2507",
"devstral-medium-2507",
];
let modelsCache: any = null;
+7 -1
@@ -18,7 +18,13 @@ import {
import { generateModelList } from "./openai";
import { OpenAIImageGenerationResult } from "../shared/file-storage/mirror-generated-image";
const KNOWN_MODELS = ["dall-e-2", "dall-e-3"];
const KNOWN_MODELS = [
"dall-e-2",
"dall-e-3",
"gpt-image-1.5",
"gpt-image-1",
"gpt-image-1-mini",
];
let modelListCache: any = null;
let modelListValid = 0;
+63 -10
@@ -28,28 +28,57 @@ import {
// https://platform.openai.com/docs/models/overview
export const KNOWN_OPENAI_MODELS = [
"gpt-5.2",
"gpt-5.2-chat",
"gpt-5.2-chat-latest",
"gpt-5.2-pro",
"gpt-5.2-codex",
"gpt-5.1",
"gpt-5.1-chat",
"gpt-5.1-codex",
"gpt-5.1-codex-mini",
"gpt-5.1-codex-max",
"gpt-5",
"gpt-5-chat",
"gpt-5-pro",
"gpt-5-codex",
"gpt-5-mini",
"gpt-5-nano",
"gpt-4.1",
"gpt-4.1-2025-04-14",
"gpt-4.1-mini",
"gpt-4.1-nano",
"o3-pro",
"o3-deep-research",
"computer-use-preview",
"o4-mini",
"o4-mini-deep-research",
"o3",
"o3-mini",
"o1",
"o1-pro",
"gpt-4o",
"gpt-4o-2024-08-06",
"gpt-4o-mini",
"gpt-4o-2024-05-13",
"gpt-4-turbo", // alias for latest gpt4-turbo stable
"gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
"gpt-4-turbo-preview", // alias for latest turbo preview
"gpt-4-0125-preview", // gpt4-turbo preview 2
"gpt-4-1106-preview", // gpt4-turbo preview 1
"gpt-4-vision-preview", // gpt4-turbo preview 1 with vision
"gpt-4",
"gpt-4-0613",
"gpt-4-0314", // EOL 2024-06-13
"gpt-4-32k",
"gpt-4-32k-0314", // EOL 2024-06-13
"gpt-4-32k-0613",
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301", // EOL 2024-06-13
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-instruct",
"gpt-3.5-turbo-instruct-0914",
"text-embedding-3-small",
"text-embedding-3-large",
"text-embedding-ada-002",
"gpt-image-1.5",
"gpt-image-1",
"gpt-image-1-mini",
"dall-e-3",
"dall-e-2",
];
let modelsCache: any = null;
@@ -59,11 +88,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
// Get available families and snapshots
let availableFamilies = new Set<OpenAIModelFamily>();
const availableSnapshots = new Set<string>();
const availableModelIds = new Set<string>();
for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "openai") continue;
const asOpenAIKey = key as OpenAIKey;
asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f));
asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s));
asOpenAIKey.modelIds.forEach((id) => availableModelIds.add(id));
}
// Remove disabled families
@@ -71,8 +102,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
availableFamilies = new Set(
[...availableFamilies].filter((x) => allowed.has(x))
);
const usingExactModelIds = availableModelIds.size > 0;
return models
const sourceModels = usingExactModelIds
? [...new Set([...models, ...availableModelIds])]
: models;
return sourceModels
.map((id) => ({
id,
object: "model",
@@ -92,6 +128,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
parent: null,
}))
.filter((model) => {
if (usingExactModelIds) {
return (
allowed.has(getOpenAIModelFamily(model.id)) &&
availableModelIds.has(model.id)
);
}
// First check if the family is available
const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id));
if (!hasFamily) return false;
@@ -233,6 +276,16 @@ openaiRouter.post(
}),
openaiProxy
);
openaiRouter.post(
"/v1/responses",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai-responses",
outApi: "openai-responses",
service: "openai",
}),
openaiProxy
);
// Embeddings endpoint.
openaiRouter.post(
"/v1/embeddings",
+16 -9
@@ -31,18 +31,24 @@ export const AnthropicV1TextSchema = AnthropicV1BaseSchema.merge(
.int()
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
})
);
).passthrough();
const AnthropicV1MessageMultimodalContentSchema = z.array(
z.union([
z.object({ type: z.literal("text"), text: z.string() }),
z.object({
type: z.literal("image"),
source: z.object({
source: z.union([
z.object({
type: z.literal("base64"),
media_type: z.string().max(100),
data: z.string(),
}),
z.object({
type: z.literal("url"),
url: z.string().url(),
}),
]),
}),
])
);
@@ -65,7 +71,7 @@ export const AnthropicV1MessagesSchema = AnthropicV1BaseSchema.merge(
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
system: z.string().optional(),
})
);
).passthrough();
export type AnthropicChatMessage = z.infer<
typeof AnthropicV1MessagesSchema
>["messages"][0];
@@ -77,7 +83,7 @@ function openAIMessagesToClaudeTextPrompt(messages: OpenAIChatMessage[]) {
let role: string = m.role;
if (role === "assistant") {
role = "Assistant";
} else if (role === "system") {
} else if (role === "system" || role === "developer") {
role = "System";
} else if (role === "user") {
role = "Human";
@@ -115,12 +121,13 @@ export const transformOpenAIToAnthropicChat: APIFormatTransformer<
system,
messages: newMessages,
model: rest.model,
max_tokens: rest.max_tokens,
max_tokens: rest.max_completion_tokens ?? rest.max_tokens,
stream: rest.stream,
temperature: rest.temperature,
top_p: rest.top_p,
stop_sequences:
typeof rest.stop === "string" ? [rest.stop] : rest.stop || undefined,
...(rest.thinking ? { thinking: rest.thinking } : {}),
...(rest.user ? { metadata: { user_id: rest.user } } : {}),
// Anthropic supports top_k, but OpenAI does not
// OpenAI supports frequency_penalty, presence_penalty, logit_bias, n, seed,
@@ -162,7 +169,7 @@ export const transformOpenAIToAnthropicText: APIFormatTransformer<
return {
model: rest.model,
prompt: prompt,
max_tokens_to_sample: rest.max_tokens,
max_tokens_to_sample: rest.max_completion_tokens ?? rest.max_tokens,
stop_sequences: stops,
stream: rest.stream,
temperature: rest.temperature,
@@ -366,7 +373,7 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
// Here we will lose the original name if it was a system message, but that
// is generally okay because the system message is usually a prompt and not
// a character in the chat.
const name = msg.role === "system" ? "System" : msg.name?.trim();
const name = isSystemOpenAIRole(msg.role) ? "System" : msg.name?.trim();
const content = convertOpenAIContent(msg.content);
// Prepend the display name to the first text content in the current message
@@ -396,8 +403,8 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
function isSystemOpenAIRole(
role: OpenAIChatMessage["role"]
): role is "system" | "function" | "tool" {
return ["system", "function", "tool"].includes(role);
): role is "system" | "developer" | "function" | "tool" {
return ["system", "developer", "function", "tool"].includes(role);
}
function getFirstTextContent(content: OpenAIChatMessage["content"]) {
+200 -28
@@ -1,10 +1,21 @@
import { z } from "zod";
import {
flattenOpenAIMessageContent,
OpenAIChatMessage,
OpenAIV1ChatCompletionSchema,
} from "./openai";
import { APIFormatTransformer } from "./index";
const GoogleAIContentPartSchema = z.union([
z.object({ text: z.string() }),
z.object({
inline_data: z.object({
mime_type: z.string().max(100),
data: z.string(),
}),
}),
]);
// https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent
export const GoogleAIV1GenerateContentSchema = z
.object({
@@ -12,31 +23,40 @@ export const GoogleAIV1GenerateContentSchema = z
stream: z.boolean().optional().default(false), // also used for router
contents: z.array(
z.object({
parts: z.array(z.object({ text: z.string() })),
parts: z.array(GoogleAIContentPartSchema),
role: z.enum(["user", "model"]),
})
),
tools: z.array(z.object({})).max(0).optional(),
safetySettings: z.array(z.object({})).max(0).optional(),
tools: z.array(z.any()).optional(),
toolConfig: z.any().optional(),
safetySettings: z.array(z.any()).optional(),
systemInstruction: z.any().optional(),
generationConfig: z.object({
temperature: z.number().optional(),
maxOutputTokens: z.coerce
.number()
.int()
.optional()
.default(16)
.transform((v) => Math.min(v, 1024)), // TODO: Add config
.default(1024)
.transform((v) => Math.min(v, 65536)),
candidateCount: z.literal(1).optional(),
topP: z.number().optional(),
topK: z.number().optional(),
responseMimeType: z.string().optional(),
responseSchema: z.any().optional(),
responseJsonSchema: z.any().optional(),
responseModalities: z.array(z.string()).optional(),
thinkingConfig: z.any().optional(),
stopSequences: z.array(z.string().max(500)).max(5).optional(),
}),
})
.strip();
.passthrough();
export type GoogleAIChatMessage = z.infer<
typeof GoogleAIV1GenerateContentSchema
>["contents"][0];
type GoogleAIPart = GoogleAIChatMessage["parts"][number];
export const transformOpenAIToGoogleAI: APIFormatTransformer<
typeof GoogleAIV1GenerateContentSchema
> = async (req) => {
@@ -54,40 +74,51 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
}
const { messages, ...rest } = result.data;
const systemMessages = messages.filter(
(m) => m.role === "system" || m.role === "developer"
);
const foundNames = new Set<string>();
const model = req.body.model;
const customThinkingConfig =
getObjectField(body, "thinkingConfig") ??
getObjectField(getObjectField(body, "generationConfig"), "thinkingConfig");
const customResponseModalities = getStringArrayField(
getObjectField(body, "generationConfig"),
"responseModalities"
);
const contents = messages
.filter((m) => m.role !== "system" && m.role !== "developer")
.map((m) => {
const role = m.role === "assistant" ? "model" : "user";
// Detects character names so we can set stop sequences for them as Gemini
// is prone to continuing as the next character.
// If names are not available, we'll still try to prefix the message
// with generic names so we can set stops for them but they don't work
// as well as real names.
const text = flattenOpenAIMessageContent(m.content);
const parts = convertOpenAIContent(m.content);
const text = parts
.map((part) => ("text" in part ? part.text : ""))
.join("\n");
const propName = m.name?.trim();
const textName =
m.role === "system" ? "" : text.match(/^(.{0,50}?): /)?.[1]?.trim();
const name =
propName || textName || (role === "model" ? "Character" : "User");
const textName = text.match(/^(.{0,50}?): /)?.[1]?.trim();
const name = propName || textName || (role === "model" ? "Character" : "User");
foundNames.add(name);
// Prefixing messages with their character name seems to help avoid
// Gemini trying to continue as the next character, or at the very least
// ensures it will hit the stop sequence. Otherwise it will start a new
// paragraph and switch perspectives.
// The response will be very likely to include this prefix so frontends
// will need to strip it out.
// Prefixing speaker names helps Gemini avoid continuing as the next
// character in multi-party roleplay/chat prompts.
const textPrefix = textName ? "" : `${name}: `;
const firstTextPart = parts.find(
(part): part is Extract<GoogleAIPart, { text: string }> => "text" in part
);
if (firstTextPart) {
firstTextPart.text = textPrefix + firstTextPart.text;
}
return {
parts: [{ text: textPrefix + text }],
parts,
role: m.role === "assistant" ? ("model" as const) : ("user" as const),
};
})
.reduce<GoogleAIChatMessage[]>((acc, msg) => {
const last = acc[acc.length - 1];
if (last?.role === msg.role) {
last.parts[0].text += "\n\n" + msg.parts[0].text;
last.parts.push(...msg.parts);
} else {
acc.push(msg);
}
@@ -102,17 +133,44 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
stops.push(...Array.from(foundNames).map((name) => `\n${name}:`));
stops = [...new Set(stops)].slice(0, 5);
const responseFormat = rest.response_format as Record<string, any> | undefined;
const maxOutputTokens =
rest.max_completion_tokens ?? rest.max_tokens ?? 1024;
return {
model: req.body.model,
model,
stream: rest.stream,
contents,
tools: [],
tools: Array.isArray(rest.tools) ? rest.tools : undefined,
systemInstruction: systemMessages.length
? {
parts: [
{
text: systemMessages
.map((msg) => flattenOpenAIMessageContent(msg.content))
.join("\n\n"),
},
],
}
: undefined,
generationConfig: {
maxOutputTokens: rest.max_tokens,
maxOutputTokens,
stopSequences: stops,
topP: rest.top_p,
topK: 40, // openai schema doesn't have this, google ai defaults to 40
topK: 40, // OpenAI schema doesn't expose this; Gemini defaults to 40.
temperature: rest.temperature,
responseMimeType:
responseFormat?.type === "json_object" ||
responseFormat?.type === "json_schema"
? "application/json"
: undefined,
responseSchema: responseFormat?.json_schema?.schema,
responseJsonSchema: responseFormat?.json_schema?.schema,
responseModalities:
customResponseModalities ??
(isGoogleAIImageModel(model) ? ["TEXT", "IMAGE"] : undefined),
thinkingConfig:
customThinkingConfig ?? getThinkingConfig(model, rest.reasoning_effort),
},
safetySettings: [
{ category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
@@ -122,3 +180,117 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
],
};
};
function convertOpenAIContent(
content: OpenAIChatMessage["content"]
): GoogleAIPart[] {
if (typeof content === "string") {
return [{ text: content }];
}
return content.map((item) => {
if ("text" in item) {
return { text: item.text };
}
if ("refusal" in item) {
return { text: item.refusal };
}
const url = item.image_url.url;
if (!url.startsWith("data:")) {
return { text: "[ Unsupported image URL ]" };
}
const [meta, data = ""] = url.split(",", 2);
const mimeType = meta.split(";")[0].replace("data:", "");
return { inline_data: { mime_type: mimeType, data } };
});
}
function getThinkingConfig(model: string, reasoningEffort?: string) {
if (model.startsWith("gemini-2.5")) {
switch (reasoningEffort) {
case "none":
case "minimal":
case "low":
return { thinkingBudget: 0 };
default:
return undefined;
}
}
switch (reasoningEffort) {
case "low":
case "minimal":
case "none":
return { thinkingLevel: "LOW" };
case "medium":
case "high":
case "xhigh":
return { thinkingLevel: "HIGH" };
default:
return undefined;
}
}
export function isGoogleAIImageModel(model: string) {
return [
"gemini-2.0-flash-preview-image-generation",
"gemini-2.5-flash-image",
"gemini-3-pro-image-preview",
].includes(model);
}
export function flattenGoogleAIContentParts(
parts: Array<Record<string, any>> | undefined
) {
return (parts ?? [])
.map((part) => {
if (typeof part?.text === "string") {
return part.text;
}
const inlineData = part?.inline_data ?? part?.inlineData;
if (inlineData?.data) {
const mimeType = inlineData.mime_type ?? inlineData.mimeType ?? "image/png";
return `![generated image](data:${mimeType};base64,${inlineData.data})`;
}
return "";
})
.filter(Boolean)
.join("\n\n");
}
function getObjectField(
value: unknown,
key: string
): Record<string, any> | undefined {
if (
value &&
typeof value === "object" &&
!Array.isArray(value) &&
key in value &&
value[key as keyof typeof value] &&
typeof value[key as keyof typeof value] === "object" &&
!Array.isArray(value[key as keyof typeof value])
) {
return value[key as keyof typeof value] as Record<string, any>;
}
return undefined;
}
function getStringArrayField(value: unknown, key: string) {
if (
value &&
typeof value === "object" &&
!Array.isArray(value) &&
key in value &&
Array.isArray(value[key as keyof typeof value])
) {
return (value[key as keyof typeof value] as unknown[]).filter(
(item): item is string => typeof item === "string"
);
}
return undefined;
}
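For illustration, a minimal sketch (not part of the commit) of what convertOpenAIContent above produces for a typical mixed text/image message; the base64 payload is truncated for brevity:
const parts = convertOpenAIContent([
  { type: "text", text: "What is in this image?" },
  { type: "image_url", image_url: { url: "data:image/png;base64,iVBORw0KG..." } },
]);
// -> [
//      { text: "What is in this image?" },
//      { inline_data: { mime_type: "image/png", data: "iVBORw0KG..." } },
//    ]
// Non-data: URLs instead become { text: "[ Unsupported image URL ]" }.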
+12 -1
View File
@@ -17,6 +17,7 @@ import {
OpenAIV1ImagesGenerationSchema,
transformOpenAIToOpenAIImage,
} from "./openai-image";
import { OpenAIResponsesCreateSchema } from "./openai-responses";
import {
GoogleAIV1GenerateContentSchema,
transformOpenAIToGoogleAI,
@@ -24,13 +25,22 @@ import {
import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai";
export { OpenAIChatMessage } from "./openai";
export {
containsOpenAIResponsesImageInput,
flattenOpenAIResponsesInput,
flattenOpenAIResponsesOutput,
} from "./openai-responses";
export {
AnthropicChatMessage,
AnthropicV1TextSchema,
AnthropicV1MessagesSchema,
flattenAnthropicMessages,
} from "./anthropic";
export { GoogleAIChatMessage } from "./google-ai";
export {
GoogleAIChatMessage,
flattenGoogleAIContentParts,
isGoogleAIImageModel,
} from "./google-ai";
export { MistralAIChatMessage } from "./mistral-ai";
type APIPair = `${APIFormat}->${APIFormat}`;
@@ -55,6 +65,7 @@ export const API_REQUEST_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
"anthropic-chat": AnthropicV1MessagesSchema,
"anthropic-text": AnthropicV1TextSchema,
openai: OpenAIV1ChatCompletionSchema,
"openai-responses": OpenAIResponsesCreateSchema,
"openai-text": OpenAIV1TextCompletionSchema,
"openai-image": OpenAIV1ImagesGenerationSchema,
"google-ai": GoogleAIV1GenerateContentSchema,
+1 -1
View File
@@ -20,7 +20,7 @@ export const MistralAIV1ChatCompletionsSchema = z.object({
stream: z.boolean().optional().default(false),
safe_prompt: z.boolean().optional().default(false),
random_seed: z.number().int().optional(),
});
}).passthrough();
export type MistralAIChatMessage = z.infer<
typeof MistralAIV1ChatCompletionsSchema
>["messages"][0];
+33 -9
View File
@@ -5,19 +5,34 @@ import { APIFormatTransformer } from "./index";
// https://platform.openai.com/docs/api-reference/images/create
export const OpenAIV1ImagesGenerationSchema = z
.object({
prompt: z.string().max(4000),
prompt: z.string().max(32000),
model: z.string().max(100).optional(),
quality: z.enum(["standard", "hd"]).optional().default("standard"),
n: z.number().int().min(1).max(4).optional().default(1),
quality: z
.enum(["auto", "low", "medium", "high", "standard", "hd"])
.optional(),
n: z.number().int().min(1).max(10).optional().default(1),
response_format: z.enum(["url", "b64_json"]).optional(),
output_format: z.string().optional(),
output_compression: z.number().int().min(0).max(100).optional(),
size: z
.enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"])
.enum([
"auto",
"256x256",
"512x512",
"1024x1024",
"1024x1536",
"1536x1024",
"1792x1024",
"1024x1792",
])
.optional()
.default("1024x1024"),
style: z.enum(["vivid", "natural"]).optional().default("vivid"),
background: z.string().optional(),
moderation: z.string().optional(),
user: z.string().max(500).optional(),
})
.strip();
.passthrough();
// Takes the last chat message and uses it verbatim as the image prompt.
export const transformOpenAIToOpenAIImage: APIFormatTransformer<
@@ -57,12 +72,21 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
}
// TODO: Add some way to specify parameters via chat message
const transformed = {
model: body.model.includes("dall-e") ? body.model : "dall-e-3",
quality: "standard",
const requestedModel = String(body.model ?? "");
const model =
requestedModel.includes("dall-e") || requestedModel.includes("gpt-image")
? requestedModel
: "gpt-image-1.5";
const transformed: Record<string, any> = {
model,
size: "1024x1024",
response_format: "url",
prompt: prompt.slice(index! + 6).trim(),
};
if (model.includes("dall-e")) {
transformed.quality = "standard";
transformed.response_format = "url";
}
return OpenAIV1ImagesGenerationSchema.parse(transformed);
};
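Sketch of the model fallback above (example inputs illustrative, behavior as written):
// body.model = "dall-e-3"    -> "dall-e-3", with quality/response_format forced for DALL-E
// body.model = "gpt-image-1" -> "gpt-image-1", image params left to the API's defaults
// body.model = "gpt-4o"      -> falls back to "gpt-image-1.5"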
+136
View File
@@ -0,0 +1,136 @@
import { z } from "zod";
import { OPENAI_OUTPUT_MAX } from "./openai";
const OpenAIResponsesReasoningSchema = z
.object({
effort: z.string().optional(),
summary: z.union([z.string(), z.array(z.string())]).optional(),
})
.passthrough();
const OpenAIResponsesTextSchema = z
.object({
format: z.any().optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
})
.passthrough();
export const OpenAIResponsesCreateSchema = z
.object({
model: z.string().max(100),
input: z.union([z.string(), z.array(z.any())]).optional(),
instructions: z.union([z.string(), z.array(z.any())]).optional(),
previous_response_id: z.string().max(100).optional(),
stream: z.boolean().optional().default(false),
max_output_tokens: z.coerce
.number()
.int()
.nullish()
.default(OPENAI_OUTPUT_MAX)
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
temperature: z.number().optional(),
top_p: z.number().optional(),
user: z.string().max(500).optional(),
safety_identifier: z.string().max(500).optional(),
metadata: z.record(z.any()).optional(),
tools: z.array(z.any()).optional(),
tool_choice: z.any().optional(),
parallel_tool_calls: z.boolean().optional(),
include: z.array(z.string()).optional(),
store: z.boolean().optional(),
background: z.boolean().optional(),
reasoning: OpenAIResponsesReasoningSchema.optional(),
text: OpenAIResponsesTextSchema.optional(),
})
.passthrough();
export function flattenOpenAIResponsesInput(input: unknown): string {
return flattenResponseValue(input).trim();
}
export function flattenOpenAIResponsesOutput(body: Record<string, any>): string {
if (typeof body.output_text === "string" && body.output_text.trim()) {
return body.output_text.trim();
}
return flattenResponseValue(body.output ?? body.output_text).trim();
}
export function containsOpenAIResponsesImageInput(input: unknown): boolean {
return containsImage(input);
}
function flattenResponseValue(value: unknown): string {
if (value === null || value === undefined) return "";
if (typeof value === "string") return value;
if (typeof value === "number" || typeof value === "boolean") {
return String(value);
}
if (Array.isArray(value)) {
return value
.map((item) => flattenResponseValue(item))
.filter(Boolean)
.join("\n");
}
if (!isRecord(value)) return "";
const typed = value;
if (hasStringProp(typed, "text")) return typed.text;
if (hasStringProp(typed, "refusal")) return typed.refusal;
if (hasStringProp(typed, "summary")) return typed.summary;
if (hasStringProp(typed, "arguments")) return typed.arguments;
if (hasStringProp(typed, "result")) return typed.result;
const type = String(typed.type ?? "");
if (type.includes("image")) return "[ Uploaded Image Omitted ]";
if (type.includes("file")) return "[ File Omitted ]";
if (typeof typed.role === "string" && typed.content !== undefined) {
const content = flattenResponseValue(typed.content);
return content ? `${typed.role}: ${content}` : typed.role;
}
const nested = [
typed.content,
typed.input,
typed.output,
typed.summary,
typed.results,
typed.item,
typed.items,
];
for (const candidate of nested) {
const flattened = flattenResponseValue(candidate);
if (flattened) return flattened;
}
return "";
}
function containsImage(value: unknown): boolean {
if (value === null || value === undefined) return false;
if (Array.isArray(value)) return value.some((item) => containsImage(item));
if (!isRecord(value)) return false;
const typed = value;
const type = String(typed.type ?? "");
if (type.includes("image")) return true;
if (typed.image_url || typed.image || typed.input_image || typed.inline_data) {
return true;
}
return Object.values(typed).some((item) => containsImage(item));
}
function hasStringProp<T extends string>(
value: Record<string, unknown>,
key: T
): value is Record<string, unknown> & Record<T, string> {
return typeof value[key] === "string";
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null;
}
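A sketch of how the flatteners behave on a typical Responses API body (payload illustrative):
const text = flattenOpenAIResponsesOutput({
  output: [
    { type: "reasoning", summary: "Considered the question." },
    {
      type: "message",
      role: "assistant",
      content: [{ type: "output_text", text: "Hello!" }],
    },
  ],
});
// -> "Considered the question.\nassistant: Hello!"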
+32 -5
View File
@@ -7,6 +7,7 @@ export const OPENAI_OUTPUT_MAX = config.maxOutputTokensOpenAI;
const OpenAIV1ChatContentArraySchema = z.array(
z.union([
z.object({ type: z.literal("text"), text: z.string() }),
z.object({ type: z.literal("refusal"), refusal: z.string() }),
z.object({
type: z.union([z.literal("image"), z.literal("image_url")]),
image_url: z.object({
@@ -21,7 +22,14 @@ export const OpenAIV1ChatCompletionSchema = z
model: z.string().max(100),
messages: z.array(
z.object({
role: z.enum(["system", "user", "assistant", "tool", "function"]),
role: z.enum([
"system",
"developer",
"user",
"assistant",
"tool",
"function",
]),
content: z.union([z.string(), OpenAIV1ChatContentArraySchema]),
name: z.string().optional(),
tool_calls: z.array(z.any()).optional(),
@@ -54,11 +62,20 @@ export const OpenAIV1ChatCompletionSchema = z
.nullish()
.default(Math.min(OPENAI_OUTPUT_MAX, 4096))
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
max_completion_tokens: z.coerce
.number()
.int()
.nullish()
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
frequency_penalty: z.number().optional().default(0),
presence_penalty: z.number().optional().default(0),
logit_bias: z.any().optional(),
metadata: z.record(z.any()).optional(),
user: z.string().max(500).optional(),
safety_identifier: z.string().max(500).optional(),
seed: z.number().int().optional(),
prompt_cache_key: z.string().max(500).optional(),
prompt_cache_retention: z.string().optional(),
// Be warned that Azure OpenAI combines these two into a single field.
// It's the only deviation from the OpenAI API that I'm aware of, so I've
// special-cased it in `addAzureKey` rather than expecting clients to do it.
@@ -70,14 +87,23 @@ export const OpenAIV1ChatCompletionSchema = z
functions: z.array(z.any()).optional(),
tool_choice: z.any().optional(),
function_choice: z.any().optional(),
response_format: z.any(),
response_format: z.any().optional(),
parallel_tool_calls: z.boolean().optional(),
reasoning_effort: z.string().optional(),
stream_options: z.any().optional(),
modalities: z.array(z.string()).optional(),
audio: z.any().optional(),
prediction: z.any().optional(),
web_search_options: z.any().optional(),
service_tier: z.string().optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
})
// Tool usage must be enabled via config because we currently have no way to
// track quota usage for them or enforce limits.
.omit(
Boolean(config.allowOpenAIToolUsage) ? {} : { tools: true, functions: true }
)
.strip();
.passthrough();
export type OpenAIChatMessage = z.infer<
typeof OpenAIV1ChatCompletionSchema
>["messages"][0];
@@ -89,6 +115,7 @@ export function flattenOpenAIMessageContent(
? content
.map((contentItem) => {
if ("text" in contentItem) return contentItem.text;
if ("refusal" in contentItem) return contentItem.refusal;
if ("image_url" in contentItem) return "[ Uploaded Image Omitted ]";
})
.join("\n")
@@ -107,7 +134,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
let role: string = m.role;
if (role === "assistant") {
role = "Assistant";
} else if (role === "system") {
} else if (role === "system" || role === "developer") {
role = "System";
} else if (role === "user") {
role = "User";
@@ -121,7 +148,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
.map((m) => {
// Claude without prefixes (except system) and no Assistant priming
let role: string = "";
if (role === "system") {
if (m.role === "system" || m.role === "developer") {
role = "System: ";
}
return `\n\n${role}${flattenOpenAIMessageContent(m.content)}`;
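Sketch of the Claude-style branch above after the developer-role fix (inputs illustrative):
// { role: "developer", content: "Be terse." } -> "\n\nSystem: Be terse."
// { role: "user", content: "hi" }             -> "\n\nhi"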
+4 -4
View File
@@ -54,10 +54,10 @@ export class AwsKeyChecker extends KeyCheckerBase<AwsBedrockKey> {
if (isInitialCheck) {
checks = [
this.invokeModel("anthropic.claude-v2", key),
this.invokeModel("anthropic.claude-3-sonnet-20240229-v1:0", key),
this.invokeModel("anthropic.claude-3-haiku-20240307-v1:0", key),
this.invokeModel("anthropic.claude-3-opus-20240229-v1:0", key),
this.invokeModel("anthropic.claude-3-5-sonnet-20240620-v1:0", key),
this.invokeModel("anthropic.claude-sonnet-4-5-20250929-v1:0", key),
this.invokeModel("anthropic.claude-haiku-4-5-20251001-v1:0", key),
this.invokeModel("anthropic.claude-opus-4-1-20250805-v1:0", key),
this.invokeModel("anthropic.claude-3-5-haiku-20241022-v1:0", key),
];
}
+29 -8
View File
@@ -35,9 +35,15 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
}
protected async testKeyOrFail(key: AzureOpenAIKey) {
const model = await this.testModel(key);
this.log.info({ key: key.hash, deploymentModel: model }, "Checked key.");
this.updateKey(key.hash, { modelFamilies: [model] });
const result = await this.testModel(key);
this.log.info(
{ key: key.hash, deploymentModel: result.modelIds[0] ?? result.family },
"Checked key."
);
this.updateKey(key.hash, {
modelFamilies: [result.family],
modelIds: result.modelIds,
});
}
protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) {
@@ -107,7 +113,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
this.updateKey(key.hash, { lastChecked: next });
}
private async testModel(key: AzureOpenAIKey) {
private async testModel(key: AzureOpenAIKey): Promise<{
family: ReturnType<typeof getAzureOpenAIModelFamily>;
modelIds: string[];
}> {
const { apiKey, deploymentId, resourceName } =
AzureOpenAIKeyChecker.getCredentialsFromKey(key);
const url = POST_CHAT_COMPLETIONS(resourceName, deploymentId);
@@ -126,7 +135,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
// we try to invoke /chat/completions on dall-e-3. This is expected and
// indicates a DALL-E deployment.
if (response.status === 400) {
if (data.error.code === "OperationNotSupported") return "azure-dall-e";
if (data.error.code === "OperationNotSupported") {
return {
family: "azure-dall-e",
modelIds: ["dall-e-3", "gpt-image-1", "gpt-image-1-mini", "gpt-image-1.5"],
};
}
throw new AxiosError(
`Unexpected error when testing deployment ${deploymentId}`,
"AZURE_TEST_ERROR",
@@ -137,11 +151,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
}
const family = getAzureOpenAIModelFamily(data.model);
const normalizedModel = normalizeAzureModelId(data.model);
// Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks.
// Otherwise we can use the model family Azure returned.
if (family !== "azure-gpt4") {
return family;
return { family, modelIds: [normalizedModel] };
}
// Try to send an oversized prompt. GPT-4 Turbo can handle this but regular
@@ -160,8 +175,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
const code = contextTest.error?.code;
this.log.debug({ code, status }, "Performed Azure GPT4 context size test.");
if (code === "context_length_exceeded") return "azure-gpt4";
return "azure-gpt4-turbo";
if (code === "context_length_exceeded") {
return { family: "azure-gpt4", modelIds: ["gpt-4"] };
}
return { family: "azure-gpt4-turbo", modelIds: ["gpt-4-turbo"] };
}
static errorIsAzureError(error: AxiosError): error is AxiosError<AzureError> {
@@ -179,3 +196,7 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
return { resourceName, deploymentId, apiKey };
}
}
function normalizeAzureModelId(model: string) {
return model.replace("gpt-35-turbo", "gpt-3.5-turbo");
}
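Sketch of the probe outcomes above, for an Azure deployment that reports model "gpt-4":
// oversized prompt rejected with context_length_exceeded
//   -> { family: "azure-gpt4", modelIds: ["gpt-4"] }
// oversized prompt accepted
//   -> { family: "azure-gpt4-turbo", modelIds: ["gpt-4-turbo"] }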
+12 -1
View File
@@ -14,6 +14,8 @@ type AzureOpenAIKeyUsage = {
export interface AzureOpenAIKey extends Key, AzureOpenAIKeyUsage {
readonly service: "azure";
readonly modelFamilies: AzureOpenAIModelFamily[];
/** Exact model IDs or deployment aliases known to be backed by this key. */
modelIds: string[];
/** The time at which this key was last rate limited. */
rateLimitedAt: number;
/** The time until which this key is rate limited. */
@@ -62,6 +64,7 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
rateLimitedAt: 0,
rateLimitedUntil: 0,
contentFiltering: false,
modelIds: [],
hash: `azu-${crypto
.createHash("sha256")
.update(key)
@@ -73,6 +76,8 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
"azure-gpt4-32kTokens": 0,
"azure-gpt4-turboTokens": 0,
"azure-gpt4oTokens": 0,
"azure-gpt5Tokens": 0,
"azure-o-seriesTokens": 0,
"azure-dall-eTokens": 0,
};
this.keys.push(newKey);
@@ -96,8 +101,14 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
public get(model: string) {
const neededFamily = getAzureOpenAIModelFamily(model);
const normalizedModel = model
.replace(/^azure-/, "")
.replace("gpt-35-turbo", "gpt-3.5-turbo");
const availableKeys = this.keys.filter(
(k) => !k.isDisabled && k.modelFamilies.includes(neededFamily)
(k) =>
!k.isDisabled &&
k.modelFamilies.includes(neededFamily) &&
(!k.modelIds.length || k.modelIds.includes(normalizedModel))
);
if (availableKeys.length === 0) {
throw new PaymentRequiredError(
+8 -8
View File
@@ -32,10 +32,10 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
const isInitialCheck = !key.lastChecked;
if (isInitialCheck) {
checks = [
this.invokeModel("claude-3-haiku@20240307", key, true),
this.invokeModel("claude-3-sonnet@20240229", key, true),
this.invokeModel("claude-3-opus@20240229", key, true),
this.invokeModel("claude-3-5-sonnet@20240620", key, true),
this.invokeModel("claude-haiku-4-5@20251001", key, true),
this.invokeModel("claude-sonnet-4-5@20250929", key, true),
this.invokeModel("claude-opus-4-1@20250805", key, true),
this.invokeModel("claude-3-5-haiku@20241022", key, true),
];
const [haiku, sonnet, opus, sonnet35] =
@@ -66,13 +66,13 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
});
} else {
if (key.haikuEnabled) {
await this.invokeModel("claude-3-haiku@20240307", key, false)
await this.invokeModel("claude-haiku-4-5@20251001", key, false)
} else if (key.sonnetEnabled) {
await this.invokeModel("claude-3-sonnet@20240229", key, false)
await this.invokeModel("claude-sonnet-4-5@20250929", key, false)
} else if (key.sonnet35Enabled) {
await this.invokeModel("claude-3-5-sonnet@20240620", key, false)
await this.invokeModel("claude-3-5-haiku@20241022", key, false)
} else {
await this.invokeModel("claude-3-opus@20240229", key, false)
await this.invokeModel("claude-opus-4-1@20250805", key, false)
}
this.updateKey(key.hash, { lastChecked: Date.now() });
+1
View File
@@ -4,6 +4,7 @@ import { KeyPool } from "./key-pool";
/** The request and response format used by a model's API. */
export type APIFormat =
| "openai"
| "openai-responses"
| "openai-text"
| "openai-image"
| "anthropic-chat" // Anthropic's newer messages array format
+4 -1
View File
@@ -111,7 +111,10 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
const familiesArray = [...families];
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
this.updateKey(key.hash, {
modelSnapshots: models.filter((m) => m.match(/-\d{4}(-preview)?$/)),
modelIds: models,
modelSnapshots: models.filter((m) =>
m.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/)
),
modelFamilies: familiesArray,
lastChecked: keyFromPool.lastChecked,
});
+12 -2
View File
@@ -16,6 +16,8 @@ type OpenAIKeyUsage = {
export interface OpenAIKey extends Key, OpenAIKeyUsage {
readonly service: "openai";
modelFamilies: OpenAIModelFamily[];
/** Exact model IDs reported by the models API for this key. */
modelIds: string[];
/**
* Some keys are assigned to multiple organizations, each with their own quota
* limits. We clone the key for each organization and track usage/disabled
@@ -97,6 +99,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4" as const,
"gpt4-turbo" as const,
"gpt4o" as const,
"gpt5" as const,
"o-series" as const,
],
isTrial: false,
isDisabled: false,
@@ -118,8 +122,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4-32kTokens": 0,
"gpt4-turboTokens": 0,
gpt4oTokens: 0,
gpt5Tokens: 0,
"o-seriesTokens": 0,
"dall-eTokens": 0,
gpt4Rpm: 0,
modelIds: [],
modelSnapshots: [],
};
this.keys.push(newKey);
@@ -160,8 +167,10 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
if (model === "gpt-4-32k") model = "gpt-4-32k-0613";
const neededFamily = getOpenAIModelFamily(model);
const excludeTrials = model === "text-embedding-ada-002";
const needsSnapshot = model.match(/-\d{4}(-preview)?$/);
const excludeTrials = /^text-embedding-(?:3-small|3-large|ada-002)$/.test(
model
);
const needsSnapshot = model.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/);
const availableKeys = this.keys.filter(
// Allow keys which
@@ -169,6 +178,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
!key.isDisabled && // are not disabled
key.modelFamilies.includes(neededFamily) && // have access to the model family we need
(!excludeTrials || !key.isTrial) && // and are not trials if we don't want them
(!key.modelIds.length || key.modelIds.includes(model)) && // and have the requested model if exact inventory is available
(!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need
);
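Sketch of the narrowed key selection (inventory values illustrative):
// key.modelIds = ["gpt-5.1", "o3"]
//   get("gpt-5.1")    -> key eligible (family and exact ID both match)
//   get("gpt-5-mini") -> key skipped (family matches, ID not in inventory)
// key.modelIds = [] (key not yet re-checked) -> family-only matching, as before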
+48 -2
View File
@@ -23,6 +23,8 @@ export type OpenAIModelFamily =
| "gpt4-32k"
| "gpt4-turbo"
| "gpt4o"
| "gpt5"
| "o-series"
| "dall-e";
export type AnthropicModelFamily = "claude" | "claude-opus";
export type GoogleAIModelFamily = "gemini-pro";
@@ -51,6 +53,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"gpt4-32k",
"gpt4-turbo",
"gpt4o",
"gpt5",
"o-series",
"dall-e",
"claude",
"claude-opus",
@@ -68,6 +72,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"azure-gpt4-32k",
"azure-gpt4-turbo",
"azure-gpt4o",
"azure-gpt5",
"azure-o-series",
"azure-dall-e",
] as const);
@@ -84,6 +90,10 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
] as const);
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-5(\\.\\d+)?([-.].+)?$": "gpt5",
"^o\\d([-.].+)?$": "o-series",
"^computer-use-preview$": "o-series",
"^gpt-4\\.1([-.].+)?$": "gpt4o",
"^gpt-4o": "gpt4o",
"^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
"^gpt-4-turbo(-preview)?$": "gpt4-turbo",
@@ -94,7 +104,8 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-4-\\d{4}$": "gpt4",
"^gpt-4$": "gpt4",
"^gpt-3.5-turbo": "turbo",
"^text-embedding-ada-002$": "turbo",
"^text-embedding-(ada-002|3-small|3-large)$": "turbo",
"^gpt-image-1([-.].+)?$": "dall-e",
"^dall-e-\\d{1}$": "dall-e",
};
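For reference, how a few IDs resolve under the updated map (sample IDs illustrative):
// "gpt-5.1"      -> "gpt5"      via ^gpt-5(\.\d+)?([-.].+)?$
// "o3-mini"      -> "o-series"  via ^o\d([-.].+)?$
// "gpt-4.1-mini" -> "gpt4o"     via ^gpt-4\.1([-.].+)?$
// "gpt-image-1"  -> "dall-e"    via ^gpt-image-1([-.].+)?$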
@@ -106,6 +117,8 @@ export const MODEL_FAMILY_SERVICE: {
"gpt4-turbo": "openai",
"gpt4-32k": "openai",
"gpt4o": "openai",
gpt5: "openai",
"o-series": "openai",
"dall-e": "openai",
claude: "anthropic",
"claude-opus": "anthropic",
@@ -118,6 +131,8 @@ export const MODEL_FAMILY_SERVICE: {
"azure-gpt4-32k": "azure",
"azure-gpt4-turbo": "azure",
"azure-gpt4o": "azure",
"azure-gpt5": "azure",
"azure-o-series": "azure",
"azure-dall-e": "azure",
"gemini-pro": "google-ai",
"mistral-tiny": "mistral-ai",
@@ -150,7 +165,10 @@ export function getGoogleAIModelFamily(_model: string): ModelFamily {
}
export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
const prunedModel = model.replace(/-(latest|\d{4})$/, "");
const prunedModel = model.replace(
/-(latest|\d{4}|\d{6}|\d+\.\d+|v\d+(:\d+)?)$/,
""
);
switch (prunedModel) {
case "mistral-tiny":
case "mistral-small":
@@ -161,7 +179,34 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
return "mistral-tiny";
case "open-mixtral-8x7b":
return "mistral-small";
case "ministral-3b":
case "ministral-8b":
case "mistral-small-3.1":
case "mistral-small-3.2":
return "mistral-small";
case "magistral-medium":
return "mistral-medium";
case "codestral":
case "devstral":
case "mistral-large-2":
case "mistral-large-3":
case "pixtral-large":
return "mistral-large";
default:
if (model.startsWith("mistral-small") || model.startsWith("ministral")) {
return "mistral-small";
}
if (model.startsWith("mistral-medium") || model.startsWith("magistral")) {
return "mistral-medium";
}
if (
model.startsWith("mistral-large") ||
model.startsWith("pixtral-large") ||
model.startsWith("codestral") ||
model.startsWith("devstral")
) {
return "mistral-large";
}
return "mistral-tiny";
}
}
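Expected resolutions under the updated pruning regex and fallbacks (sample IDs illustrative):
// "mistral-large-2411"   -> pruned to "mistral-large"    -> "mistral-large"
// "ministral-8b-latest"  -> pruned to "ministral-8b"     -> "mistral-small"
// "magistral-medium-1.2" -> pruned to "magistral-medium" -> "mistral-medium"
// "pixtral-large-2502"   -> pruned to "pixtral-large"    -> "mistral-large"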
@@ -225,6 +270,7 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
modelFamily = getClaudeModelFamily(model);
break;
case "openai":
case "openai-responses":
case "openai-text":
case "openai-image":
modelFamily = getOpenAIModelFamily(model);
+8
View File
@@ -10,6 +10,14 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
case "azure-gpt4o":
cost = 0.000005;
break;
case "gpt5":
case "azure-gpt5":
cost = 0.00001;
break;
case "o-series":
case "azure-o-series":
cost = 0.000012;
break;
case "azure-gpt4-turbo":
case "gpt4-turbo":
cost = 0.00001;
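Worked example of the new rates (assuming the function multiplies the per-token rate by the count, as its signature suggests):
// getTokenCostUsd("gpt5", 1_000_000)     -> 1,000,000 * 0.00001  = $10.00
// getTokenCostUsd("o-series", 1_000_000) -> 1,000,000 * 0.000012 = $12.00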
+7
View File
@@ -65,7 +65,14 @@ async function getTokenCountForMessages({
numTokens += encoder.encode(text.normalize("NFKC"), "all").length;
break;
case "image":
if (part.source.type === "base64") {
numTokens += await getImageTokenCount(part.source.data);
} else {
// Remote image URLs reference content we never download, so we cannot
// inspect the image's dimensions locally. Charge the documented
// worst-case token cost instead of undercounting the image as zero.
numTokens += 1600;
}
break;
default:
throw new Error(`Unsupported Anthropic content type.`);
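The 1600-token fallback is an assumption based on Anthropic's published sizing guidance, not a value from this codebase:
// Assumed rule per Anthropic's vision docs: tokens ~= (widthPx * heightPx) / 750,
// which tops out around ~1,590 tokens for a 1092x1092 image; 1600 is therefore
// a conservative worst-case charge for images we cannot measure.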
+36 -8
View File
@@ -179,16 +179,33 @@ export const DALLE_TOKENS_PER_DOLLAR = 100000;
* which we convert to tokens at a rate of 100000 tokens per dollar.
*/
export function getOpenAIImageCost(params: {
model: "dall-e-2" | "dall-e-3";
quality: "standard" | "hd";
resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024";
model:
| "dall-e-2"
| "dall-e-3"
| "gpt-image-1"
| "gpt-image-1-mini"
| "gpt-image-1.5";
quality: "auto" | "low" | "medium" | "high" | "standard" | "hd";
resolution:
| "auto"
| "512x512"
| "256x256"
| "1024x1024"
| "1024x1536"
| "1536x1024"
| "1024x1792"
| "1792x1024";
n: number | null;
}) {
const { model, quality, resolution, n } = params;
const normalizedResolution =
resolution === "auto" ? "1024x1024" : resolution;
const normalizedQuality =
quality === "hd" || quality === "high" ? "hd" : "standard";
const usd = (() => {
switch (model) {
case "dall-e-2":
switch (resolution) {
switch (normalizedResolution) {
case "512x512":
return 0.018;
case "256x256":
@@ -199,12 +216,20 @@ export function getOpenAIImageCost(params: {
throw new Error("Invalid resolution");
}
case "dall-e-3":
switch (resolution) {
case "gpt-image-1.5":
case "gpt-image-1":
case "gpt-image-1-mini":
// GPT Image models have newer parameter ranges, but we still account
// for them using the existing DALL-E 3-style price buckets so the
// proxy can continue to enforce rough quota/cost limits.
switch (normalizedResolution) {
case "1024x1024":
return quality === "standard" ? 0.04 : 0.08;
return normalizedQuality === "standard" ? 0.04 : 0.08;
case "1024x1536":
case "1536x1024":
case "1024x1792":
case "1792x1024":
return quality === "standard" ? 0.08 : 0.12;
return normalizedQuality === "standard" ? 0.08 : 0.12;
default:
throw new Error("Invalid resolution");
}
@@ -233,7 +258,10 @@ export function estimateGoogleAITokenCount(
let numTokens = 0;
for (const message of prompt) {
numTokens += tokensPerMessage;
numTokens += encoder.encode(message.parts[0].text).length;
const text = message.parts
.map((part) => ("text" in part ? part.text : ""))
.join("\n");
numTokens += encoder.encode(text).length;
}
numTokens += 3;
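A hedged usage sketch of the widened image pricing (parameters illustrative; assumes the function returns the USD figure computed above):
const usd = getOpenAIImageCost({
  model: "gpt-image-1.5",
  quality: "high",         // normalized to the "hd" bucket
  resolution: "1024x1536", // billed like the 1024x1792 tier
  n: 1,
});
// hd at 1024x1536 prices at $0.12, i.e. 0.12 * DALLE_TOKENS_PER_DOLLAR
// = 12,000 quota tokens for the single image.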
+2 -1
View File
@@ -55,7 +55,7 @@ type MistralAIChatTokenCountRequest = {
type FlatPromptTokenCountRequest = {
prompt: string;
completion?: never;
service: "openai-text" | "anthropic-text" | "google-ai";
service: "openai-text" | "openai-responses" | "anthropic-text" | "google-ai";
};
type StringCompletionTokenCountRequest = {
@@ -105,6 +105,7 @@ export async function countTokens({
tokenization_duration_ms: getElapsedMs(time),
};
case "openai":
case "openai-responses":
case "openai-text":
return {
...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),
+1
View File
@@ -400,6 +400,7 @@ function getModelFamilyForQuotaUsage(
switch (api) {
case "openai":
case "openai-responses":
case "openai-text":
case "openai-image":
return getOpenAIModelFamily(model);