Update proxy for modern model APIs

Your Name
2026-04-06 03:59:37 -07:00
parent 824adfbbb2
commit 8662eadea7
48 changed files with 1294 additions and 214 deletions
+6 -2
@@ -40,11 +40,11 @@ NODE_ENV=production
# Which model types users are allowed to access.
# The following model families are recognized:
-# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-dall-e
# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | gpt5 | o-series | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-gpt5 | azure-o-series | azure-dall-e
# By default, all models are allowed except for 'dall-e' / 'azure-dall-e'.
# To allow DALL-E image generation, uncomment the line below and add 'dall-e' or
# 'azure-dall-e' to the list of allowed model families.
-# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,gpt5,o-series,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o,azure-gpt5,azure-o-series
# Which services can be used to process prompts containing images via multimodal
# models. The following services are recognized:
@@ -115,10 +115,14 @@ NODE_ENV=production
# TOKEN_QUOTA_GPT4=0
# TOKEN_QUOTA_GPT4_32K=0
# TOKEN_QUOTA_GPT4_TURBO=0
# TOKEN_QUOTA_GPT5=0
# TOKEN_QUOTA_O_SERIES=0
# TOKEN_QUOTA_CLAUDE=0
# TOKEN_QUOTA_GEMINI_PRO=0
# TOKEN_QUOTA_AWS_CLAUDE=0
# TOKEN_QUOTA_GCP_CLAUDE=0
# TOKEN_QUOTA_AZURE_GPT5=0
# TOKEN_QUOTA_AZURE_O_SERIES=0
# "Tokens" for image-generation models are counted at a rate of 100000 tokens
# per US$1.00 generated, which is similar to the cost of GPT-4 Turbo.
# DALL-E 3 costs around US$0.10 per image (10000 tokens).
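For reference, the quota math above is simple dollar arithmetic; a minimal sketch (constant names are illustrative, not taken from the proxy source):

```ts
// Image-generation "tokens" are dollar-denominated: 100000 tokens per US$1.00.
const TOKENS_PER_USD = 100_000;

// DALL-E 3 at roughly US$0.10 per image therefore counts as ~10000 quota tokens.
const estimatedImageCostUsd = 0.1;
const quotaTokensPerImage = estimatedImageCostUsd * TOKENS_PER_USD; // 10000
```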
+8 -3
@@ -45,11 +45,16 @@ You can also request Claude Instant, but support for this isn't fully implemente
### Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
-- `anthropic.claude-v1` (~18k context, claude 1.3 -- EOL 2024-02-28)
- `anthropic.claude-v2` (~100k context, claude 2.0)
- `anthropic.claude-v2:1` (~200k context, claude 2.1)
-- **Claude Instant**
-- `anthropic.claude-instant-v1` (~100k context, claude instant 1.2)
- `anthropic.claude-haiku-4-5-20251001-v1:0`
- `anthropic.claude-sonnet-4-5-20250929-v1:0`
- `anthropic.claude-opus-4-1-20250805-v1:0`
- `anthropic.claude-3-5-haiku-20241022-v1:0`
- `anthropic.claude-sonnet-4-20250514-v1:0`
- `anthropic.claude-opus-4-20250514-v1:0`

For OpenAI-compatible callers, the proxy will also remap newer Claude-style names such as `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, `claude-opus-4-1-20250805`, and `claude-3-5-haiku-20241022` to the corresponding Bedrock model IDs.
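Conceptually, the remap translates a dash-dated Anthropic-style alias into the versioned Bedrock ID. A minimal sketch of the idea (the table and helper name here are illustrative; the proxy's actual `maybeReassignModel` matches substrings, so it also catches bare aliases like `claude-sonnet-4-5`):

```ts
// Hypothetical exact-match table built from the model IDs listed above.
const BEDROCK_IDS: Record<string, string> = {
  "claude-sonnet-4-5-20250929": "anthropic.claude-sonnet-4-5-20250929-v1:0",
  "claude-haiku-4-5-20251001": "anthropic.claude-haiku-4-5-20251001-v1:0",
  "claude-opus-4-1-20250805": "anthropic.claude-opus-4-1-20250805-v1:0",
  "claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
};

// Fall back to the requested name if no alias matches.
function toBedrockModelId(requested: string): string {
  return BEDROCK_IDS[requested.toLowerCase()] ?? requested;
}
```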
## Note regarding logging
+3 -1
@@ -20,7 +20,9 @@ AZURE_CREDENTIALS=contoso-ml:gpt4-8k:0123456789abcdef0123456789abcdef,northwind-
Note that each Azure deployment is assigned a model when you create it in the Azure OpenAI Service portal. If you want to use a different model, you'll need to create a new deployment, and therefore a new key to be added to the AZURE_CREDENTIALS environment variable. Each credential only grants access to one model.
### Supported model IDs
-Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. For the most part they work the same with Azure. GPT-3.5 Turbo has an ID of "gpt-35-turbo" because Azure doesn't allow periods in model names, but the proxy should automatically convert this to the correct ID.
Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. The proxy now understands newer Azure-backed OpenAI model families such as GPT-4o, GPT-4.1, GPT-5 / GPT-5.2, o-series reasoning models, and GPT Image deployments including `gpt-image-1.5`, plus the newer Responses API route at `/proxy/azure/openai/v1/responses`.

GPT-3.5 Turbo still has an Azure-specific ID of `gpt-35-turbo` because Azure doesn't allow periods in model names, but the proxy will automatically normalize that for you.
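The normalization itself is a simple string rewrite, since Azure deployment names cannot contain periods; a sketch under that assumption, with a hypothetical helper name:

```ts
// Hypothetical helper: OpenAI's "gpt-3.5-turbo" maps to Azure's "gpt-35-turbo".
function toAzureDeploymentName(modelId: string): string {
  return modelId.startsWith("gpt-3.5")
    ? modelId.replace("gpt-3.5", "gpt-35")
    : modelId;
}

toAzureDeploymentName("gpt-3.5-turbo"); // => "gpt-35-turbo"
```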
As noted above, you can only use model IDs for which a deployment has been created and added to the proxy.
+8 -4
@@ -29,7 +29,11 @@ GCP_CREDENTIALS=my-first-project:xxx@yyy.com:us-east5:-----BEGIN PRIVATE KEY----
## Supported model IDs
Users can send these model IDs to the proxy to invoke the corresponding models.
- **Claude**
-- `claude-3-haiku@20240307`
-- `claude-3-sonnet@20240229`
-- `claude-3-opus@20240229`
-- `claude-3-5-sonnet@20240620`
- `claude-haiku-4-5@20251001`
- `claude-sonnet-4-5@20250929`
- `claude-opus-4-1@20250805`
- `claude-3-5-haiku@20241022`
- `claude-sonnet-4@20250514`
- `claude-opus-4@20250514`

For OpenAI-compatible callers, the proxy will also remap Claude-style aliases like `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`, and `claude-opus-4-1-20250805` to the corresponding Vertex AI model IDs.
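Vertex AI uses an `@`-separated version suffix where the Anthropic-style alias uses a trailing dash-date, so the remap amounts to rewriting that suffix. A sketch with a hypothetical helper (the proxy's actual logic matches against a fixed set of constants instead):

```ts
// "claude-sonnet-4-5-20250929" -> "claude-sonnet-4-5@20250929"
function toVertexModelId(alias: string): string {
  return alias.replace(/-(\d{8})$/, "@$1");
}
```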
+51 -7
@@ -7,8 +7,8 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
-"model": "gpt-3.5-turbo",
"model": "gpt-4.1-mini",
-"max_tokens": 30,
"max_completion_tokens": 30,
"stream": false,
"messages": [
{
@@ -18,6 +18,19 @@ Content-Type: application/json
]
}
###
# @name OpenAI -- Responses API
POST https://api.openai.com/v1/responses
Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
"model": "gpt-5.2",
"reasoning": { "effort": "medium" },
"max_output_tokens": 80,
"input": "This is a test prompt."
}
###
# @name OpenAI -- Text Completions
POST https://api.openai.com/v1/completions
@@ -38,7 +51,7 @@ Authorization: Bearer {{oai-key-1}}
Content-Type: application/json
{
-"model": "text-embedding-ada-002",
"model": "text-embedding-3-small",
"input": "This is a test embedding input."
}
@@ -81,8 +94,8 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
-"model": "gpt-4-1106-preview",
"model": "gpt-4.1",
-"max_tokens": 20,
"max_completion_tokens": 20,
"stream": true,
"temperature": 1,
"seed": 123,
@@ -94,6 +107,20 @@ Content-Type: application/json
]
}
###
# @name Proxy / OpenAI -- Native Responses API
POST {{proxy-host}}/proxy/openai/v1/responses
Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gpt-5.2",
"reasoning": { "effort": "medium" },
"max_output_tokens": 64,
"stream": false,
"input": "Summarize the purpose of this reverse proxy in one sentence."
}
###
# @name Proxy / OpenAI -- Native Text Completions
POST {{proxy-host}}/proxy/openai/v1/turbo-instruct/chat/completions
@@ -142,7 +169,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
-"model": "text-embedding-ada-002",
"model": "text-embedding-3-small",
"input": "This is a test embedding input."
}
@@ -185,7 +212,7 @@ Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
-"model": "gpt-3.5-turbo",
"model": "gpt-5.2",
"max_tokens": 20,
"stream": false,
"temperature": 0,
@@ -197,6 +224,23 @@ Content-Type: application/json
]
}
###
# @name Proxy / Google AI -- OpenAI-Compat Image Generation
POST {{proxy-host}}/proxy/google-ai/v1/chat/completions
Authorization: Bearer {{proxy-key}}
Content-Type: application/json
{
"model": "gemini-2.5-flash-image",
"stream": false,
"messages": [
{
"role": "user",
"content": "Generate a flat vector-style illustration of a red fox reading a newspaper at a cafe table."
}
]
}
###
# @name Proxy / AWS Claude -- Native Completion
POST {{proxy-host}}/proxy/aws/claude/v1/complete
+4
@@ -434,6 +434,8 @@ export const config: Config = {
"gpt4-32k", "gpt4-32k",
"gpt4-turbo", "gpt4-turbo",
"gpt4o", "gpt4o",
"gpt5",
"o-series",
"claude", "claude",
"claude-opus", "claude-opus",
"gemini-pro", "gemini-pro",
@@ -450,6 +452,8 @@ export const config: Config = {
"azure-gpt4-32k", "azure-gpt4-32k",
"azure-gpt4-turbo", "azure-gpt4-turbo",
"azure-gpt4o", "azure-gpt4o",
"azure-gpt5",
"azure-o-series",
]),
rejectPhrases: parseCsv(getEnvWithDefault("REJECT_PHRASES", "")),
rejectMessage: getEnvWithDefault(
+4
@@ -17,6 +17,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"gpt4-32k": "GPT-4 32k", "gpt4-32k": "GPT-4 32k",
"gpt4-turbo": "GPT-4 Turbo", "gpt4-turbo": "GPT-4 Turbo",
gpt4o: "GPT-4o", gpt4o: "GPT-4o",
gpt5: "GPT-5",
"o-series": "o-Series",
"dall-e": "DALL-E", "dall-e": "DALL-E",
claude: "Claude (Sonnet)", claude: "Claude (Sonnet)",
"claude-opus": "Claude (Opus)", "claude-opus": "Claude (Opus)",
@@ -34,6 +36,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"azure-gpt4-32k": "Azure GPT-4 32k", "azure-gpt4-32k": "Azure GPT-4 32k",
"azure-gpt4-turbo": "Azure GPT-4 Turbo", "azure-gpt4-turbo": "Azure GPT-4 Turbo",
"azure-gpt4o": "Azure GPT-4o", "azure-gpt4o": "Azure GPT-4o",
"azure-gpt5": "Azure GPT-5",
"azure-o-series": "Azure o-Series",
"azure-dall-e": "Azure DALL-E", "azure-dall-e": "Azure DALL-E",
}; };
+28 -21
@@ -29,24 +29,18 @@ const getModelsResponse = () => {
if (!config.anthropicKey) return { object: "list", data: [] };
const claudeVariants = [
-"claude-v1",
-"claude-v1-100k",
-"claude-instant-v1",
-"claude-instant-v1-100k",
-"claude-v1.3",
-"claude-v1.3-100k",
-"claude-v1.2",
-"claude-v1.0",
-"claude-instant-v1.1",
-"claude-instant-v1.1-100k",
-"claude-instant-v1.0",
-"claude-2",
"claude-2.0",
"claude-2.1",
-"claude-3-haiku-20240307",
-"claude-3-opus-20240229",
-"claude-3-sonnet-20240229",
-"claude-3-5-sonnet-20240620"
"claude-sonnet-4-5",
"claude-sonnet-4-5-20250929",
"claude-haiku-4-5",
"claude-haiku-4-5-20251001",
"claude-opus-4-1",
"claude-opus-4-1-20250805",
"claude-opus-4-20250514",
"claude-sonnet-4-20250514",
"claude-3-5-haiku-20241022",
"claude-3-5-haiku-latest",
];
const models = claudeVariants.map((id) => ({
@@ -230,7 +224,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware({
 * (claude-3 based models do not support the old text completion endpoint).
 */
const preprocessAnthropicTextRequest: RequestHandler = (req, res, next) => {
-if (req.body.model?.startsWith("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
@@ -255,7 +249,7 @@ const oaiToChatPreprocessor = createPreprocessorMiddleware({
 */
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
maybeReassignModel(req);
-if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
oaiToChatPreprocessor(req, res, next);
} else {
oaiToTextPreprocessor(req, res, next);
@@ -315,7 +309,8 @@ function handleAnthropicTextCompatRequest(
const type = req.params.type;
const action = req.params.action;
const alreadyInChatFormat = Boolean(req.body.messages);
-const compatModel = `claude-3-${type}-20240229`;
const compatModel =
type === "opus" ? "claude-opus-4-1-20250805" : "claude-sonnet-4-5-20250929";
req.log.info(
{ type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
"Handling Anthropic compatibility request"
@@ -349,8 +344,20 @@ function handleAnthropicTextCompatRequest(
 */
function maybeReassignModel(req: Request) {
const model = req.body.model;
-if (!model.startsWith("gpt-")) return;
-req.body.model = "claude-3-sonnet-20240229";
const lower = String(model).toLowerCase();
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = "claude-sonnet-4-5-20250929";
}
}
function requiresAnthropicMessagesApi(model?: string) {
return /^claude-(?:3|sonnet|opus)/.test(model ?? "");
}
export const anthropic = anthropicRouter;
+80 -13
@@ -20,6 +20,12 @@ import { transformAnthropicChatResponseToAnthropicText, transformAnthropicChatRe
import { sendErrorToClient } from "./middleware/response/error-generator";
const LATEST_AWS_V2_MINOR_VERSION = "1";
const AWS_CLAUDE_SONNET_45 = "anthropic.claude-sonnet-4-5-20250929-v1:0";
const AWS_CLAUDE_HAIKU_45 = "anthropic.claude-haiku-4-5-20251001-v1:0";
const AWS_CLAUDE_OPUS_41 = "anthropic.claude-opus-4-1-20250805-v1:0";
const AWS_CLAUDE_SONNET_4 = "anthropic.claude-sonnet-4-20250514-v1:0";
const AWS_CLAUDE_OPUS_4 = "anthropic.claude-opus-4-20250514-v1:0";
const AWS_CLAUDE_35_HAIKU = "anthropic.claude-3-5-haiku-20241022-v1:0";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -35,10 +41,12 @@ const getModelsResponse = () => {
const variants = [
"anthropic.claude-v2",
"anthropic.claude-v2:1",
-"anthropic.claude-3-haiku-20240307-v1:0",
-"anthropic.claude-3-sonnet-20240229-v1:0",
-"anthropic.claude-3-5-sonnet-20240620-v1:0",
-"anthropic.claude-3-opus-20240229-v1:0",
AWS_CLAUDE_HAIKU_45,
AWS_CLAUDE_SONNET_45,
AWS_CLAUDE_OPUS_41,
AWS_CLAUDE_35_HAIKU,
AWS_CLAUDE_SONNET_4,
AWS_CLAUDE_OPUS_4,
];
const models = variants.map((id) => ({
@@ -164,7 +172,7 @@ const textToChatPreprocessor = createPreprocessorMiddleware(
 * (claude-3 based models do not support the old text completion endpoint).
 */
const preprocessAwsTextRequest: RequestHandler = (req, res, next) => {
-if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
@@ -186,7 +194,7 @@ const oaiToAwsChatPreprocessor = createPreprocessorMiddleware(
 * or the new Claude chat completion endpoint, based on the requested model.
 */
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
-if (req.body.model?.includes("claude-3")) {
if (requiresAnthropicMessagesApi(req.body.model)) {
oaiToAwsChatPreprocessor(req, res, next);
} else {
oaiToAwsTextPreprocessor(req, res, next);
@@ -241,12 +249,65 @@ awsRouter.post(
 */
function maybeReassignModel(req: Request) {
const model = req.body.model;
const lower = String(model).toLowerCase();
// If it looks like an AWS model, use it as-is
if (model.includes("anthropic.claude")) {
return;
}
if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4")) {
req.body.model = AWS_CLAUDE_OPUS_4;
return;
}
if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
req.body.model = AWS_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
if (lower.includes("sonnet-4")) {
req.body.model = AWS_CLAUDE_SONNET_4;
return;
}
if (lower.includes("3-5") && lower.includes("haiku")) {
req.body.model = AWS_CLAUDE_35_HAIKU;
return;
}
if (lower.includes("opus")) {
req.body.model = AWS_CLAUDE_OPUS_41;
return;
}
if (lower.includes("haiku")) {
req.body.model = AWS_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet")) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = AWS_CLAUDE_SONNET_45;
return;
}
// Anthropic model names can look like:
// - claude-v1
// - claude-2.1
@@ -282,20 +343,22 @@ function maybeReassignModel(req: Request) {
case "3": case "3":
case "3.0": case "3.0":
if (name.includes("opus")) { if (name.includes("opus")) {
req.body.model = "anthropic.claude-3-opus-20240229-v1:0"; req.body.model = AWS_CLAUDE_OPUS_41;
} else if (name.includes("haiku")) { } else if (name.includes("haiku")) {
req.body.model = "anthropic.claude-3-haiku-20240307-v1:0"; req.body.model = AWS_CLAUDE_HAIKU_45;
} else { } else {
req.body.model = "anthropic.claude-3-sonnet-20240229-v1:0"; req.body.model = AWS_CLAUDE_SONNET_45;
} }
return; return;
case "3.5": case "3.5":
req.body.model = "anthropic.claude-3-5-sonnet-20240620-v1:0"; req.body.model = name.includes("haiku")
? AWS_CLAUDE_35_HAIKU
: AWS_CLAUDE_SONNET_45;
return; return;
} }
// Fallback to Claude 2.1 // Fallback to Claude Sonnet 4
req.body.model = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`; req.body.model = AWS_CLAUDE_SONNET_45;
return; return;
} }
@@ -306,7 +369,7 @@ export function handleCompatibilityRequest(
) {
const action = req.params.action;
const alreadyInChatFormat = Boolean(req.body.messages);
-const compatModel = "anthropic.claude-3-5-sonnet-20240620-v1:0";
const compatModel = AWS_CLAUDE_SONNET_4;
req.log.info(
{ inputModel: req.body.model, compatModel, alreadyInChatFormat },
"Handling AWS compatibility request"
@@ -335,3 +398,7 @@ export function handleCompatibilityRequest(
}
export const aws = awsRouter;
function requiresAnthropicMessagesApi(model?: string) {
return /claude-(?:3|sonnet|opus)/.test(model ?? "");
}
+30 -6
@@ -32,20 +32,29 @@ function getModelsResponse() {
}
let available = new Set<AzureOpenAIModelFamily>();
const availableModelIds = new Set<string>();
for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "azure") continue;
const azureKey = key as any;
key.modelFamilies.forEach((family) =>
available.add(family as AzureOpenAIModelFamily)
);
azureKey.modelIds?.forEach((id: string) => availableModelIds.add(id));
}
const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
available = new Set([...available].filter((x) => allowed.has(x)));
-const models = KNOWN_OPENAI_MODELS.map((id) => ({
-id,
-object: "model",
-created: new Date().getTime(),
-owned_by: "azure",
const usingExactModelIds = availableModelIds.size > 0;
const sourceModels = usingExactModelIds
? [...new Set([...KNOWN_OPENAI_MODELS, ...availableModelIds])]
: KNOWN_OPENAI_MODELS;
const models = sourceModels.map((id) => ({
id,
object: "model",
created: new Date().getTime(),
owned_by: "azure",
permission: [
{
id: "modelperm-" + id,
@@ -58,7 +67,12 @@ function getModelsResponse() {
],
root: id,
parent: null,
-})).filter((model) => available.has(getAzureOpenAIModelFamily(model.id)));
})).filter((model) => {
if (usingExactModelIds) {
return availableModelIds.has(model.id);
}
return available.has(getAzureOpenAIModelFamily(model.id));
});
modelsCache = { object: "list", data: models };
modelsCacheTime = new Date().getTime();
@@ -115,6 +129,16 @@ azureOpenAIRouter.post(
}),
azureOpenAIProxy
);
azureOpenAIRouter.post(
"/v1/responses",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai-responses",
outApi: "openai-responses",
service: "azure",
}),
azureOpenAIProxy
);
azureOpenAIRouter.post(
"/v1/images/generations",
ipLimiter,
+74 -12
@@ -19,7 +19,12 @@ import {
import { transformAnthropicChatResponseToOpenAI } from "./anthropic";
import { sendErrorToClient } from "./middleware/response/error-generator";
-const LATEST_GCP_SONNET_MINOR_VERSION = "20240229";
const GCP_CLAUDE_SONNET_45 = "claude-sonnet-4-5@20250929";
const GCP_CLAUDE_HAIKU_45 = "claude-haiku-4-5@20251001";
const GCP_CLAUDE_OPUS_41 = "claude-opus-4-1@20250805";
const GCP_CLAUDE_SONNET_4 = "claude-sonnet-4@20250514";
const GCP_CLAUDE_OPUS_4 = "claude-opus-4@20250514";
const GCP_CLAUDE_35_HAIKU = "claude-3-5-haiku@20241022";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -33,10 +38,12 @@ const getModelsResponse = () => {
// https://docs.anthropic.com/en/docs/about-claude/models
const variants = [
-"claude-3-haiku@20240307",
-"claude-3-sonnet@20240229",
-"claude-3-opus@20240229",
-"claude-3-5-sonnet@20240620",
GCP_CLAUDE_HAIKU_45,
GCP_CLAUDE_SONNET_45,
GCP_CLAUDE_OPUS_41,
GCP_CLAUDE_35_HAIKU,
GCP_CLAUDE_SONNET_4,
GCP_CLAUDE_OPUS_4,
];
const models = variants.map((id) => ({
@@ -147,6 +154,7 @@ gcpRouter.post(
 */
function maybeReassignModel(req: Request) {
const model = req.body.model;
const lower = String(model).toLowerCase();
// If it looks like a GCP model, use it as-is
// if (model.includes("anthropic.claude")) {
@@ -154,6 +162,58 @@ function maybeReassignModel(req: Request) {
return;
}
if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("opus-4")) {
req.body.model = GCP_CLAUDE_OPUS_4;
return;
}
if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
req.body.model = GCP_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
if (lower.includes("sonnet-4")) {
req.body.model = GCP_CLAUDE_SONNET_4;
return;
}
if (lower.includes("3-5") && lower.includes("haiku")) {
req.body.model = GCP_CLAUDE_35_HAIKU;
return;
}
if (lower.includes("opus")) {
req.body.model = GCP_CLAUDE_OPUS_41;
return;
}
if (lower.includes("haiku")) {
req.body.model = GCP_CLAUDE_HAIKU_45;
return;
}
if (lower.includes("sonnet")) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
if (
lower.startsWith("gpt-") ||
lower.startsWith("o1") ||
lower.startsWith("o3") ||
lower.startsWith("o4") ||
lower === "computer-use-preview"
) {
req.body.model = GCP_CLAUDE_SONNET_45;
return;
}
// Anthropic model names can look like:
// - claude-v1
// - claude-2.1
@@ -165,7 +225,7 @@ function maybeReassignModel(req: Request) {
// If there's no match, fall back to Claude Sonnet as it is most likely to be
// available on GCP.
if (!match) {
-req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`;
req.body.model = GCP_CLAUDE_SONNET_4;
return;
}
@@ -176,20 +236,22 @@ function maybeReassignModel(req: Request) {
case "3": case "3":
case "3.0": case "3.0":
if (name.includes("opus")) { if (name.includes("opus")) {
req.body.model = "claude-3-opus@20240229"; req.body.model = GCP_CLAUDE_OPUS_41;
} else if (name.includes("haiku")) { } else if (name.includes("haiku")) {
req.body.model = "claude-3-haiku@20240307"; req.body.model = GCP_CLAUDE_HAIKU_45;
} else { } else {
req.body.model = "claude-3-sonnet@20240229"; req.body.model = GCP_CLAUDE_SONNET_45;
} }
return; return;
case "3.5": case "3.5":
req.body.model = "claude-3-5-sonnet@20240620"; req.body.model = name.includes("haiku")
? GCP_CLAUDE_35_HAIKU
: GCP_CLAUDE_SONNET_45;
return; return;
} }
// Fallback to Claude3 Sonnet // Fallback to Claude Sonnet 4
req.body.model = `claude-3-sonnet@${LATEST_GCP_SONNET_MINOR_VERSION}`; req.body.model = GCP_CLAUDE_SONNET_45;
return; return;
} }
+23 -7
@@ -16,6 +16,11 @@ import {
ProxyResHandlerWithBody,
} from "./middleware/response";
import { addGoogleAIKey } from "./middleware/request/preprocessors/add-google-ai-key";
import { BadRequestError } from "../shared/errors";
import {
flattenGoogleAIContentParts,
isGoogleAIImageModel,
} from "../shared/api-schemas";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -31,10 +36,15 @@ const getModelsResponse = () => {
if (!config.googleAIKey) return { object: "list", data: [] };
const googleAIVariants = [
-"gemini-pro",
-"gemini-1.0-pro",
"gemini-2.5-pro",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
"gemini-2.5-flash-image",
"gemini-3-pro-image-preview",
"gemini-2.0-flash-preview-image-generation",
"gemini-2.0-flash",
"gemini-1.5-pro",
-"gemini-1.5-pro-latest",
"gemini-1.5-flash",
];
const models = googleAIVariants.map((id) => ({
@@ -83,7 +93,8 @@ function transformGoogleAIResponse(
): Record<string, any> {
const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
const parts = resBody.candidates[0].content?.parts ?? [{ text: "" }];
-const content = parts[0].text.replace(/^(.{0,50}?): /, () => "");
const content = flattenGoogleAIContentParts(parts)
.replace(/^(.{0,50}?): /, () => "");
return {
id: "goo-" + v4(),
object: "chat.completion",
@@ -136,14 +147,19 @@ googleAIRouter.post(
googleAIProxy
);
-/** Replaces requests for non-Google AI models with gemini-pro-1.5-latest. */
/** Replaces requests for non-Google AI models with Gemini 2.5 Flash. */
function maybeReassignModel(req: Request) {
const requested = req.body.model;
if (requested.includes("gemini")) {
if (req.body.stream && isGoogleAIImageModel(requested)) {
throw new BadRequestError(
"Streaming Gemini image-generation models is not currently supported by this proxy. Retry without `stream: true`."
);
}
return;
}
-req.log.info({ requested }, "Reassigning model to gemini-pro-1.5-latest");
req.log.info({ requested }, "Reassigning model to gemini-2.5-flash");
-req.body.model = "gemini-pro-1.5-latest";
req.body.model = "gemini-2.5-flash";
}
export const googleAI = googleAIRouter;
+11 -1
@@ -5,10 +5,15 @@ import { ZodError } from "zod";
import { generateErrorMessage } from "zod-error";
import { HttpError } from "../../shared/errors";
import { assertNever } from "../../shared/utils";
import {
flattenGoogleAIContentParts,
flattenOpenAIResponsesOutput,
} from "../../shared/api-schemas";
import { QuotaExceededError } from "./request/preprocessors/apply-quota-limits";
import { sendErrorToClient } from "./response/error-generator";
const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const OPENAI_RESPONSES_ENDPOINT = "/v1/responses";
const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
@@ -22,6 +27,7 @@ export function isTextGenerationRequest(req: Request) {
req.method === "POST" &&
[
OPENAI_CHAT_COMPLETION_ENDPOINT,
OPENAI_RESPONSES_ENDPOINT,
OPENAI_TEXT_COMPLETION_ENDPOINT,
ANTHROPIC_COMPLETION_ENDPOINT,
ANTHROPIC_MESSAGES_ENDPOINT,
@@ -224,6 +230,8 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
// Can be null if the model wants to invoke tools rather than return a
// completion.
return body.choices[0].message.content || "";
case "openai-responses":
return flattenOpenAIResponsesOutput(body);
case "openai-text": case "openai-text":
return body.choices[0].text; return body.choices[0].text;
case "anthropic-chat": case "anthropic-chat":
@@ -252,7 +260,7 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
if ("choices" in body) { if ("choices" in body) {
return body.choices[0].message.content; return body.choices[0].message.content;
} }
return body.candidates[0].content.parts[0].text; return flattenGoogleAIContentParts(body.candidates?.[0]?.content?.parts);
case "openai-image": case "openai-image":
return body.data?.map((item: any) => item.url).join("\n"); return body.data?.map((item: any) => item.url).join("\n");
default: default:
@@ -267,6 +275,8 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
case "openai-text": case "openai-text":
case "mistral-ai": case "mistral-ai":
return body.model; return body.model;
case "openai-responses":
return body.model || req.body.model;
case "openai-image": case "openai-image":
return req.body.model; return req.body.model;
case "anthropic-chat": case "anthropic-chat":
@@ -47,6 +47,7 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
assignedKey = keyPool.get("dall-e-3", service);
break;
case "openai":
case "openai-responses":
case "google-ai": case "google-ai":
case "mistral-ai": case "mistral-ai":
throw new Error( throw new Error(
@@ -109,9 +110,10 @@ export const addKeyForEmbeddingsRequest: HPMRequestCallback = (
throw new Error("Embeddings requests must be from OpenAI"); throw new Error("Embeddings requests must be from OpenAI");
} }
req.body = { input: req.body.input, model: "text-embedding-ada-002" }; const model = req.body.model || "text-embedding-3-small";
req.body = { input: req.body.input, model };
const key = keyPool.get("text-embedding-ada-002", "openai") as OpenAIKey; const key = keyPool.get(model, "openai") as OpenAIKey;
req.key = key; req.key = key;
req.log.info( req.log.info(
@@ -6,7 +6,7 @@ import {
import { RequestPreprocessor } from "../index";
export const addAzureKey: RequestPreprocessor = (req) => {
-const validAPIs: APIFormat[] = ["openai", "openai-image"];
const validAPIs: APIFormat[] = ["openai", "openai-responses", "openai-image"];
const apisValid = [req.outboundApi, req.inboundApi].every((api) =>
validAPIs.includes(api)
);
@@ -50,6 +50,23 @@ export const addAzureKey: RequestPreprocessor = (req) => {
const cred = req.key as AzureOpenAIKey;
const { resourceName, deploymentId, apiKey } = getCredentialsFromKey(cred);
if (req.outboundApi === "openai-responses") {
req.body.model = deploymentId;
req.signedRequest = {
method: "POST",
protocol: "https:",
hostname: `${resourceName}.openai.azure.com`,
path: `/openai/v1/responses?api-version=preview`,
headers: {
["host"]: `${resourceName}.openai.azure.com`,
["content-type"]: "application/json",
["api-key"]: apiKey,
},
body: JSON.stringify(req.body),
};
return;
}
const operation =
req.outboundApi === "openai" ? "/chat/completions" : "/images/generations";
const apiVersion =
@@ -6,6 +6,7 @@ import {
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
flattenOpenAIResponsesInput,
} from "../../../../shared/api-schemas"; } from "../../../../shared/api-schemas";
/** /**
@@ -18,11 +19,23 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
switch (service) {
case "openai": {
-req.outputTokens = req.body.max_tokens;
req.outputTokens =
req.body.max_completion_tokens ?? req.body.max_tokens ?? 0;
const prompt: OpenAIChatMessage[] = req.body.messages;
result = await countTokens({ req, prompt, service });
break;
}
case "openai-responses": {
req.outputTokens = req.body.max_output_tokens ?? 0;
const prompt = [
flattenOpenAIResponsesInput(req.body.instructions),
flattenOpenAIResponsesInput(req.body.input),
]
.filter(Boolean)
.join("\n\n");
result = await countTokens({ req, prompt, service });
break;
}
case "openai-text": { case "openai-text": {
req.outputTokens = req.body.max_tokens; req.outputTokens = req.body.max_tokens;
const prompt: string = req.body.prompt; const prompt: string = req.body.prompt;
@@ -4,8 +4,10 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { BadRequestError } from "../../../../shared/errors";
import {
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
flattenOpenAIResponsesInput,
flattenAnthropicMessages,
} from "../../../../shared/api-schemas";
@@ -72,11 +74,27 @@ function getPromptFromRequest(req: Request) {
return `${msg.role}: ${text}`;
})
.join("\n\n");
case "openai-responses":
return [
flattenOpenAIResponsesInput(body.instructions),
flattenOpenAIResponsesInput(body.input),
]
.filter(Boolean)
.join("\n\n");
case "openai-text": case "openai-text":
case "openai-image": case "openai-image":
return body.prompt; return body.prompt;
case "google-ai": case "google-ai":
return body.prompt.text; return body.contents
.map(({ parts, role }: GoogleAIChatMessage) => {
const text = parts
.map((part: any) =>
"text" in part ? part.text : "[image omitted]"
)
.join("\n");
return `${role}: ${text}`;
})
.join("\n\n");
default:
assertNever(service);
}
@@ -6,8 +6,8 @@ import { RequestPreprocessor } from "../index";
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
-const GOOGLE_AI_MAX_CONTEXT = 32000;
const GOOGLE_AI_MAX_CONTEXT = 1048576;
-const MISTRAL_AI_MAX_CONTENT = 32768;
const MISTRAL_AI_MAX_CONTENT = 256000;
/**
 * Assigns `req.promptTokens` and `req.outputTokens` based on the request body
@@ -26,6 +26,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let proxyMax: number;
switch (req.outboundApi) {
case "openai":
case "openai-responses":
case "openai-text": case "openai-text":
proxyMax = OPENAI_MAX_CONTEXT; proxyMax = OPENAI_MAX_CONTEXT;
break; break;
@@ -54,6 +55,12 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
let modelMax: number;
if (model.match(/gpt-3.5-turbo-16k/)) {
modelMax = 16384;
} else if (model.match(/^gpt-5(\.|-|\b)/)) {
modelMax = 1050000;
} else if (model.match(/^o\d/)) {
modelMax = 200000;
} else if (model.match(/^gpt-4\.1/)) {
modelMax = 1047576;
} else if (model.match(/^gpt-4o/)) {
modelMax = 128000;
} else if (model.match(/gpt-4-turbo(-\d{4}-\d{2}-\d{2})?$/)) {
@@ -80,12 +87,27 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 200000;
} else if (model.match(/^claude-3/)) {
modelMax = 200000;
} else if (model.match(/^claude-(opus|sonnet|haiku)-4/)) {
modelMax = 200000;
} else if (model.match(/^gemini-\d{3}$/)) {
modelMax = GOOGLE_AI_MAX_CONTEXT;
} else if (model.match(/^gemini-(2\.5|2\.0)/)) {
modelMax = 1048576;
} else if (model.match(/^mistral-(tiny|small|medium)$/)) {
modelMax = MISTRAL_AI_MAX_CONTENT;
} else if (
model.match(
/^(mistral|ministral|magistral|pixtral|codestral|devstral|voxtral)-/
)
) {
modelMax = MISTRAL_AI_MAX_CONTENT;
} else if (model.match(/^anthropic\.claude-3/)) {
modelMax = 200000;
} else if (
model.match(/^anthropic\.claude-(opus|sonnet|haiku)-4/) ||
model.match(/^claude-(opus|sonnet|haiku)-4@/)
) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude-v2:\d/)) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude/)) {
@@ -121,8 +143,8 @@ function assertRequestHasTokenCounts(
req: Request
): asserts req is Request & { promptTokens: number; outputTokens: number } {
z.object({
-promptTokens: z.number().int().min(1),
-outputTokens: z.number().int().min(1),
promptTokens: z.number().int().min(0),
outputTokens: z.number().int().min(0),
})
.nonstrict()
.parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens });
@@ -3,6 +3,7 @@ import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { containsImageContent as containsImageContentOpenAI } from "../../../../shared/api-schemas/openai";
import { containsImageContent as containsImageContentAnthropic } from "../../../../shared/api-schemas/anthropic";
import { containsOpenAIResponsesImageInput } from "../../../../shared/api-schemas";
import { ForbiddenError } from "../../../../shared/errors";
/**
@@ -22,11 +23,20 @@ export const validateVision: RequestPreprocessor = async (req) => {
case "openai": case "openai":
hasImage = containsImageContentOpenAI(req.body.messages); hasImage = containsImageContentOpenAI(req.body.messages);
break; break;
case "openai-responses":
hasImage =
containsOpenAIResponsesImageInput(req.body.instructions) ||
containsOpenAIResponsesImageInput(req.body.input);
break;
case "anthropic-chat": case "anthropic-chat":
hasImage = containsImageContentAnthropic(req.body.messages); hasImage = containsImageContentAnthropic(req.body.messages);
break; break;
case "anthropic-text":
case "google-ai": case "google-ai":
hasImage = req.body.contents?.some((message: { parts: any[] }) =>
message.parts?.some((part) => "inline_data" in part)
);
break;
case "anthropic-text":
case "mistral-ai": case "mistral-ai":
case "openai-image": case "openai-image":
case "openai-text": case "openai-text":
@@ -72,7 +72,15 @@ type ErrorGeneratorOptions = {
};
export function tryInferFormat(body: any): APIFormat | "unknown" {
-if (typeof body !== "object" || !body.model) {
if (typeof body !== "object") {
return "unknown";
}
if (body.object === "response" || Array.isArray(body.output)) {
return "openai-responses";
}
if (!body.model) {
return "unknown";
}
@@ -158,7 +166,30 @@ export function buildSpoofedCompletion({
switch (format) {
case "openai":
case "openai-responses":
case "mistral-ai": case "mistral-ai":
if (format === "openai-responses") {
return {
id: "error-" + id,
object: "response",
created_at: Math.floor(Date.now() / 1000),
model,
status: "completed",
error: null,
incomplete_details: null,
output_text: content,
output: [
{
id: "msg-error-" + id,
type: "message",
role: "assistant",
status: "completed",
content: [{ type: "output_text", text: content, annotations: [] }],
},
],
usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
};
}
return {
id: "error-" + id,
object: "chat.completion",
@@ -248,7 +279,23 @@ export function buildSpoofedSSE({
switch (format) {
case "openai":
case "openai-responses":
case "mistral-ai": case "mistral-ai":
if (format === "openai-responses") {
return (
`data: ${JSON.stringify({
type: "response.completed",
response: buildSpoofedCompletion({
format,
title,
message,
obj,
reqId,
model,
}),
})}\n\n`
);
}
event = {
id: "chatcmpl-" + id,
object: "chat.completion.chunk",
+27 -1
@@ -11,6 +11,7 @@ import { ProxyResHandlerWithBody } from ".";
import { assertNever } from "../../../shared/utils";
import {
AnthropicChatMessage,
flattenOpenAIResponsesInput,
flattenAnthropicMessages, GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
@@ -62,6 +63,7 @@ const getPromptForRequest = (
):
| string
| OpenAIChatMessage[]
| { instructions?: unknown; input?: unknown }
| { contents: GoogleAIChatMessage[] }
| { system: string; messages: AnthropicChatMessage[] }
| MistralAIChatMessage[]
@@ -73,6 +75,11 @@ const getPromptForRequest = (
case "openai": case "openai":
case "mistral-ai": case "mistral-ai":
return req.body.messages; return req.body.messages;
case "openai-responses":
return {
instructions: req.body.instructions,
input: req.body.input,
};
case "anthropic-chat": case "anthropic-chat":
return { system: req.body.system, messages: req.body.messages }; return { system: req.body.system, messages: req.body.messages };
case "openai-text": case "openai-text":
@@ -99,6 +106,7 @@ const flattenMessages = (
| string
| OaiImageResult
| OpenAIChatMessage[]
| { instructions?: unknown; input?: unknown }
| { contents: GoogleAIChatMessage[] }
| { system: string; messages: AnthropicChatMessage[] }
| MistralAIChatMessage[]
@@ -114,12 +122,20 @@ const flattenMessages = (
return val.contents
.map(({ parts, role }) => {
const text = parts
-.map((p) => p.text)
.map((p: any) => ("text" in p ? p.text : "(( Attached Image ))"))
.join("\n");
return `${role}: ${text}`;
})
.join("\n");
}
if (isOpenAIResponsesPrompt(val)) {
return [
flattenOpenAIResponsesInput(val.instructions),
flattenOpenAIResponsesInput(val.input),
]
.filter(Boolean)
.join("\n\n");
}
if (Array.isArray(val)) {
return val
.map(({ content, role }) => {
@@ -140,6 +156,16 @@ const flattenMessages = (
return val.prompt.trim();
};
function isOpenAIResponsesPrompt(
val: unknown
): val is { instructions?: unknown; input?: unknown } {
return (
typeof val === "object" &&
val !== null &&
("instructions" in val || "input" in val)
);
}
function isGoogleAIChatPrompt(
val: unknown
): val is { contents: GoogleAIChatMessage[] } {
@@ -8,6 +8,7 @@ import {
mergeEventsForOpenAIText,
AnthropicV2StreamEvent,
OpenAIChatCompletionStreamEvent,
OpenAIResponsesStreamEvent,
} from "./index"; } from "./index";
/** /**
@@ -17,13 +18,36 @@ import {
export class EventAggregator {
private readonly format: APIFormat;
private readonly events: OpenAIChatCompletionStreamEvent[];
private responseBody: Record<string, any> | null;
private responseEventCount: number;
private responseOutputText: string;
constructor({ format }: { format: APIFormat }) {
this.events = [];
this.format = format;
this.responseBody = null;
this.responseEventCount = 0;
this.responseOutputText = "";
}
-addEvent(event: OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent) {
addEvent(
event:
| OpenAIChatCompletionStreamEvent
| AnthropicV2StreamEvent
| OpenAIResponsesStreamEvent
) {
if (eventIsOpenAIResponsesEvent(event)) {
this.responseEventCount++;
if (event.response && typeof event.response === "object") {
this.responseBody = event.response;
}
if (event.type === "response.output_text.delta") {
this.responseOutputText += event.delta || event.text || "";
}
return;
}
if (eventIsOpenAIEvent(event)) {
this.events.push(event);
} else {
@@ -52,8 +76,15 @@ export class EventAggregator {
getFinalResponse() {
switch (this.format) {
case "openai":
case "openai-responses":
case "google-ai": case "google-ai":
case "mistral-ai": case "mistral-ai":
if (this.format === "openai-responses") {
if (this.responseBody) {
return this.responseBody;
}
return { output_text: this.responseOutputText };
}
return mergeEventsForOpenAIChat(this.events);
case "openai-text":
return mergeEventsForOpenAIText(this.events);
@@ -69,7 +100,7 @@ export class EventAggregator {
}
hasEvents() {
-return this.events.length > 0;
return this.events.length > 0 || this.responseEventCount > 0;
}
}
@@ -78,3 +109,9 @@ function eventIsOpenAIEvent(
): event is OpenAIChatCompletionStreamEvent {
return event?.object === "chat.completion.chunk";
}
function eventIsOpenAIResponsesEvent(
event: any
): event is OpenAIResponsesStreamEvent {
return typeof event?.type === "string" && event.type.startsWith("response.");
}
@@ -26,6 +26,14 @@ export type OpenAIChatCompletionStreamEvent = {
}[];
};
export type OpenAIResponsesStreamEvent = {
type: string;
response?: Record<string, any>;
delta?: string;
text?: string;
[key: string]: any;
};
export type StreamingCompletionTransformer<
T = OpenAIChatCompletionStreamEvent,
S = any,
@@ -42,6 +50,7 @@ export { anthropicChatToAnthropicV2 } from "./transformers/anthropic-chat-to-ant
export { anthropicChatToOpenAI } from "./transformers/anthropic-chat-to-openai";
export { googleAIToOpenAI } from "./transformers/google-ai-to-openai";
export { passthroughToOpenAI } from "./transformers/passthrough-to-openai";
export { passthroughToOpenAIResponses } from "./transformers/passthrough-to-openai-responses";
export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat";
export { mergeEventsForOpenAIText } from "./aggregators/openai-text";
export { mergeEventsForAnthropicText } from "./aggregators/anthropic-text";
@@ -10,8 +10,10 @@ import {
anthropicV2ToOpenAI,
googleAIToOpenAI,
OpenAIChatCompletionStreamEvent,
OpenAIResponsesStreamEvent,
openAITextToOpenAIChat,
passthroughToOpenAI,
passthroughToOpenAIResponses,
StreamingCompletionTransformer,
} from "./index";
@@ -35,7 +37,9 @@ export class SSEMessageTransformer extends Transform {
private readonly inputFormat: APIFormat;
private readonly transformFn: StreamingCompletionTransformer<
// TODO: Refactor transformers to not assume only OpenAI events as output
-OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
| OpenAIChatCompletionStreamEvent
| AnthropicV2StreamEvent
| OpenAIResponsesStreamEvent
>;
private readonly log;
private readonly fallbackId: string;
@@ -126,12 +130,14 @@ function getTransformer(
// used for that case.
requestApi: APIFormat = "openai"
): StreamingCompletionTransformer<
-OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent | OpenAIResponsesStreamEvent
> {
switch (responseApi) {
case "openai":
case "mistral-ai":
return passthroughToOpenAI;
case "openai-responses":
return passthroughToOpenAIResponses;
case "openai-text": case "openai-text":
return openAITextToOpenAIChat; return openAITextToOpenAIChat;
case "anthropic-text": case "anthropic-text":
@@ -0,0 +1,43 @@
import {
OpenAIResponsesStreamEvent,
SSEResponseTransformArgs,
StreamingCompletionTransformer,
} from "../index";
import { parseEvent, ServerSentEvent } from "../parse-sse";
import { logger } from "../../../../../logger";
const log = logger.child({
module: "sse-transformer",
transformer: "openai-responses-to-openai-responses",
});
export const passthroughToOpenAIResponses: StreamingCompletionTransformer<
OpenAIResponsesStreamEvent
> = (
params: SSEResponseTransformArgs
) => {
const { data } = params;
const rawEvent = parseEvent(data);
if (!rawEvent.data || rawEvent.data === "[DONE]") {
return { position: -1 };
}
const responseEvent = asResponseEvent(rawEvent);
if (!responseEvent) {
return { position: -1 };
}
return { position: -1, event: responseEvent };
};
function asResponseEvent(
event: ServerSentEvent
): OpenAIResponsesStreamEvent | null {
try {
return JSON.parse(event.data) as OpenAIResponsesStreamEvent;
} catch (error) {
    log.warn({ error: error instanceof Error ? error.stack : error, event }, "Received invalid event");
}
return null;
}
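A minimal usage sketch of the pass-through behavior, assuming SSEResponseTransformArgs carries at least the raw SSE data payload (the cast and the event shape are illustrative, not exhaustive):

// Hypothetical streaming delta in the Responses API's wire format.
const out = passthroughToOpenAIResponses({
  data: 'data: {"type":"response.output_text.delta","delta":"Hi"}',
} as SSEResponseTransformArgs);
// out.event is the parsed JSON payload; the "[DONE]" sentinel and
// unparseable data both yield { position: -1 } with no event.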
+20 -16
View File
@@ -24,25 +24,29 @@ import {
// https://docs.mistral.ai/platform/endpoints // https://docs.mistral.ai/platform/endpoints
export const KNOWN_MISTRAL_AI_MODELS = [ export const KNOWN_MISTRAL_AI_MODELS = [
// Mistral 7b (open weight, legacy)
"open-mistral-7b",
"mistral-tiny-2312",
// Mixtral 8x7b (open weight, legacy)
"open-mixtral-8x7b",
"mistral-small-2312",
// Mixtral Small (newer 8x7b, closed weight)
"mistral-small-latest", "mistral-small-latest",
"mistral-small-2402", "mistral-small-2603",
// Mistral Medium "mistral-small-2506",
"mistral-medium-latest", "mistral-medium-latest",
"mistral-medium-2312", "mistral-medium-2508",
// Mistral Large "mistral-medium-2505",
"magistral-medium-latest",
"magistral-medium-2507",
"magistral-small-2507",
"mistral-large-latest", "mistral-large-latest",
"mistral-large-2402", "mistral-large-2512",
// Deprecated identifiers (2024-05-01) "ministral-14b-2512",
"mistral-tiny", "ministral-8b-latest",
"mistral-small", "ministral-8b-2512",
"mistral-medium", "ministral-3b-latest",
"ministral-3b-2512",
"pixtral-large-latest",
"pixtral-large-2411",
"codestral-latest",
"codestral-2508",
"devstral-small-latest",
"devstral-small-2507",
"devstral-medium-2507",
]; ];
let modelsCache: any = null; let modelsCache: any = null;
+7 -1
View File
@@ -18,7 +18,13 @@ import {
import { generateModelList } from "./openai"; import { generateModelList } from "./openai";
import { OpenAIImageGenerationResult } from "../shared/file-storage/mirror-generated-image"; import { OpenAIImageGenerationResult } from "../shared/file-storage/mirror-generated-image";
const KNOWN_MODELS = ["dall-e-2", "dall-e-3"]; const KNOWN_MODELS = [
"dall-e-2",
"dall-e-3",
"gpt-image-1.5",
"gpt-image-1",
"gpt-image-1-mini",
];
let modelListCache: any = null; let modelListCache: any = null;
let modelListValid = 0; let modelListValid = 0;
+63 -10
View File
@@ -28,28 +28,57 @@ import {
// https://platform.openai.com/docs/models/overview // https://platform.openai.com/docs/models/overview
export const KNOWN_OPENAI_MODELS = [ export const KNOWN_OPENAI_MODELS = [
"gpt-5.2",
"gpt-5.2-chat",
"gpt-5.2-chat-latest",
"gpt-5.2-pro",
"gpt-5.2-codex",
"gpt-5.1",
"gpt-5.1-chat",
"gpt-5.1-codex",
"gpt-5.1-codex-mini",
"gpt-5.1-codex-max",
"gpt-5",
"gpt-5-chat",
"gpt-5-pro",
"gpt-5-codex",
"gpt-5-mini",
"gpt-5-nano",
"gpt-4.1",
"gpt-4.1-2025-04-14",
"gpt-4.1-mini",
"gpt-4.1-nano",
"o3-pro",
"o3-deep-research",
"computer-use-preview",
"o4-mini",
"o4-mini-deep-research",
"o3",
"o3-mini",
"o1",
"o1-pro",
"gpt-4o", "gpt-4o",
"gpt-4o-2024-08-06",
"gpt-4o-mini",
"gpt-4o-2024-05-13", "gpt-4o-2024-05-13",
"gpt-4-turbo", // alias for latest gpt4-turbo stable "gpt-4-turbo", // alias for latest gpt4-turbo stable
"gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision "gpt-4-turbo-2024-04-09", // gpt4-turbo stable, with vision
"gpt-4-turbo-preview", // alias for latest turbo preview
"gpt-4-0125-preview", // gpt4-turbo preview 2
"gpt-4-1106-preview", // gpt4-turbo preview 1
"gpt-4-vision-preview", // gpt4-turbo preview 1 with vision
"gpt-4", "gpt-4",
"gpt-4-0613", "gpt-4-0613",
"gpt-4-0314", // EOL 2024-06-13
"gpt-4-32k", "gpt-4-32k",
"gpt-4-32k-0314", // EOL 2024-06-13
"gpt-4-32k-0613", "gpt-4-32k-0613",
"gpt-3.5-turbo", "gpt-3.5-turbo",
"gpt-3.5-turbo-0301", // EOL 2024-06-13
"gpt-3.5-turbo-0613", "gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct",
"gpt-3.5-turbo-instruct-0914", "gpt-3.5-turbo-instruct-0914",
"text-embedding-3-small",
"text-embedding-3-large",
"text-embedding-ada-002", "text-embedding-ada-002",
"gpt-image-1.5",
"gpt-image-1",
"gpt-image-1-mini",
"dall-e-3",
"dall-e-2",
]; ];
let modelsCache: any = null; let modelsCache: any = null;
@@ -59,11 +88,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
// Get available families and snapshots // Get available families and snapshots
let availableFamilies = new Set<OpenAIModelFamily>(); let availableFamilies = new Set<OpenAIModelFamily>();
const availableSnapshots = new Set<string>(); const availableSnapshots = new Set<string>();
const availableModelIds = new Set<string>();
for (const key of keyPool.list()) { for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "openai") continue; if (key.isDisabled || key.service !== "openai") continue;
const asOpenAIKey = key as OpenAIKey; const asOpenAIKey = key as OpenAIKey;
asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f)); asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f));
asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s)); asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s));
asOpenAIKey.modelIds.forEach((id) => availableModelIds.add(id));
} }
// Remove disabled families // Remove disabled families
@@ -71,8 +102,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
availableFamilies = new Set( availableFamilies = new Set(
[...availableFamilies].filter((x) => allowed.has(x)) [...availableFamilies].filter((x) => allowed.has(x))
); );
const usingExactModelIds = availableModelIds.size > 0;
return models const sourceModels = usingExactModelIds
? [...new Set([...models, ...availableModelIds])]
: models;
return sourceModels
.map((id) => ({ .map((id) => ({
id, id,
object: "model", object: "model",
@@ -92,6 +128,13 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
parent: null, parent: null,
})) }))
.filter((model) => { .filter((model) => {
if (usingExactModelIds) {
return (
allowed.has(getOpenAIModelFamily(model.id)) &&
availableModelIds.has(model.id)
);
}
// First check if the family is available // First check if the family is available
const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id)); const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id));
if (!hasFamily) return false; if (!hasFamily) return false;
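In effect, once any active key reports an exact model inventory, the advertised list becomes the union of known and reported IDs filtered down to those that are both reported and in an allowed family; a key reporting only "gpt-5" and "gpt-4o" yields exactly those two entries from /v1/models, even though KNOWN_OPENAI_MODELS contains dozens of IDs. Without exact inventories, the family/snapshot heuristics below apply unchanged.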
@@ -233,6 +276,16 @@ openaiRouter.post(
}), }),
openaiProxy openaiProxy
); );
openaiRouter.post(
"/v1/responses",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai-responses",
outApi: "openai-responses",
service: "openai",
}),
openaiProxy
);
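For reference, a minimal client call against the new route could look like the sketch below; the host, mount prefix, and bearer token are placeholder assumptions, since the public path depends on how the proxy is deployed:

const res = await fetch("https://proxy.example.com/openai/v1/responses", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: "Bearer <user-token>", // hypothetical proxy user token
  },
  body: JSON.stringify({ model: "gpt-5", input: "Say hi.", stream: false }),
});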
// Embeddings endpoint. // Embeddings endpoint.
openaiRouter.post( openaiRouter.post(
"/v1/embeddings", "/v1/embeddings",
+20 -13
View File
@@ -31,18 +31,24 @@ export const AnthropicV1TextSchema = AnthropicV1BaseSchema.merge(
.int() .int()
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)), .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
}) })
); ).passthrough();
const AnthropicV1MessageMultimodalContentSchema = z.array( const AnthropicV1MessageMultimodalContentSchema = z.array(
z.union([ z.union([
z.object({ type: z.literal("text"), text: z.string() }), z.object({ type: z.literal("text"), text: z.string() }),
z.object({ z.object({
type: z.literal("image"), type: z.literal("image"),
source: z.object({ source: z.union([
type: z.literal("base64"), z.object({
media_type: z.string().max(100), type: z.literal("base64"),
data: z.string(), media_type: z.string().max(100),
}), data: z.string(),
}),
z.object({
type: z.literal("url"),
url: z.string().url(),
}),
]),
}), }),
]) ])
); );
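With the widened source union, both of these content blocks now validate (sample payload):

const message = {
  role: "user",
  content: [
    { type: "text", text: "What is in this image?" },
    { type: "image", source: { type: "url", url: "https://example.com/cat.png" } },
    // Base64 uploads keep working exactly as before:
    // { type: "image", source: { type: "base64", media_type: "image/png", data: "..." } },
  ],
};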
@@ -65,7 +71,7 @@ export const AnthropicV1MessagesSchema = AnthropicV1BaseSchema.merge(
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)), .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
system: z.string().optional(), system: z.string().optional(),
}) })
); ).passthrough();
export type AnthropicChatMessage = z.infer< export type AnthropicChatMessage = z.infer<
typeof AnthropicV1MessagesSchema typeof AnthropicV1MessagesSchema
>["messages"][0]; >["messages"][0];
@@ -77,7 +83,7 @@ function openAIMessagesToClaudeTextPrompt(messages: OpenAIChatMessage[]) {
let role: string = m.role; let role: string = m.role;
if (role === "assistant") { if (role === "assistant") {
role = "Assistant"; role = "Assistant";
} else if (role === "system") { } else if (role === "system" || role === "developer") {
role = "System"; role = "System";
} else if (role === "user") { } else if (role === "user") {
role = "Human"; role = "Human";
@@ -115,12 +121,13 @@ export const transformOpenAIToAnthropicChat: APIFormatTransformer<
system, system,
messages: newMessages, messages: newMessages,
model: rest.model, model: rest.model,
max_tokens: rest.max_tokens, max_tokens: rest.max_completion_tokens ?? rest.max_tokens,
stream: rest.stream, stream: rest.stream,
temperature: rest.temperature, temperature: rest.temperature,
top_p: rest.top_p, top_p: rest.top_p,
stop_sequences: stop_sequences:
typeof rest.stop === "string" ? [rest.stop] : rest.stop || undefined, typeof rest.stop === "string" ? [rest.stop] : rest.stop || undefined,
...(rest.thinking ? { thinking: rest.thinking } : {}),
...(rest.user ? { metadata: { user_id: rest.user } } : {}), ...(rest.user ? { metadata: { user_id: rest.user } } : {}),
// Anthropic supports top_k, but OpenAI does not // Anthropic supports top_k, but OpenAI does not
// OpenAI supports frequency_penalty, presence_penalty, logit_bias, n, seed, // OpenAI supports frequency_penalty, presence_penalty, logit_bias, n, seed,
@@ -162,7 +169,7 @@ export const transformOpenAIToAnthropicText: APIFormatTransformer<
return { return {
model: rest.model, model: rest.model,
prompt: prompt, prompt: prompt,
max_tokens_to_sample: rest.max_tokens, max_tokens_to_sample: rest.max_completion_tokens ?? rest.max_tokens,
stop_sequences: stops, stop_sequences: stops,
stream: rest.stream, stream: rest.stream,
temperature: rest.temperature, temperature: rest.temperature,
@@ -366,7 +373,7 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
// Here we will lose the original name if it was a system message, but that // Here we will lose the original name if it was a system message, but that
// is generally okay because the system message is usually a prompt and not // is generally okay because the system message is usually a prompt and not
// a character in the chat. // a character in the chat.
const name = msg.role === "system" ? "System" : msg.name?.trim(); const name = isSystemOpenAIRole(msg.role) ? "System" : msg.name?.trim();
const content = convertOpenAIContent(msg.content); const content = convertOpenAIContent(msg.content);
// Prepend the display name to the first text content in the current message // Prepend the display name to the first text content in the current message
@@ -396,8 +403,8 @@ function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
function isSystemOpenAIRole( function isSystemOpenAIRole(
role: OpenAIChatMessage["role"] role: OpenAIChatMessage["role"]
): role is "system" | "function" | "tool" { ): role is "system" | "developer" | "function" | "tool" {
return ["system", "function", "tool"].includes(role); return ["system", "developer", "function", "tool"].includes(role);
} }
function getFirstTextContent(content: OpenAIChatMessage["content"]) { function getFirstTextContent(content: OpenAIChatMessage["content"]) {
+201 -29
View File
@@ -1,42 +1,62 @@
import { z } from "zod"; import { z } from "zod";
import { import {
flattenOpenAIMessageContent, flattenOpenAIMessageContent,
OpenAIChatMessage,
OpenAIV1ChatCompletionSchema, OpenAIV1ChatCompletionSchema,
} from "./openai"; } from "./openai";
import { APIFormatTransformer } from "./index"; import { APIFormatTransformer } from "./index";
const GoogleAIContentPartSchema = z.union([
z.object({ text: z.string() }),
z.object({
inline_data: z.object({
mime_type: z.string().max(100),
data: z.string(),
}),
}),
]);
// https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent // https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent
export const GoogleAIV1GenerateContentSchema = z export const GoogleAIV1GenerateContentSchema = z
.object({ .object({
model: z.string().max(100), //actually specified in path but we need it for the router model: z.string().max(100), // actually specified in path but we need it for the router
stream: z.boolean().optional().default(false), // also used for router stream: z.boolean().optional().default(false), // also used for router
contents: z.array( contents: z.array(
z.object({ z.object({
parts: z.array(z.object({ text: z.string() })), parts: z.array(GoogleAIContentPartSchema),
role: z.enum(["user", "model"]), role: z.enum(["user", "model"]),
}) })
), ),
tools: z.array(z.object({})).max(0).optional(), tools: z.array(z.any()).optional(),
safetySettings: z.array(z.object({})).max(0).optional(), toolConfig: z.any().optional(),
safetySettings: z.array(z.any()).optional(),
systemInstruction: z.any().optional(),
generationConfig: z.object({ generationConfig: z.object({
temperature: z.number().optional(), temperature: z.number().optional(),
maxOutputTokens: z.coerce maxOutputTokens: z.coerce
.number() .number()
.int() .int()
.optional() .optional()
.default(16) .default(1024)
.transform((v) => Math.min(v, 1024)), // TODO: Add config .transform((v) => Math.min(v, 65536)),
candidateCount: z.literal(1).optional(), candidateCount: z.literal(1).optional(),
topP: z.number().optional(), topP: z.number().optional(),
topK: z.number().optional(), topK: z.number().optional(),
responseMimeType: z.string().optional(),
responseSchema: z.any().optional(),
responseJsonSchema: z.any().optional(),
responseModalities: z.array(z.string()).optional(),
thinkingConfig: z.any().optional(),
stopSequences: z.array(z.string().max(500)).max(5).optional(), stopSequences: z.array(z.string().max(500)).max(5).optional(),
}), }),
}) })
.strip(); .passthrough();
export type GoogleAIChatMessage = z.infer< export type GoogleAIChatMessage = z.infer<
typeof GoogleAIV1GenerateContentSchema typeof GoogleAIV1GenerateContentSchema
>["contents"][0]; >["contents"][0];
type GoogleAIPart = GoogleAIChatMessage["parts"][number];
export const transformOpenAIToGoogleAI: APIFormatTransformer< export const transformOpenAIToGoogleAI: APIFormatTransformer<
typeof GoogleAIV1GenerateContentSchema typeof GoogleAIV1GenerateContentSchema
> = async (req) => { > = async (req) => {
@@ -54,40 +74,51 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
} }
const { messages, ...rest } = result.data; const { messages, ...rest } = result.data;
const systemMessages = messages.filter(
(m) => m.role === "system" || m.role === "developer"
);
const foundNames = new Set<string>(); const foundNames = new Set<string>();
const model = req.body.model;
const customThinkingConfig =
getObjectField(body, "thinkingConfig") ??
getObjectField(getObjectField(body, "generationConfig"), "thinkingConfig");
const customResponseModalities = getStringArrayField(
getObjectField(body, "generationConfig"),
"responseModalities"
);
const contents = messages const contents = messages
.filter((m) => m.role !== "system" && m.role !== "developer")
.map((m) => { .map((m) => {
const role = m.role === "assistant" ? "model" : "user"; const role = m.role === "assistant" ? "model" : "user";
// Detects character names so we can set stop sequences for them as Gemini const parts = convertOpenAIContent(m.content);
// is prone to continuing as the next character. const text = parts
// If names are not available, we'll still try to prefix the message .map((part) => ("text" in part ? part.text : ""))
// with generic names so we can set stops for them but they don't work .join("\n");
// as well as real names.
const text = flattenOpenAIMessageContent(m.content);
const propName = m.name?.trim(); const propName = m.name?.trim();
const textName = const textName = text.match(/^(.{0,50}?): /)?.[1]?.trim();
m.role === "system" ? "" : text.match(/^(.{0,50}?): /)?.[1]?.trim(); const name = propName || textName || (role === "model" ? "Character" : "User");
const name =
propName || textName || (role === "model" ? "Character" : "User");
foundNames.add(name); foundNames.add(name);
// Prefixing messages with their character name seems to help avoid // Prefixing speaker names helps Gemini avoid continuing as the next
// Gemini trying to continue as the next character, or at the very least // character in multi-party roleplay/chat prompts.
// ensures it will hit the stop sequence. Otherwise it will start a new
// paragraph and switch perspectives.
// The response will be very likely to include this prefix so frontends
// will need to strip it out.
const textPrefix = textName ? "" : `${name}: `; const textPrefix = textName ? "" : `${name}: `;
const firstTextPart = parts.find(
(part): part is Extract<GoogleAIPart, { text: string }> => "text" in part
);
if (firstTextPart) {
firstTextPart.text = textPrefix + firstTextPart.text;
}
return { return {
parts: [{ text: textPrefix + text }], parts,
role: m.role === "assistant" ? ("model" as const) : ("user" as const), role: m.role === "assistant" ? ("model" as const) : ("user" as const),
}; };
}) })
.reduce<GoogleAIChatMessage[]>((acc, msg) => { .reduce<GoogleAIChatMessage[]>((acc, msg) => {
const last = acc[acc.length - 1]; const last = acc[acc.length - 1];
if (last?.role === msg.role) { if (last?.role === msg.role) {
last.parts[0].text += "\n\n" + msg.parts[0].text; last.parts.push(...msg.parts);
} else { } else {
acc.push(msg); acc.push(msg);
} }
@@ -102,17 +133,44 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
stops.push(...Array.from(foundNames).map((name) => `\n${name}:`)); stops.push(...Array.from(foundNames).map((name) => `\n${name}:`));
stops = [...new Set(stops)].slice(0, 5); stops = [...new Set(stops)].slice(0, 5);
const responseFormat = rest.response_format as Record<string, any> | undefined;
const maxOutputTokens =
rest.max_completion_tokens ?? rest.max_tokens ?? 1024;
return { return {
model: req.body.model, model,
stream: rest.stream, stream: rest.stream,
contents, contents,
tools: [], tools: Array.isArray(rest.tools) ? rest.tools : undefined,
systemInstruction: systemMessages.length
? {
parts: [
{
text: systemMessages
.map((msg) => flattenOpenAIMessageContent(msg.content))
.join("\n\n"),
},
],
}
: undefined,
generationConfig: { generationConfig: {
maxOutputTokens: rest.max_tokens, maxOutputTokens,
stopSequences: stops, stopSequences: stops,
topP: rest.top_p, topP: rest.top_p,
topK: 40, // openai schema doesn't have this, google ai defaults to 40 topK: 40, // OpenAI schema doesn't expose this; Gemini defaults to 40.
temperature: rest.temperature, temperature: rest.temperature,
responseMimeType:
responseFormat?.type === "json_object" ||
responseFormat?.type === "json_schema"
? "application/json"
: undefined,
responseSchema: responseFormat?.json_schema?.schema,
responseJsonSchema: responseFormat?.json_schema?.schema,
responseModalities:
customResponseModalities ??
(isGoogleAIImageModel(model) ? ["TEXT", "IMAGE"] : undefined),
thinkingConfig:
customThinkingConfig ?? getThinkingConfig(model, rest.reasoning_effort),
}, },
safetySettings: [ safetySettings: [
{ category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
@@ -122,3 +180,117 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer<
], ],
}; };
}; };
function convertOpenAIContent(
content: OpenAIChatMessage["content"]
): GoogleAIPart[] {
if (typeof content === "string") {
return [{ text: content }];
}
return content.map((item) => {
if ("text" in item) {
return { text: item.text };
}
if ("refusal" in item) {
return { text: item.refusal };
}
const url = item.image_url.url;
if (!url.startsWith("data:")) {
return { text: "[ Unsupported image URL ]" };
}
const [meta, data = ""] = url.split(",", 2);
const mimeType = meta.split(";")[0].replace("data:", "");
return { inline_data: { mime_type: mimeType, data } };
});
}
function getThinkingConfig(model: string, reasoningEffort?: string) {
if (model.startsWith("gemini-2.5")) {
switch (reasoningEffort) {
case "none":
case "minimal":
case "low":
return { thinkingBudget: 0 };
default:
return undefined;
}
}
switch (reasoningEffort) {
case "low":
case "minimal":
case "none":
return { thinkingLevel: "LOW" };
case "medium":
case "high":
case "xhigh":
return { thinkingLevel: "HIGH" };
default:
return undefined;
}
}
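The mapping this produces, for a few representative inputs:

// getThinkingConfig("gemini-2.5-pro", "low")    → { thinkingBudget: 0 }
// getThinkingConfig("gemini-2.5-pro", "high")   → undefined (model default)
// getThinkingConfig("gemini-3-pro", "minimal")  → { thinkingLevel: "LOW" }
// getThinkingConfig("gemini-3-pro", "high")     → { thinkingLevel: "HIGH" }
// getThinkingConfig("gemini-3-pro", undefined)  → undefined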
export function isGoogleAIImageModel(model: string) {
return [
"gemini-2.0-flash-preview-image-generation",
"gemini-2.5-flash-image",
"gemini-3-pro-image-preview",
].includes(model);
}
export function flattenGoogleAIContentParts(
parts: Array<Record<string, any>> | undefined
) {
return (parts ?? [])
.map((part) => {
if (typeof part?.text === "string") {
return part.text;
}
const inlineData = part?.inline_data ?? part?.inlineData;
if (inlineData?.data) {
const mimeType = inlineData.mime_type ?? inlineData.mimeType ?? "image/png";
return `![generated image](data:${mimeType};base64,${inlineData.data})`;
}
return "";
})
.filter(Boolean)
.join("\n\n");
}
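For example, a candidate containing a text part followed by a camelCase inlineData image part flattens to markdown that frontends can render inline:

// flattenGoogleAIContentParts([
//   { text: "Here you go:" },
//   { inlineData: { mimeType: "image/png", data: "iVBORw0..." } },
// ])
// → "Here you go:\n\n![generated image](data:image/png;base64,iVBORw0...)"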
function getObjectField(
value: unknown,
key: string
): Record<string, any> | undefined {
if (
value &&
typeof value === "object" &&
!Array.isArray(value) &&
key in value &&
value[key as keyof typeof value] &&
typeof value[key as keyof typeof value] === "object" &&
!Array.isArray(value[key as keyof typeof value])
) {
return value[key as keyof typeof value] as Record<string, any>;
}
return undefined;
}
function getStringArrayField(value: unknown, key: string) {
if (
value &&
typeof value === "object" &&
!Array.isArray(value) &&
key in value &&
Array.isArray(value[key as keyof typeof value])
) {
return (value[key as keyof typeof value] as unknown[]).filter(
(item): item is string => typeof item === "string"
);
}
return undefined;
}
+12 -1
View File
@@ -17,6 +17,7 @@ import {
OpenAIV1ImagesGenerationSchema, OpenAIV1ImagesGenerationSchema,
transformOpenAIToOpenAIImage, transformOpenAIToOpenAIImage,
} from "./openai-image"; } from "./openai-image";
import { OpenAIResponsesCreateSchema } from "./openai-responses";
import { import {
GoogleAIV1GenerateContentSchema, GoogleAIV1GenerateContentSchema,
transformOpenAIToGoogleAI, transformOpenAIToGoogleAI,
@@ -24,13 +25,22 @@ import {
import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai"; import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai";
export { OpenAIChatMessage } from "./openai"; export { OpenAIChatMessage } from "./openai";
export {
containsOpenAIResponsesImageInput,
flattenOpenAIResponsesInput,
flattenOpenAIResponsesOutput,
} from "./openai-responses";
export { export {
AnthropicChatMessage, AnthropicChatMessage,
AnthropicV1TextSchema, AnthropicV1TextSchema,
AnthropicV1MessagesSchema, AnthropicV1MessagesSchema,
flattenAnthropicMessages, flattenAnthropicMessages,
} from "./anthropic"; } from "./anthropic";
export { GoogleAIChatMessage } from "./google-ai"; export {
GoogleAIChatMessage,
flattenGoogleAIContentParts,
isGoogleAIImageModel,
} from "./google-ai";
export { MistralAIChatMessage } from "./mistral-ai"; export { MistralAIChatMessage } from "./mistral-ai";
type APIPair = `${APIFormat}->${APIFormat}`; type APIPair = `${APIFormat}->${APIFormat}`;
@@ -55,6 +65,7 @@ export const API_REQUEST_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
"anthropic-chat": AnthropicV1MessagesSchema, "anthropic-chat": AnthropicV1MessagesSchema,
"anthropic-text": AnthropicV1TextSchema, "anthropic-text": AnthropicV1TextSchema,
openai: OpenAIV1ChatCompletionSchema, openai: OpenAIV1ChatCompletionSchema,
"openai-responses": OpenAIResponsesCreateSchema,
"openai-text": OpenAIV1TextCompletionSchema, "openai-text": OpenAIV1TextCompletionSchema,
"openai-image": OpenAIV1ImagesGenerationSchema, "openai-image": OpenAIV1ImagesGenerationSchema,
"google-ai": GoogleAIV1GenerateContentSchema, "google-ai": GoogleAIV1GenerateContentSchema,
+1 -1
View File
@@ -20,7 +20,7 @@ export const MistralAIV1ChatCompletionsSchema = z.object({
stream: z.boolean().optional().default(false), stream: z.boolean().optional().default(false),
safe_prompt: z.boolean().optional().default(false), safe_prompt: z.boolean().optional().default(false),
random_seed: z.number().int().optional(), random_seed: z.number().int().optional(),
}); }).passthrough();
export type MistralAIChatMessage = z.infer< export type MistralAIChatMessage = z.infer<
typeof MistralAIV1ChatCompletionsSchema typeof MistralAIV1ChatCompletionsSchema
>["messages"][0]; >["messages"][0];
+33 -9
View File
@@ -5,19 +5,34 @@ import { APIFormatTransformer } from "./index";
// https://platform.openai.com/docs/api-reference/images/create // https://platform.openai.com/docs/api-reference/images/create
export const OpenAIV1ImagesGenerationSchema = z export const OpenAIV1ImagesGenerationSchema = z
.object({ .object({
prompt: z.string().max(4000), prompt: z.string().max(32000),
model: z.string().max(100).optional(), model: z.string().max(100).optional(),
quality: z.enum(["standard", "hd"]).optional().default("standard"), quality: z
n: z.number().int().min(1).max(4).optional().default(1), .enum(["auto", "low", "medium", "high", "standard", "hd"])
.optional(),
n: z.number().int().min(1).max(10).optional().default(1),
response_format: z.enum(["url", "b64_json"]).optional(), response_format: z.enum(["url", "b64_json"]).optional(),
output_format: z.string().optional(),
output_compression: z.number().int().min(0).max(100).optional(),
size: z size: z
.enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]) .enum([
"auto",
"256x256",
"512x512",
"1024x1024",
"1024x1536",
"1536x1024",
"1792x1024",
"1024x1792",
])
.optional() .optional()
.default("1024x1024"), .default("1024x1024"),
style: z.enum(["vivid", "natural"]).optional().default("vivid"), style: z.enum(["vivid", "natural"]).optional().default("vivid"),
background: z.string().optional(),
moderation: z.string().optional(),
user: z.string().max(500).optional(), user: z.string().max(500).optional(),
}) })
.strip(); .passthrough();
// Takes the last chat message and uses it verbatim as the image prompt. // Takes the last chat message and uses it verbatim as the image prompt.
export const transformOpenAIToOpenAIImage: APIFormatTransformer< export const transformOpenAIToOpenAIImage: APIFormatTransformer<
@@ -57,12 +72,21 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
} }
// TODO: Add some way to specify parameters via chat message // TODO: Add some way to specify parameters via chat message
const transformed = { const requestedModel = String(body.model ?? "");
model: body.model.includes("dall-e") ? body.model : "dall-e-3", const model =
quality: "standard", requestedModel.includes("dall-e") || requestedModel.includes("gpt-image")
? requestedModel
: "gpt-image-1.5";
const transformed: Record<string, any> = {
model,
size: "1024x1024", size: "1024x1024",
response_format: "url",
prompt: prompt.slice(index! + 6).trim(), prompt: prompt.slice(index! + 6).trim(),
}; };
if (model.includes("dall-e")) {
transformed.quality = "standard";
transformed.response_format = "url";
}
return OpenAIV1ImagesGenerationSchema.parse(transformed); return OpenAIV1ImagesGenerationSchema.parse(transformed);
}; };
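The upshot: requests naming a dall-e-* or gpt-image-* model pass that model through, anything else now falls back to gpt-image-1.5 rather than dall-e-3, and the quality/response_format defaults are attached only for DALL-E models, since the GPT Image models do not accept response_format.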
+136
View File
@@ -0,0 +1,136 @@
import { z } from "zod";
import { OPENAI_OUTPUT_MAX } from "./openai";
const OpenAIResponsesReasoningSchema = z
.object({
effort: z.string().optional(),
summary: z.union([z.string(), z.array(z.string())]).optional(),
})
.passthrough();
const OpenAIResponsesTextSchema = z
.object({
format: z.any().optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
})
.passthrough();
export const OpenAIResponsesCreateSchema = z
.object({
model: z.string().max(100),
input: z.union([z.string(), z.array(z.any())]).optional(),
instructions: z.union([z.string(), z.array(z.any())]).optional(),
previous_response_id: z.string().max(100).optional(),
stream: z.boolean().optional().default(false),
max_output_tokens: z.coerce
.number()
.int()
.nullish()
.default(OPENAI_OUTPUT_MAX)
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
temperature: z.number().optional(),
top_p: z.number().optional(),
user: z.string().max(500).optional(),
safety_identifier: z.string().max(500).optional(),
metadata: z.record(z.any()).optional(),
tools: z.array(z.any()).optional(),
tool_choice: z.any().optional(),
parallel_tool_calls: z.boolean().optional(),
include: z.array(z.string()).optional(),
store: z.boolean().optional(),
background: z.boolean().optional(),
reasoning: OpenAIResponsesReasoningSchema.optional(),
text: OpenAIResponsesTextSchema.optional(),
})
.passthrough();
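A request body like the following parses cleanly under this schema; unknown fields survive .passthrough(), and max_output_tokens is clamped to the configured ceiling:

const parsed = OpenAIResponsesCreateSchema.parse({
  model: "gpt-5.1",
  input: [{ role: "user", content: [{ type: "input_text", text: "hello" }] }],
  reasoning: { effort: "low" },
  max_output_tokens: 999999, // coerced, then capped at OPENAI_OUTPUT_MAX
  some_future_field: true, // hypothetical; retained by .passthrough()
});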
export function flattenOpenAIResponsesInput(input: unknown): string {
return flattenResponseValue(input).trim();
}
export function flattenOpenAIResponsesOutput(body: Record<string, any>): string {
if (typeof body.output_text === "string" && body.output_text.trim()) {
return body.output_text.trim();
}
return flattenResponseValue(body.output ?? body.output_text).trim();
}
export function containsOpenAIResponsesImageInput(input: unknown): boolean {
return containsImage(input);
}
function flattenResponseValue(value: unknown): string {
if (value === null || value === undefined) return "";
if (typeof value === "string") return value;
if (typeof value === "number" || typeof value === "boolean") {
return String(value);
}
if (Array.isArray(value)) {
return value
.map((item) => flattenResponseValue(item))
.filter(Boolean)
.join("\n");
}
if (!isRecord(value)) return "";
const typed = value;
if (hasStringProp(typed, "text")) return typed.text;
if (hasStringProp(typed, "refusal")) return typed.refusal;
if (hasStringProp(typed, "summary")) return typed.summary;
if (hasStringProp(typed, "arguments")) return typed.arguments;
if (hasStringProp(typed, "result")) return typed.result;
const type = String(typed.type ?? "");
if (type.includes("image")) return "[ Uploaded Image Omitted ]";
if (type.includes("file")) return "[ File Omitted ]";
if (typeof typed.role === "string" && typed.content !== undefined) {
const content = flattenResponseValue(typed.content);
return content ? `${typed.role}: ${content}` : typed.role;
}
const nested = [
typed.content,
typed.input,
typed.output,
typed.summary,
typed.results,
typed.item,
typed.items,
];
for (const candidate of nested) {
const flattened = flattenResponseValue(candidate);
if (flattened) return flattened;
}
return "";
}
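Worked example: reasoning summaries and role-tagged messages both surface as text, joined by newlines.

// flattenOpenAIResponsesOutput({
//   output: [
//     { type: "reasoning", summary: [{ type: "summary_text", text: "Plan the reply." }] },
//     { type: "message", role: "assistant",
//       content: [{ type: "output_text", text: "Hello!" }] },
//   ],
// })
// → "Plan the reply.\nassistant: Hello!"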
function containsImage(value: unknown): boolean {
if (value === null || value === undefined) return false;
if (Array.isArray(value)) return value.some((item) => containsImage(item));
if (!isRecord(value)) return false;
const typed = value;
const type = String(typed.type ?? "");
if (type.includes("image")) return true;
if (typed.image_url || typed.image || typed.input_image || typed.inline_data) {
return true;
}
return Object.values(typed).some((item) => containsImage(item));
}
function hasStringProp<T extends string>(
value: Record<string, unknown>,
key: T
): value is Record<string, unknown> & Record<T, string> {
return typeof value[key] === "string";
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null;
}
+32 -5
View File
@@ -7,6 +7,7 @@ export const OPENAI_OUTPUT_MAX = config.maxOutputTokensOpenAI;
const OpenAIV1ChatContentArraySchema = z.array( const OpenAIV1ChatContentArraySchema = z.array(
z.union([ z.union([
z.object({ type: z.literal("text"), text: z.string() }), z.object({ type: z.literal("text"), text: z.string() }),
z.object({ type: z.literal("refusal"), refusal: z.string() }),
z.object({ z.object({
type: z.union([z.literal("image"), z.literal("image_url")]), type: z.union([z.literal("image"), z.literal("image_url")]),
image_url: z.object({ image_url: z.object({
@@ -21,7 +22,14 @@ export const OpenAIV1ChatCompletionSchema = z
model: z.string().max(100), model: z.string().max(100),
messages: z.array( messages: z.array(
z.object({ z.object({
role: z.enum(["system", "user", "assistant", "tool", "function"]), role: z.enum([
"system",
"developer",
"user",
"assistant",
"tool",
"function",
]),
content: z.union([z.string(), OpenAIV1ChatContentArraySchema]), content: z.union([z.string(), OpenAIV1ChatContentArraySchema]),
name: z.string().optional(), name: z.string().optional(),
tool_calls: z.array(z.any()).optional(), tool_calls: z.array(z.any()).optional(),
@@ -54,11 +62,20 @@ export const OpenAIV1ChatCompletionSchema = z
.nullish() .nullish()
.default(Math.min(OPENAI_OUTPUT_MAX, 4096)) .default(Math.min(OPENAI_OUTPUT_MAX, 4096))
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)), .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
max_completion_tokens: z.coerce
.number()
.int()
.nullish()
.transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
frequency_penalty: z.number().optional().default(0), frequency_penalty: z.number().optional().default(0),
presence_penalty: z.number().optional().default(0), presence_penalty: z.number().optional().default(0),
logit_bias: z.any().optional(), logit_bias: z.any().optional(),
metadata: z.record(z.any()).optional(),
user: z.string().max(500).optional(), user: z.string().max(500).optional(),
safety_identifier: z.string().max(500).optional(),
seed: z.number().int().optional(), seed: z.number().int().optional(),
prompt_cache_key: z.string().max(500).optional(),
prompt_cache_retention: z.string().optional(),
// Be warned that Azure OpenAI combines these two into a single field. // Be warned that Azure OpenAI combines these two into a single field.
// It's the only deviation from the OpenAI API that I'm aware of so I have // It's the only deviation from the OpenAI API that I'm aware of so I have
// special cased it in `addAzureKey` rather than expecting clients to do it. // special cased it in `addAzureKey` rather than expecting clients to do it.
@@ -70,14 +87,23 @@ export const OpenAIV1ChatCompletionSchema = z
functions: z.array(z.any()).optional(), functions: z.array(z.any()).optional(),
tool_choice: z.any().optional(), tool_choice: z.any().optional(),
function_choice: z.any().optional(), function_choice: z.any().optional(),
response_format: z.any(), response_format: z.any().optional(),
parallel_tool_calls: z.boolean().optional(),
reasoning_effort: z.string().optional(),
stream_options: z.any().optional(),
modalities: z.array(z.string()).optional(),
audio: z.any().optional(),
prediction: z.any().optional(),
web_search_options: z.any().optional(),
service_tier: z.string().optional(),
verbosity: z.enum(["low", "medium", "high"]).optional(),
}) })
// Tool usage must be enabled via config because we currently have no way to // Tool usage must be enabled via config because we currently have no way to
// track quota usage for them or enforce limits. // track quota usage for them or enforce limits.
.omit( .omit(
Boolean(config.allowOpenAIToolUsage) ? {} : { tools: true, functions: true } Boolean(config.allowOpenAIToolUsage) ? {} : { tools: true, functions: true }
) )
.strip(); .passthrough();
export type OpenAIChatMessage = z.infer< export type OpenAIChatMessage = z.infer<
typeof OpenAIV1ChatCompletionSchema typeof OpenAIV1ChatCompletionSchema
>["messages"][0]; >["messages"][0];
@@ -89,6 +115,7 @@ export function flattenOpenAIMessageContent(
? content ? content
.map((contentItem) => { .map((contentItem) => {
if ("text" in contentItem) return contentItem.text; if ("text" in contentItem) return contentItem.text;
if ("refusal" in contentItem) return contentItem.refusal;
if ("image_url" in contentItem) return "[ Uploaded Image Omitted ]"; if ("image_url" in contentItem) return "[ Uploaded Image Omitted ]";
}) })
.join("\n") .join("\n")
@@ -107,7 +134,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
let role: string = m.role; let role: string = m.role;
if (role === "assistant") { if (role === "assistant") {
role = "Assistant"; role = "Assistant";
} else if (role === "system") { } else if (role === "system" || role === "developer") {
role = "System"; role = "System";
} else if (role === "user") { } else if (role === "user") {
role = "User"; role = "User";
@@ -121,7 +148,7 @@ export function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) {
.map((m) => { .map((m) => {
// Claude without prefixes (except system) and no Assistant priming // Claude without prefixes (except system) and no Assistant priming
let role: string = ""; let role: string = "";
if (role === "system") { if (m.role === "system" || m.role === "developer") {
role = "System: "; role = "System: ";
} }
return `\n\n${role}${flattenOpenAIMessageContent(m.content)}`; return `\n\n${role}${flattenOpenAIMessageContent(m.content)}`;
+4 -4
View File
@@ -54,10 +54,10 @@ export class AwsKeyChecker extends KeyCheckerBase<AwsBedrockKey> {
if (isInitialCheck) { if (isInitialCheck) {
checks = [ checks = [
this.invokeModel("anthropic.claude-v2", key), this.invokeModel("anthropic.claude-v2", key),
this.invokeModel("anthropic.claude-3-sonnet-20240229-v1:0", key), this.invokeModel("anthropic.claude-sonnet-4-5-20250929-v1:0", key),
this.invokeModel("anthropic.claude-3-haiku-20240307-v1:0", key), this.invokeModel("anthropic.claude-haiku-4-5-20251001-v1:0", key),
this.invokeModel("anthropic.claude-3-opus-20240229-v1:0", key), this.invokeModel("anthropic.claude-opus-4-1-20250805-v1:0", key),
this.invokeModel("anthropic.claude-3-5-sonnet-20240620-v1:0", key), this.invokeModel("anthropic.claude-3-5-haiku-20241022-v1:0", key),
]; ];
} }
+29 -8
View File
@@ -35,9 +35,15 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
} }
protected async testKeyOrFail(key: AzureOpenAIKey) { protected async testKeyOrFail(key: AzureOpenAIKey) {
const model = await this.testModel(key); const result = await this.testModel(key);
this.log.info({ key: key.hash, deploymentModel: model }, "Checked key."); this.log.info(
this.updateKey(key.hash, { modelFamilies: [model] }); { key: key.hash, deploymentModel: result.modelIds[0] ?? result.family },
"Checked key."
);
this.updateKey(key.hash, {
modelFamilies: [result.family],
modelIds: result.modelIds,
});
} }
protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) { protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) {
@@ -107,7 +113,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
this.updateKey(key.hash, { lastChecked: next }); this.updateKey(key.hash, { lastChecked: next });
} }
private async testModel(key: AzureOpenAIKey) { private async testModel(key: AzureOpenAIKey): Promise<{
family: ReturnType<typeof getAzureOpenAIModelFamily>;
modelIds: string[];
}> {
const { apiKey, deploymentId, resourceName } = const { apiKey, deploymentId, resourceName } =
AzureOpenAIKeyChecker.getCredentialsFromKey(key); AzureOpenAIKeyChecker.getCredentialsFromKey(key);
const url = POST_CHAT_COMPLETIONS(resourceName, deploymentId); const url = POST_CHAT_COMPLETIONS(resourceName, deploymentId);
@@ -126,7 +135,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
// we try to invoke /chat/completions on dall-e-3. This is expected and // we try to invoke /chat/completions on dall-e-3. This is expected and
// indicates a DALL-E deployment. // indicates a DALL-E deployment.
if (response.status === 400) { if (response.status === 400) {
if (data.error.code === "OperationNotSupported") return "azure-dall-e"; if (data.error.code === "OperationNotSupported") {
return {
family: "azure-dall-e",
modelIds: ["dall-e-3", "gpt-image-1", "gpt-image-1-mini", "gpt-image-1.5"],
};
}
throw new AxiosError( throw new AxiosError(
`Unexpected error when testing deployment ${deploymentId}`, `Unexpected error when testing deployment ${deploymentId}`,
"AZURE_TEST_ERROR", "AZURE_TEST_ERROR",
@@ -137,11 +151,12 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
} }
const family = getAzureOpenAIModelFamily(data.model); const family = getAzureOpenAIModelFamily(data.model);
const normalizedModel = normalizeAzureModelId(data.model);
// Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks. // Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks.
// Otherwise we can use the model family Azure returned. // Otherwise we can use the model family Azure returned.
if (family !== "azure-gpt4") { if (family !== "azure-gpt4") {
return family; return { family, modelIds: [normalizedModel] };
} }
// Try to send an oversized prompt. GPT-4 Turbo can handle this but regular // Try to send an oversized prompt. GPT-4 Turbo can handle this but regular
@@ -160,8 +175,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
const code = contextTest.error?.code; const code = contextTest.error?.code;
this.log.debug({ code, status }, "Performed Azure GPT4 context size test."); this.log.debug({ code, status }, "Performed Azure GPT4 context size test.");
if (code === "context_length_exceeded") return "azure-gpt4"; if (code === "context_length_exceeded") {
return "azure-gpt4-turbo"; return { family: "azure-gpt4", modelIds: ["gpt-4"] };
}
return { family: "azure-gpt4-turbo", modelIds: ["gpt-4-turbo"] };
} }
static errorIsAzureError(error: AxiosError): error is AxiosError<AzureError> { static errorIsAzureError(error: AxiosError): error is AxiosError<AzureError> {
@@ -179,3 +196,7 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
return { resourceName, deploymentId, apiKey }; return { resourceName, deploymentId, apiKey };
} }
} }
function normalizeAzureModelId(model: string) {
return model.replace("gpt-35-turbo", "gpt-3.5-turbo");
}
+12 -1
View File
@@ -14,6 +14,8 @@ type AzureOpenAIKeyUsage = {
export interface AzureOpenAIKey extends Key, AzureOpenAIKeyUsage { export interface AzureOpenAIKey extends Key, AzureOpenAIKeyUsage {
readonly service: "azure"; readonly service: "azure";
readonly modelFamilies: AzureOpenAIModelFamily[]; readonly modelFamilies: AzureOpenAIModelFamily[];
/** Exact model IDs or deployment aliases known to be backed by this key. */
modelIds: string[];
/** The time at which this key was last rate limited. */ /** The time at which this key was last rate limited. */
rateLimitedAt: number; rateLimitedAt: number;
/** The time until which this key is rate limited. */ /** The time until which this key is rate limited. */
@@ -62,6 +64,7 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
rateLimitedAt: 0, rateLimitedAt: 0,
rateLimitedUntil: 0, rateLimitedUntil: 0,
contentFiltering: false, contentFiltering: false,
modelIds: [],
hash: `azu-${crypto hash: `azu-${crypto
.createHash("sha256") .createHash("sha256")
.update(key) .update(key)
@@ -73,6 +76,8 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
"azure-gpt4-32kTokens": 0, "azure-gpt4-32kTokens": 0,
"azure-gpt4-turboTokens": 0, "azure-gpt4-turboTokens": 0,
"azure-gpt4oTokens": 0, "azure-gpt4oTokens": 0,
"azure-gpt5Tokens": 0,
"azure-o-seriesTokens": 0,
"azure-dall-eTokens": 0, "azure-dall-eTokens": 0,
}; };
this.keys.push(newKey); this.keys.push(newKey);
@@ -96,8 +101,14 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
public get(model: string) { public get(model: string) {
const neededFamily = getAzureOpenAIModelFamily(model); const neededFamily = getAzureOpenAIModelFamily(model);
const normalizedModel = model
.replace(/^azure-/, "")
.replace("gpt-35-turbo", "gpt-3.5-turbo");
const availableKeys = this.keys.filter( const availableKeys = this.keys.filter(
(k) => !k.isDisabled && k.modelFamilies.includes(neededFamily) (k) =>
!k.isDisabled &&
k.modelFamilies.includes(neededFamily) &&
(!k.modelIds.length || k.modelIds.includes(normalizedModel))
); );
if (availableKeys.length === 0) { if (availableKeys.length === 0) {
throw new PaymentRequiredError( throw new PaymentRequiredError(
+8 -8
View File
@@ -32,10 +32,10 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
const isInitialCheck = !key.lastChecked; const isInitialCheck = !key.lastChecked;
if (isInitialCheck) { if (isInitialCheck) {
checks = [ checks = [
this.invokeModel("claude-3-haiku@20240307", key, true), this.invokeModel("claude-haiku-4-5@20251001", key, true),
this.invokeModel("claude-3-sonnet@20240229", key, true), this.invokeModel("claude-sonnet-4-5@20250929", key, true),
this.invokeModel("claude-3-opus@20240229", key, true), this.invokeModel("claude-opus-4-1@20250805", key, true),
this.invokeModel("claude-3-5-sonnet@20240620", key, true), this.invokeModel("claude-3-5-haiku@20241022", key, true),
]; ];
const [sonnet, haiku, opus, sonnet35] = const [sonnet, haiku, opus, sonnet35] =
@@ -66,13 +66,13 @@ export class GcpKeyChecker extends KeyCheckerBase<GcpKey> {
}); });
} else { } else {
if (key.haikuEnabled) { if (key.haikuEnabled) {
await this.invokeModel("claude-3-haiku@20240307", key, false) await this.invokeModel("claude-haiku-4-5@20251001", key, false)
} else if (key.sonnetEnabled) { } else if (key.sonnetEnabled) {
await this.invokeModel("claude-3-sonnet@20240229", key, false) await this.invokeModel("claude-sonnet-4-5@20250929", key, false)
} else if (key.sonnet35Enabled) { } else if (key.sonnet35Enabled) {
await this.invokeModel("claude-3-5-sonnet@20240620", key, false) await this.invokeModel("claude-3-5-haiku@20241022", key, false)
} else { } else {
await this.invokeModel("claude-3-opus@20240229", key, false) await this.invokeModel("claude-opus-4-1@20250805", key, false)
} }
this.updateKey(key.hash, { lastChecked: Date.now() }); this.updateKey(key.hash, { lastChecked: Date.now() });
+1
View File
@@ -4,6 +4,7 @@ import { KeyPool } from "./key-pool";
/** The request and response format used by a model's API. */ /** The request and response format used by a model's API. */
export type APIFormat = export type APIFormat =
| "openai" | "openai"
| "openai-responses"
| "openai-text" | "openai-text"
| "openai-image" | "openai-image"
| "anthropic-chat" // Anthropic's newer messages array format | "anthropic-chat" // Anthropic's newer messages array format
+4 -1
View File
@@ -111,7 +111,10 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
const familiesArray = [...families]; const familiesArray = [...families];
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!; const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
this.updateKey(key.hash, { this.updateKey(key.hash, {
modelSnapshots: models.filter((m) => m.match(/-\d{4}(-preview)?$/)), modelIds: models,
modelSnapshots: models.filter((m) =>
m.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/)
),
modelFamilies: familiesArray, modelFamilies: familiesArray,
lastChecked: keyFromPool.lastChecked, lastChecked: keyFromPool.lastChecked,
}); });
+12 -2
View File
@@ -16,6 +16,8 @@ type OpenAIKeyUsage = {
export interface OpenAIKey extends Key, OpenAIKeyUsage { export interface OpenAIKey extends Key, OpenAIKeyUsage {
readonly service: "openai"; readonly service: "openai";
modelFamilies: OpenAIModelFamily[]; modelFamilies: OpenAIModelFamily[];
/** Exact model IDs reported by the models API for this key. */
modelIds: string[];
/** /**
* Some keys are assigned to multiple organizations, each with their own quota * Some keys are assigned to multiple organizations, each with their own quota
* limits. We clone the key for each organization and track usage/disabled * limits. We clone the key for each organization and track usage/disabled
@@ -97,6 +99,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4" as const, "gpt4" as const,
"gpt4-turbo" as const, "gpt4-turbo" as const,
"gpt4o" as const, "gpt4o" as const,
"gpt5" as const,
"o-series" as const,
], ],
isTrial: false, isTrial: false,
isDisabled: false, isDisabled: false,
@@ -118,8 +122,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4-32kTokens": 0, "gpt4-32kTokens": 0,
"gpt4-turboTokens": 0, "gpt4-turboTokens": 0,
gpt4oTokens: 0, gpt4oTokens: 0,
gpt5Tokens: 0,
"o-seriesTokens": 0,
"dall-eTokens": 0, "dall-eTokens": 0,
gpt4Rpm: 0, gpt4Rpm: 0,
modelIds: [],
modelSnapshots: [], modelSnapshots: [],
}; };
this.keys.push(newKey); this.keys.push(newKey);
@@ -160,8 +167,10 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
if (model === "gpt-4-32k") model = "gpt-4-32k-0613"; if (model === "gpt-4-32k") model = "gpt-4-32k-0613";
const neededFamily = getOpenAIModelFamily(model); const neededFamily = getOpenAIModelFamily(model);
const excludeTrials = model === "text-embedding-ada-002"; const excludeTrials = /^text-embedding-(?:3-small|3-large|ada-002)$/.test(
const needsSnapshot = model.match(/-\d{4}(-preview)?$/); model
);
const needsSnapshot = model.match(/-\d{4}(?:-\d{2}-\d{2})?(-preview)?$/);
const availableKeys = this.keys.filter( const availableKeys = this.keys.filter(
// Allow keys which // Allow keys which
@@ -169,6 +178,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
!key.isDisabled && // are not disabled !key.isDisabled && // are not disabled
key.modelFamilies.includes(neededFamily) && // have access to the model family we need key.modelFamilies.includes(neededFamily) && // have access to the model family we need
(!excludeTrials || !key.isTrial) && // and are not trials if we don't want them (!excludeTrials || !key.isTrial) && // and are not trials if we don't want them
(!key.modelIds.length || key.modelIds.includes(model)) && // and have the requested model if exact inventory is available
(!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need (!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need
); );
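The widened snapshot regex now covers dated snapshots in both the legacy and ISO-date forms:

// "gpt-4-0613"         → snapshot; requires a key listing that snapshot
// "gpt-4o-2024-08-06"  → snapshot (new ISO-date form)
// "gpt-4-0125-preview" → snapshot
// "gpt-5"              → not a snapshot; any enabled key in the gpt5 family
//                        (and in its exact-ID inventory, if one exists) works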
+48 -2
View File
@@ -23,6 +23,8 @@ export type OpenAIModelFamily =
| "gpt4-32k" | "gpt4-32k"
| "gpt4-turbo" | "gpt4-turbo"
| "gpt4o" | "gpt4o"
| "gpt5"
| "o-series"
| "dall-e"; | "dall-e";
export type AnthropicModelFamily = "claude" | "claude-opus"; export type AnthropicModelFamily = "claude" | "claude-opus";
export type GoogleAIModelFamily = "gemini-pro"; export type GoogleAIModelFamily = "gemini-pro";
@@ -51,6 +53,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"gpt4-32k", "gpt4-32k",
"gpt4-turbo", "gpt4-turbo",
"gpt4o", "gpt4o",
"gpt5",
"o-series",
"dall-e", "dall-e",
"claude", "claude",
"claude-opus", "claude-opus",
@@ -68,6 +72,8 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"azure-gpt4-32k", "azure-gpt4-32k",
"azure-gpt4-turbo", "azure-gpt4-turbo",
"azure-gpt4o", "azure-gpt4o",
"azure-gpt5",
"azure-o-series",
"azure-dall-e", "azure-dall-e",
] as const); ] as const);
@@ -84,6 +90,10 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
] as const); ] as const);
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = { export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-5(\\.\\d+)?([-.].+)?$": "gpt5",
"^o\\d([-.].+)?$": "o-series",
"^computer-use-preview$": "o-series",
"^gpt-4\\.1([-.].+)?$": "gpt4o",
"^gpt-4o": "gpt4o", "^gpt-4o": "gpt4o",
"^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo", "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
"^gpt-4-turbo(-preview)?$": "gpt4-turbo", "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
@@ -94,7 +104,8 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^gpt-4-\\d{4}$": "gpt4", "^gpt-4-\\d{4}$": "gpt4",
"^gpt-4$": "gpt4", "^gpt-4$": "gpt4",
"^gpt-3.5-turbo": "turbo", "^gpt-3.5-turbo": "turbo",
"^text-embedding-ada-002$": "turbo", "^text-embedding-(ada-002|3-small|3-large)$": "turbo",
"^gpt-image-1([-.].+)?$": "dall-e",
"^dall-e-\\d{1}$": "dall-e", "^dall-e-\\d{1}$": "dall-e",
}; };
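A few resolutions under the updated map, assuming getOpenAIModelFamily tries these patterns in order:

// getOpenAIModelFamily("gpt-5.2-codex")        → "gpt5"
// getOpenAIModelFamily("o4-mini")              → "o-series"
// getOpenAIModelFamily("computer-use-preview") → "o-series"
// getOpenAIModelFamily("gpt-4.1-mini")         → "gpt4o" (binned with 4o)
// getOpenAIModelFamily("gpt-image-1.5")        → "dall-e"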
@@ -106,6 +117,8 @@ export const MODEL_FAMILY_SERVICE: {
"gpt4-turbo": "openai", "gpt4-turbo": "openai",
"gpt4-32k": "openai", "gpt4-32k": "openai",
"gpt4o": "openai", "gpt4o": "openai",
gpt5: "openai",
"o-series": "openai",
"dall-e": "openai", "dall-e": "openai",
claude: "anthropic", claude: "anthropic",
"claude-opus": "anthropic", "claude-opus": "anthropic",
@@ -118,6 +131,8 @@ export const MODEL_FAMILY_SERVICE: {
"azure-gpt4-32k": "azure", "azure-gpt4-32k": "azure",
"azure-gpt4-turbo": "azure", "azure-gpt4-turbo": "azure",
"azure-gpt4o": "azure", "azure-gpt4o": "azure",
"azure-gpt5": "azure",
"azure-o-series": "azure",
"azure-dall-e": "azure", "azure-dall-e": "azure",
"gemini-pro": "google-ai", "gemini-pro": "google-ai",
"mistral-tiny": "mistral-ai", "mistral-tiny": "mistral-ai",
@@ -150,7 +165,10 @@ export function getGoogleAIModelFamily(_model: string): ModelFamily {
} }
export function getMistralAIModelFamily(model: string): MistralAIModelFamily { export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
const prunedModel = model.replace(/-(latest|\d{4})$/, ""); const prunedModel = model.replace(
/-(latest|\d{4}|\d{6}|\d+\.\d+|v\d+(:\d+)?)$/,
""
);
switch (prunedModel) { switch (prunedModel) {
case "mistral-tiny": case "mistral-tiny":
case "mistral-small": case "mistral-small":
@@ -161,7 +179,34 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
return "mistral-tiny"; return "mistral-tiny";
case "open-mixtral-8x7b": case "open-mixtral-8x7b":
return "mistral-small"; return "mistral-small";
case "ministral-3b":
case "ministral-8b":
case "mistral-small-3.1":
case "mistral-small-3.2":
return "mistral-small";
case "magistral-medium":
return "mistral-medium";
case "codestral":
case "devstral":
case "mistral-large-2":
case "mistral-large-3":
case "pixtral-large":
return "mistral-large";
default: default:
if (model.startsWith("mistral-small") || model.startsWith("ministral")) {
return "mistral-small";
}
if (model.startsWith("mistral-medium") || model.startsWith("magistral")) {
return "mistral-medium";
}
if (
model.startsWith("mistral-large") ||
model.startsWith("pixtral-large") ||
model.startsWith("codestral") ||
model.startsWith("devstral")
) {
return "mistral-large";
}
return "mistral-tiny"; return "mistral-tiny";
} }
} }
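Representative mappings under the new rules:

// getMistralAIModelFamily("mistral-small-2506")    → "mistral-small"
// getMistralAIModelFamily("magistral-medium-2507") → "mistral-medium"
// getMistralAIModelFamily("pixtral-large-2411")    → "mistral-large"
// getMistralAIModelFamily("devstral-small-2507")   → "mistral-large" (startsWith fallback)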
@@ -225,6 +270,7 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
modelFamily = getClaudeModelFamily(model); modelFamily = getClaudeModelFamily(model);
break; break;
case "openai": case "openai":
case "openai-responses":
case "openai-text": case "openai-text":
case "openai-image": case "openai-image":
modelFamily = getOpenAIModelFamily(model); modelFamily = getOpenAIModelFamily(model);
+8
View File
@@ -10,6 +10,14 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
case "azure-gpt4o": case "azure-gpt4o":
cost = 0.000005; cost = 0.000005;
break; break;
case "gpt5":
case "azure-gpt5":
cost = 0.00001;
break;
case "o-series":
case "azure-o-series":
cost = 0.000012;
break;
case "azure-gpt4-turbo": case "azure-gpt4-turbo":
case "gpt4-turbo": case "gpt4-turbo":
cost = 0.00001; cost = 0.00001;
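At these rates, one million gpt5-family tokens meter as US$10.00 and one million o-series tokens as US$12.00, which is the basis the TOKEN_QUOTA_* limits count against.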
+8 -1
View File
@@ -65,7 +65,14 @@ async function getTokenCountForMessages({
numTokens += encoder.encode(text.normalize("NFKC"), "all").length; numTokens += encoder.encode(text.normalize("NFKC"), "all").length;
break; break;
case "image": case "image":
numTokens += await getImageTokenCount(part.source.data); if (part.source.type === "base64") {
numTokens += await getImageTokenCount(part.source.data);
} else {
// Remote image URLs are already hosted elsewhere, so we cannot
// inspect dimensions locally. Charge the documented worst-case
// token cost instead of undercounting them as zero.
numTokens += 1600;
}
break; break;
default: default:
throw new Error(`Unsupported Anthropic content type.`); throw new Error(`Unsupported Anthropic content type.`);
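The 1600 figure tracks Anthropic's documented per-image maximum (roughly (width × height) / 750 tokens, capped near 1,600 for the largest accepted images), so a message with two URL-sourced images is billed 3,200 tokens regardless of their actual dimensions.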
+36 -8
View File
@@ -179,16 +179,33 @@ export const DALLE_TOKENS_PER_DOLLAR = 100000;
* which we convert to tokens at a rate of 100000 tokens per dollar. * which we convert to tokens at a rate of 100000 tokens per dollar.
*/ */
export function getOpenAIImageCost(params: { export function getOpenAIImageCost(params: {
model: "dall-e-2" | "dall-e-3"; model:
quality: "standard" | "hd"; | "dall-e-2"
resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024"; | "dall-e-3"
| "gpt-image-1"
| "gpt-image-1-mini"
| "gpt-image-1.5";
quality: "auto" | "low" | "medium" | "high" | "standard" | "hd";
resolution:
| "auto"
| "512x512"
| "256x256"
| "1024x1024"
| "1024x1536"
| "1536x1024"
| "1024x1792"
| "1792x1024";
n: number | null; n: number | null;
}) { }) {
const { model, quality, resolution, n } = params; const { model, quality, resolution, n } = params;
const normalizedResolution =
resolution === "auto" ? "1024x1024" : resolution;
const normalizedQuality =
quality === "hd" || quality === "high" ? "hd" : "standard";
const usd = (() => { const usd = (() => {
switch (model) { switch (model) {
case "dall-e-2": case "dall-e-2":
switch (resolution) { switch (normalizedResolution) {
case "512x512": case "512x512":
return 0.018; return 0.018;
case "256x256": case "256x256":
@@ -199,12 +216,20 @@ export function getOpenAIImageCost(params: {
throw new Error("Invalid resolution"); throw new Error("Invalid resolution");
} }
case "dall-e-3": case "dall-e-3":
switch (resolution) { case "gpt-image-1.5":
case "gpt-image-1":
case "gpt-image-1-mini":
// GPT Image models have newer parameter ranges, but we still account
// for them using the existing DALL-E 3-style price buckets so the
// proxy can continue to enforce rough quota/cost limits.
switch (normalizedResolution) {
case "1024x1024": case "1024x1024":
return quality === "standard" ? 0.04 : 0.08; return normalizedQuality === "standard" ? 0.04 : 0.08;
case "1024x1536":
case "1536x1024":
case "1024x1792": case "1024x1792":
case "1792x1024": case "1792x1024":
return quality === "standard" ? 0.08 : 0.12; return normalizedQuality === "standard" ? 0.08 : 0.12;
default: default:
throw new Error("Invalid resolution"); throw new Error("Invalid resolution");
} }
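Example: a gpt-image-1 request at 1536x1024 with quality "high" normalizes to the hd bucket, prices at US$0.12, and meters as 12,000 tokens at the 100,000-tokens-per-dollar rate; "auto" size and quality fall back to 1024x1024/standard, i.e. US$0.04 or 4,000 tokens.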
@@ -233,7 +258,10 @@ export function estimateGoogleAITokenCount(
let numTokens = 0; let numTokens = 0;
for (const message of prompt) { for (const message of prompt) {
numTokens += tokensPerMessage; numTokens += tokensPerMessage;
numTokens += encoder.encode(message.parts[0].text).length; const text = message.parts
.map((part) => ("text" in part ? part.text : ""))
.join("\n");
numTokens += encoder.encode(text).length;
} }
numTokens += 3; numTokens += 3;
+2 -1
View File
@@ -55,7 +55,7 @@ type MistralAIChatTokenCountRequest = {
type FlatPromptTokenCountRequest = { type FlatPromptTokenCountRequest = {
prompt: string; prompt: string;
completion?: never; completion?: never;
service: "openai-text" | "anthropic-text" | "google-ai"; service: "openai-text" | "openai-responses" | "anthropic-text" | "google-ai";
}; };
type StringCompletionTokenCountRequest = { type StringCompletionTokenCountRequest = {
@@ -105,6 +105,7 @@ export async function countTokens({
tokenization_duration_ms: getElapsedMs(time), tokenization_duration_ms: getElapsedMs(time),
}; };
case "openai": case "openai":
case "openai-responses":
case "openai-text": case "openai-text":
return { return {
...(await getOpenAITokenCount(prompt ?? completion, req.body.model)), ...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),
+1
View File
@@ -400,6 +400,7 @@ function getModelFamilyForQuotaUsage(
switch (api) { switch (api) {
case "openai": case "openai":
case "openai-responses":
case "openai-text": case "openai-text":
case "openai-image": case "openai-image":
return getOpenAIModelFamily(model); return getOpenAIModelFamily(model);