o1-pro test

This commit is contained in:
reanon
2025-04-17 11:33:58 +00:00
parent 8081d9516d
commit 5eb4858c69
15 changed files with 441 additions and 5 deletions
+2
View File
@@ -25,6 +25,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
gpt45: "GPT-4.5",
o1: "OpenAI o1",
"o1-mini": "OpenAI o1 mini",
"o1-pro": "OpenAI o1 pro",
"o3-mini": "OpenAI o3 mini",
"o3": "OpenAI o3",
"o4-mini": "OpenAI o4 mini",
@@ -57,6 +58,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"azure-gpt41-nano": "Azure GPT-4.1 Nano",
"azure-o1": "Azure o1",
"azure-o1-mini": "Azure o1 mini",
"azure-o1-pro": "Azure o1 pro",
"azure-o3-mini": "Azure o3 mini",
"azure-o3": "Azure o3",
"azure-o4-mini": "Azure o4 mini",
+19
View File
@@ -12,6 +12,7 @@ const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
const OPENAI_RESPONSES_ENDPOINT = "/v1/responses";
const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
const ANTHROPIC_MESSAGES_ENDPOINT = "/v1/messages";
const ANTHROPIC_SONNET_COMPAT_ENDPOINT = "/v1/sonnet";
@@ -25,6 +26,7 @@ export function isTextGenerationRequest(req: Request) {
[
OPENAI_CHAT_COMPLETION_ENDPOINT,
OPENAI_TEXT_COMPLETION_ENDPOINT,
OPENAI_RESPONSES_ENDPOINT,
ANTHROPIC_COMPLETION_ENDPOINT,
ANTHROPIC_MESSAGES_ENDPOINT,
ANTHROPIC_SONNET_COMPAT_ENDPOINT,
@@ -236,6 +238,22 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
// - choices[0].message.content
// - choices[0].message with no content if model is invoking a tool
return body.choices?.[0]?.message?.content || "";
case "openai-responses":
// Handle the original Responses API format
if (body.output && Array.isArray(body.output)) {
// Look for a message type in the output array
for (const item of body.output) {
if (item.type === "message" && item.content && Array.isArray(item.content)) {
// Extract text content from each content item
return item.content
.filter((contentItem: any) => contentItem.type === "output_text")
.map((contentItem: any) => contentItem.text)
.join("");
}
}
}
// If we've been transformed to chat completion format already
return body.choices?.[0]?.message?.content || "";
case "mistral-text":
return body.outputs?.[0]?.text || "";
case "openai-text":
@@ -287,6 +305,7 @@ export function getModelFromBody(req: Request, resBody: Record<string, any>) {
switch (format) {
case "openai":
case "openai-text":
case "openai-responses":
return resBody.model;
case "mistral-ai":
case "mistral-text":
@@ -51,6 +51,9 @@ export const addKey: ProxyReqMutator = (manager) => {
case "openai-image":
assignedKey = keyPool.get("dall-e-3", service);
break;
case "openai-responses":
assignedKey = keyPool.get(body.model, service);
break;
case "openai":
throw new Error(
`Outbound API ${outboundApi} is not supported for ${inboundApi}`
@@ -13,6 +13,51 @@ export const finalizeBody: ProxyReqMutator = (manager) => {
if (req.outboundApi === "anthropic-chat") {
delete req.body.prompt;
}
// For OpenAI Responses API, ensure messages is in the correct format
if (req.outboundApi === "openai-responses") {
// Format messages for the Responses API
if (req.body.messages) {
req.log.info("Formatting messages for Responses API in finalizeBody");
// The Responses API expects input to be an array, not an object
req.body.input = req.body.messages;
delete req.body.messages;
} else if (req.body.input && req.body.input.messages) {
req.log.info("Reformatting input.messages for Responses API in finalizeBody");
// If input already exists but contains a messages object, replace input with the messages array
req.body.input = req.body.input.messages;
}
// Final check to ensure max_completion_tokens is converted to max_output_tokens
if (req.body.max_completion_tokens) {
req.log.info("Converting max_completion_tokens to max_output_tokens in finalizeBody");
if (!req.body.max_output_tokens) {
req.body.max_output_tokens = req.body.max_completion_tokens;
}
delete req.body.max_completion_tokens;
}
// Final check to ensure max_tokens is converted to max_output_tokens
if (req.body.max_tokens) {
req.log.info("Converting max_tokens to max_output_tokens in finalizeBody");
if (!req.body.max_output_tokens) {
req.body.max_output_tokens = req.body.max_tokens;
}
delete req.body.max_tokens;
}
// Remove all parameters not supported by Responses API
const unsupportedParams = [
'frequency_penalty',
'presence_penalty',
];
for (const param of unsupportedParams) {
if (req.body[param] !== undefined) {
req.log.info(`Removing unsupported parameter for Responses API: ${param}`);
delete req.body[param];
}
}
}
const serialized =
typeof req.body === "string" ? req.body : JSON.stringify(req.body);
@@ -31,6 +31,7 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
applyMistralPromptFixes(req);
applyGoogleAIKeyTransforms(req);
applyOpenAIResponsesTransform(req);
// Native prompts are those which were already provided by the client in the
// target API format. We don't need to transform them.
@@ -56,6 +57,58 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
);
};
// Handle OpenAI Responses API transformation
function applyOpenAIResponsesTransform(req: Request): void {
  if (req.outboundApi !== "openai-responses") {
    return;
  }
  req.log.info("Transforming request to OpenAI Responses API format");

  // Shallow copy kept only so we can report the pre-transform model below.
  const originalBody = { ...req.body };
  const body = req.body;

  // The Responses API nests the chat messages under 'input' instead of a
  // top-level 'messages' array (endpoint is /v1/responses, not
  // /v1/chat/completions).
  if (body.messages && !body.input) {
    body.input = { messages: body.messages };
    delete body.messages;
  }

  // Carry an existing conversation id over as previousResponseId.
  if (!body.previousResponseId && body.conversation_id) {
    body.previousResponseId = body.conversation_id;
    delete body.conversation_id;
  }

  // Translate max_tokens to max_output_tokens unless the latter is already set.
  if (body.max_tokens && !body.max_output_tokens) {
    body.max_output_tokens = body.max_tokens;
    delete body.max_tokens;
  }

  // Default missing tool types to "function" — but only when no tool in the
  // list already declares a recognized type (matches original behavior).
  if (body.tools) {
    const hasRecognizedType = body.tools.some(
      (tool: any) => tool.type === "function" || tool.type === "web_search"
    );
    if (!hasRecognizedType) {
      body.tools = body.tools.map((tool: any) => ({
        ...tool,
        type: tool.type || "function",
      }));
    }
  }

  req.log.info(
    {
      originalModel: originalBody.model,
      newFormat: "openai-responses",
    },
    "Successfully transformed request to Responses API format"
  );
}
// handles weird cases that don't fit into our abstractions
function applyMistralPromptFixes(req: Request): void {
if (req.inboundApi === "mistral-ai") {
@@ -28,6 +28,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
switch (req.outboundApi) {
case "openai":
case "openai-text":
case "openai-responses":
proxyMax = OPENAI_MAX_CONTEXT;
break;
case "anthropic-chat":
@@ -86,6 +87,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 200000;
} else if (model.match(/^o1-mini(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 128000;
} else if (model.match(/^o1-pro(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 200000;
} else if (model.match(/^o1-preview(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 128000;
} else if (model.match(/gpt-3.5-turbo/)) {
@@ -158,6 +158,8 @@ function getTransformer(
: mistralAIToOpenAI;
case "openai-image":
throw new Error(`SSE transformation not supported for ${responseApi}`);
case "openai-responses":
throw new Error(`SSE transformation not supported for ${responseApi}`);
default:
assertNever(responseApi);
}
+230 -5
View File
@@ -121,6 +121,9 @@ const openaiResponseHandler: ProxyResHandlerWithBody = async (
if (req.outboundApi === "openai-text" && req.inboundApi === "openai") {
req.log.info("Transforming Turbo-Instruct response to Chat format");
newBody = transformTurboInstructResponse(body);
} else if (req.outboundApi === "openai-responses" && req.inboundApi === "openai") {
req.log.info("Transforming Responses API response to Chat format");
newBody = transformResponsesApiResponse(body);
}
res.status(200).json({ ...newBody, proxy: body.proxy });
@@ -143,6 +146,135 @@ function transformTurboInstructResponse(
return transformed;
}
/**
 * Converts an OpenAI Responses API response body into chat-completion
 * format so clients that called /v1/chat/completions get the shape they
 * expect.
 *
 * Handles three observed upstream shapes:
 *   1. an `output` array containing a `message` item (documented format)
 *   2. a `response` object carrying `content`
 *   3. a bare top-level `content` field
 * Falls back to an empty assistant message when no content is found.
 */
function transformResponsesApiResponse(
  responsesBody: Record<string, any>
): Record<string, any> {
  // Already chat-completion shaped? Pass through untouched.
  if (responsesBody.choices && responsesBody.choices[0]?.message) {
    return responsesBody;
  }

  // Skeleton of a chat.completion response; usage is refined at the end.
  const transformed: Record<string, any> = {
    id: responsesBody.id || `chatcmpl-${Date.now()}`,
    object: "chat.completion",
    created: responsesBody.created_at || Math.floor(Date.now() / 1000),
    model: responsesBody.model || "o1-pro",
    choices: [],
    usage: responsesBody.usage || {
      prompt_tokens: 0,
      completion_tokens: 0,
      total_tokens: 0
    }
  };

  // Structure 1: output array with message objects
  if (responsesBody.output && Array.isArray(responsesBody.output)) {
    const messageOutput = responsesBody.output.find(
      (output: any) => output.type === "message"
    );

    if (messageOutput) {
      if (
        messageOutput.content &&
        Array.isArray(messageOutput.content) &&
        messageOutput.content.length > 0
      ) {
        // Concatenate text parts; collect any tool calls alongside them.
        let content = "";
        const toolCalls: any[] = [];
        for (const contentItem of messageOutput.content) {
          if (contentItem.type === "output_text") {
            content += contentItem.text;
          } else if (
            contentItem.type === "tool_calls" &&
            Array.isArray(contentItem.tool_calls)
          ) {
            toolCalls.push(...contentItem.tool_calls);
          }
        }

        const message: Record<string, any> = {
          role: messageOutput.role || "assistant",
          content: content
        };
        if (toolCalls.length > 0) {
          message.tool_calls = toolCalls;
        }

        transformed.choices.push({
          index: 0,
          message,
          // FIX: chat-completion clients expect "tool_calls" when the model
          // invoked a tool; previously this always reported "stop".
          finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop"
        });
      } else if (typeof messageOutput.content === "string") {
        // Simple string content
        transformed.choices.push({
          index: 0,
          message: {
            role: messageOutput.role || "assistant",
            content: messageOutput.content
          },
          finish_reason: "stop"
        });
      }
    }
  }
  // Structure 2: response object with content
  else if (responsesBody.response && responsesBody.response.content) {
    transformed.choices.push({
      index: 0,
      message: {
        role: "assistant",
        content:
          typeof responsesBody.response.content === "string"
            ? responsesBody.response.content
            : JSON.stringify(responsesBody.response.content)
      },
      finish_reason: responsesBody.response.finish_reason || "stop"
    });
  }
  // Structure 3: look for 'content' field directly
  else if (responsesBody.content) {
    transformed.choices.push({
      index: 0,
      message: {
        role: "assistant",
        content:
          typeof responsesBody.content === "string"
            ? responsesBody.content
            : JSON.stringify(responsesBody.content)
      },
      finish_reason: "stop"
    });
  }

  // If we couldn't extract content, create a basic response
  if (transformed.choices.length === 0) {
    transformed.choices.push({
      index: 0,
      message: { role: "assistant", content: "" },
      finish_reason: "stop"
    });
  }

  // Map Responses API usage names (input/output_tokens) onto the
  // chat-completion names (prompt/completion_tokens).
  if (responsesBody.usage) {
    const promptTokens = responsesBody.usage.input_tokens || 0;
    const completionTokens = responsesBody.usage.output_tokens || 0;
    transformed.usage = {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      // FIX: previously defaulted to 0 when total_tokens was absent; derive
      // it from the parts so downstream accounting stays correct.
      total_tokens:
        responsesBody.usage.total_tokens || promptTokens + completionTokens
    };
  }

  return transformed;
}
const openaiProxy = createQueuedProxyMiddleware({
mutations: [addKey, finalizeBody],
target: "https://api.openai.com",
@@ -154,6 +286,13 @@ const openaiEmbeddingsProxy = createQueuedProxyMiddleware({
target: "https://api.openai.com",
});
// New proxy middleware for the Responses API.
// Same upstream target and request mutators (key assignment + body
// finalization) as the regular OpenAI proxy; the blocking response handler
// lets openaiResponseHandler convert Responses API bodies back into
// chat-completion format before replying to the client. SSE streaming is
// not supported for the openai-responses format (the SSE transformer
// throws for it), so responses are handled as blocking only.
const openaiResponsesProxy = createQueuedProxyMiddleware({
  mutations: [addKey, finalizeBody],
  target: "https://api.openai.com",
  blockingResponseHandler: openaiResponseHandler,
});
const openaiRouter = Router();
openaiRouter.get("/v1/models", handleModelRequest);
// Native text completion endpoint, only for turbo-instruct.
@@ -202,17 +341,83 @@ const setupChunkedTransfer: RequestHandler = (req, res, next) => {
next();
};
// Functions to handle model-specific API routing

/** True when `model` is "o1-pro" or any dated o1-pro snapshot ("o1-pro-..."). */
function shouldUseResponsesApi(model: string): boolean {
  return /^o1-pro(-|$)/.test(model);
}
// Preprocessor to redirect requests to the responses API.
// Runs after the standard transforms; when the requested model needs the
// Responses API (o1-pro family), it rewrites the outbound URL and format so
// downstream middleware dispatches to the Responses proxy instead.
const routeToResponsesApi: RequestPreprocessor = (req) => {
  const model = req.body.model;
  if (!shouldUseResponsesApi(model)) {
    return;
  }
  req.log.info(`Routing ${model} to OpenAI Responses API`);
  req.url = "/v1/responses";
  req.outboundApi = "openai-responses";
};
// General chat completion endpoint. Turbo-instruct is not supported here.
openaiRouter.post(
  "/v1/chat/completions",
  ipLimiter,
  // Standard openai->openai preprocessing. routeToResponsesApi runs last in
  // afterTransform so it can flip outboundApi to "openai-responses" for
  // o1-pro models after the token and reasoning-effort fixups have applied.
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai", service: "openai" },
    {
      afterTransform: [
        fixupMaxTokens,
        setO1ReasoningEffort,
        routeToResponsesApi
      ]
    }
  ),
  setupChunkedTransfer,
  // Dispatcher: requests re-routed to the Responses API take the dedicated
  // proxy; everything else falls through to the regular OpenAI proxy.
  (req, _res, next) => {
    // Route to the responses endpoint if needed
    if (req.outboundApi === "openai-responses") {
      // Ensure messages is moved to input properly
      req.log.info("Final check for Responses API format in chat completions");
      if (req.body.messages) {
        req.log.info("Moving 'messages' to 'input' for Responses API");
        // NOTE(review): this assigns the raw messages ARRAY to `input`,
        // whereas transformOpenAIToOpenAIResponses wraps it as
        // { messages: [...] }; the else-branch below and finalizeBody both
        // flatten to the array form — confirm the array shape is intended.
        req.body.input = req.body.messages;
        delete req.body.messages;
      } else if (req.body.input && req.body.input.messages) {
        req.log.info("Reformatting input.messages for Responses API");
        req.body.input = req.body.input.messages;
      }
      return openaiResponsesProxy(req, _res, next);
    }
    next();
  },
  openaiProxy
);
// New endpoint for OpenAI Responses API.
// Accepts chat-completion style bodies and converts them to the Responses
// format via the openai->openai-responses transformer before proxying.
// NOTE(review): the inline middleware below duplicates the messages->input
// reshaping also done in finalizeBody — presumably defense in depth; the
// duplication could be consolidated.
openaiRouter.post(
  "/v1/responses",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai-responses", service: "openai" },
    { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] }
  ),
  // Add final check to ensure the body is in the correct format for Responses API
  (req, _res, next) => {
    req.log.info("Final check for Responses API format");
    // Ensure messages is properly formatted for input
    if (req.body.messages) {
      req.log.info("Moving 'messages' to 'input' for Responses API");
      req.body.input = req.body.messages;
      delete req.body.messages;
    } else if (req.body.input && req.body.input.messages) {
      req.log.info("Reformatting input.messages for Responses API");
      // Flatten { messages: [...] } down to the bare array.
      req.body.input = req.body.input.messages;
    }
    next();
  },
  openaiResponsesProxy
);
// Embeddings endpoint.
openaiRouter.post(
"/v1/embeddings",
@@ -226,10 +431,30 @@ function forceModel(model: string): RequestPreprocessor {
}
function fixupMaxTokens(req: Request) {
if (!req.body.max_completion_tokens) {
req.body.max_completion_tokens = req.body.max_tokens;
// For Responses API, use max_output_tokens instead of max_completion_tokens
if (req.outboundApi === "openai-responses") {
if (!req.body.max_output_tokens) {
req.body.max_output_tokens = req.body.max_tokens || req.body.max_completion_tokens;
}
// Remove the other token params to avoid API errors
delete req.body.max_tokens;
delete req.body.max_completion_tokens;
// Remove other parameters not supported by Responses API
const unsupportedParams = ['frequency_penalty', 'presence_penalty'];
for (const param of unsupportedParams) {
if (req.body[param] !== undefined) {
req.log.info(`Removing unsupported parameter for Responses API: ${param}`);
delete req.body[param];
}
}
} else {
// Original behavior for other APIs
if (!req.body.max_completion_tokens) {
req.body.max_completion_tokens = req.body.max_tokens;
}
delete req.body.max_tokens;
}
delete req.body.max_tokens;
}
// Models that support 'reasoning_effort'
@@ -238,7 +463,7 @@ function isO1Model(model: string): boolean {
// - starts with 'o' followed by number (o1, o3, o4, etc.)
// - optionally followed by suffix like -mini or -preview
// - optionally followed by a date stamp
return /^o\d+(-mini|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model);
return /^o\d+(-mini|-pro|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model);
}
// most frontends don't currently support custom reasoning effort for o1
+6
View File
@@ -17,6 +17,10 @@ import {
OpenAIV1ImagesGenerationSchema,
transformOpenAIToOpenAIImage,
} from "./openai-image";
import {
OpenAIV1ResponsesSchema,
transformOpenAIToOpenAIResponses,
} from "./openai-responses";
import {
GoogleAIV1GenerateContentSchema,
transformOpenAIToGoogleAI,
@@ -52,6 +56,7 @@ export const API_REQUEST_TRANSFORMERS: TransformerMap = {
"openai->anthropic-text": transformOpenAIToAnthropicText,
"openai->openai-text": transformOpenAIToOpenAIText,
"openai->openai-image": transformOpenAIToOpenAIImage,
"openai->openai-responses": transformOpenAIToOpenAIResponses,
"openai->google-ai": transformOpenAIToGoogleAI,
"mistral-ai->mistral-text": transformMistralChatToText,
};
@@ -62,6 +67,7 @@ export const API_REQUEST_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
openai: OpenAIV1ChatCompletionSchema,
"openai-text": OpenAIV1TextCompletionSchema,
"openai-image": OpenAIV1ImagesGenerationSchema,
"openai-responses": OpenAIV1ResponsesSchema,
"google-ai": GoogleAIV1GenerateContentSchema,
"mistral-ai": MistralAIV1ChatCompletionsSchema,
"mistral-text": MistralAIV1TextCompletionsSchema,
@@ -0,0 +1,61 @@
import { z } from "zod";
import { Request } from "express";
import { OpenAIChatMessage, OpenAIV1ChatCompletionSchema } from "./openai";
// Schema for the OpenAI Responses API based on the chat completion schema
// with some additional fields specific to the Responses API.
// NOTE(review): `input` is validated here as { messages: [...] }, but
// finalizeBody later flattens it to a bare array before sending upstream —
// confirm which shape the upstream endpoint actually requires.
export const OpenAIV1ResponsesSchema = z.object({
  // Target model, e.g. "o1-pro" or a dated snapshot.
  model: z.string(),
  // Prompt payload; individual messages are intentionally left loosely typed.
  input: z.object({
    messages: z.array(z.any())
  }).optional(),
  // Continuation of a prior response (mapped from conversation_id).
  // NOTE(review): OpenAI documents this parameter in snake_case as
  // `previous_response_id` — confirm upstream accepts the camelCase form.
  previousResponseId: z.string().optional(),
  // Responses-API analogue of max_tokens / max_completion_tokens.
  max_output_tokens: z.number().int().positive().optional(),
  temperature: z.number().min(0).max(2).optional(),
  top_p: z.number().min(0).max(1).optional(),
  n: z.number().int().positive().optional(),
  stream: z.boolean().optional(),
  stop: z.union([z.string(), z.array(z.string())]).optional(),
  // Accepted here for client compatibility, but finalizeBody strips both
  // penalties as unsupported by the Responses API.
  presence_penalty: z.number().min(-2).max(2).optional(),
  frequency_penalty: z.number().min(-2).max(2).optional(),
  user: z.string().optional(),
  tools: z.array(z.any()).optional(),
  reasoning_effort: z.enum(["low", "medium", "high"]).optional(),
});
/**
 * Allow transforming from OpenAI Chat to Responses format.
 * Works on a shallow copy of req.body and returns the converted object:
 * relocates `messages` under `input`, renames the token limit and
 * conversation-id fields, and defaults tool types to "function".
 */
export async function transformOpenAIToOpenAIResponses(
  req: Request
): Promise<z.infer<typeof OpenAIV1ResponsesSchema>> {
  const body = { ...req.body };

  // The Responses API nests chat messages under 'input.messages'.
  if (body.messages && !body.input) {
    body.input = { messages: body.messages };
    delete body.messages;
  }

  // 'max_tokens' becomes 'max_output_tokens' unless one was already given.
  if (body.max_tokens && !body.max_output_tokens) {
    body.max_output_tokens = body.max_tokens;
    delete body.max_tokens;
  }

  // 'conversation_id' maps onto 'previousResponseId'.
  if (body.conversation_id && !body.previousResponseId) {
    body.previousResponseId = body.conversation_id;
    delete body.conversation_id;
  }

  // Every tool gets an explicit type, defaulting to "function".
  if (body.tools) {
    body.tools = body.tools.map((tool: any) => ({
      ...tool,
      type: tool.type || "function",
    }));
  }

  return body;
}
+1
View File
@@ -6,6 +6,7 @@ export type APIFormat =
| "openai"
| "openai-text"
| "openai-image"
| "openai-responses" // New OpenAI Responses API for o1-pro model
| "anthropic-chat" // Anthropic's newer messages array format
| "anthropic-text" // Legacy flat string prompt format
| "google-ai"
@@ -119,6 +119,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt41-nanoTokens": 0,
"o1Tokens": 0,
"o1-miniTokens": 0,
"o1-proTokens": 0,
"o3-miniTokens": 0,
"o3Tokens": 0,
"o4-miniTokens": 0,
+9
View File
@@ -30,6 +30,7 @@ export type OpenAIModelFamily =
| "gpt45"
| "o1"
| "o1-mini"
| "o1-pro"
| "o3-mini"
| "o3"
| "o4-mini"
@@ -78,6 +79,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"gpt41-nano",
"o1",
"o1-mini",
"o1-pro",
"o3-mini",
"o3",
"o4-mini",
@@ -111,6 +113,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"azure-dall-e",
"azure-o1",
"azure-o1-mini",
"azure-o1-pro",
"azure-o3-mini",
"azure-o3",
"azure-o4-mini",
@@ -146,6 +149,7 @@ export const MODEL_FAMILY_SERVICE: {
"gpt41-nano": "openai",
"o1": "openai",
"o1-mini": "openai",
"o1-pro": "openai",
"o3-mini": "openai",
"o3": "openai",
"o4-mini": "openai",
@@ -172,6 +176,7 @@ export const MODEL_FAMILY_SERVICE: {
"azure-dall-e": "azure",
"azure-o1": "azure",
"azure-o1-mini": "azure",
"azure-o1-pro": "azure",
"azure-o3-mini": "azure",
"azure-o3": "azure",
"azure-o4-mini": "azure",
@@ -206,6 +211,7 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^text-embedding-ada-002$": "turbo",
"^dall-e-\\d{1}$": "dall-e",
"^o1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o1-mini",
"^o1-pro(-\\d{4}-\\d{2}-\\d{2})?$": "o1-pro",
"^o1(-\\d{4}-\\d{2}-\\d{2})?$": "o1",
"^o3-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o3-mini",
"^o3(-\\d{4}-\\d{2}-\\d{2})?$": "o3",
@@ -346,6 +352,9 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
case "mistral-text":
modelFamily = getMistralAIModelFamily(model);
break;
case "openai-responses":
modelFamily = getOpenAIModelFamily(model);
break;
default:
assertNever(req.outboundApi);
}
+5
View File
@@ -38,6 +38,11 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
case "gpt4-turbo":
cost = 0.00001;
break;
case "azure-o1-pro":
case "o1-pro":
// Blended per-token rate; o1-pro lists at $150/1M input and $600/1M output
// tokens, but output tokens are not tracked separately here
cost = 0.000325;
break;
case "azure-o1":
case "o1":
// Currently we do not track output tokens separately, and O1 uses
+1
View File
@@ -108,6 +108,7 @@ export async function countTokens({
};
case "openai":
case "openai-text":
case "openai-responses":
return {
...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),
tokenization_duration_ms: getElapsedMs(time),