From 5eb4858c692116cea9bcd0bcc00181c26b8dec0d Mon Sep 17 00:00:00 2001
From: reanon <85157-reanon@users.noreply.gitgud.io>
Date: Thu, 17 Apr 2025 11:33:58 +0000
Subject: [PATCH] o1-pro test

---
 src/info-page.ts                              |   2 +
 src/proxy/middleware/common.ts                |  19 ++
 .../middleware/request/mutators/add-key.ts    |   3 +
 .../request/mutators/finalize-body.ts         |  45 ++++
 .../transform-outbound-payload.ts             |  53 ++++
 .../preprocessors/validate-context-size.ts    |   3 +
 .../streaming/sse-message-transformer.ts      |   2 +
 src/proxy/openai.ts                           | 235 +++++++++++++++++-
 src/shared/api-schemas/index.ts               |   6 +
 src/shared/api-schemas/openai-responses.ts    |  61 +++++
 src/shared/key-management/index.ts            |   1 +
 src/shared/key-management/openai/provider.ts  |   1 +
 src/shared/models.ts                          |   9 +
 src/shared/stats.ts                           |   5 +
 src/shared/tokenization/tokenizer.ts          |   1 +
 15 files changed, 441 insertions(+), 5 deletions(-)
 create mode 100644 src/shared/api-schemas/openai-responses.ts

diff --git a/src/info-page.ts b/src/info-page.ts
index 57187b2..dbe5838 100644
--- a/src/info-page.ts
+++ b/src/info-page.ts
@@ -25,6 +25,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
   gpt45: "GPT-4.5",
   o1: "OpenAI o1",
   "o1-mini": "OpenAI o1 mini",
+  "o1-pro": "OpenAI o1 pro",
   "o3-mini": "OpenAI o3 mini",
   "o3": "OpenAI o3",
   "o4-mini": "OpenAI o4 mini",
@@ -57,6 +58,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
   "azure-gpt41-nano": "Azure GPT-4.1 Nano",
   "azure-o1": "Azure o1",
   "azure-o1-mini": "Azure o1 mini",
+  "azure-o1-pro": "Azure o1 pro",
   "azure-o3-mini": "Azure o3 mini",
   "azure-o3": "Azure o3",
   "azure-o4-mini": "Azure o4 mini",
diff --git a/src/proxy/middleware/common.ts b/src/proxy/middleware/common.ts
index cb83021..804e488 100644
--- a/src/proxy/middleware/common.ts
+++ b/src/proxy/middleware/common.ts
@@ -12,6 +12,7 @@ const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
 const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
 const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
 const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
+const OPENAI_RESPONSES_ENDPOINT = "/v1/responses";
 const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
 const ANTHROPIC_MESSAGES_ENDPOINT = "/v1/messages";
 const ANTHROPIC_SONNET_COMPAT_ENDPOINT = "/v1/sonnet";
@@ -25,6 +26,7 @@ export function isTextGenerationRequest(req: Request) {
     [
       OPENAI_CHAT_COMPLETION_ENDPOINT,
       OPENAI_TEXT_COMPLETION_ENDPOINT,
+      OPENAI_RESPONSES_ENDPOINT,
       ANTHROPIC_COMPLETION_ENDPOINT,
       ANTHROPIC_MESSAGES_ENDPOINT,
       ANTHROPIC_SONNET_COMPAT_ENDPOINT,
@@ -236,6 +238,22 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
       // - choices[0].message.content
       // - choices[0].message with no content if model is invoking a tool
       return body.choices?.[0]?.message?.content || "";
+    case "openai-responses":
+      // Handle the original Responses API format
+      if (body.output && Array.isArray(body.output)) {
+        // Look for a message type in the output array
+        for (const item of body.output) {
+          if (item.type === "message" && item.content && Array.isArray(item.content)) {
+            // Extract text content from each content item
+            return item.content
+              .filter((contentItem: any) => contentItem.type === "output_text")
+              .map((contentItem: any) => contentItem.text)
+              .join("");
+          }
+        }
+      }
+      // If we've been transformed to chat completion format already
+      return body.choices?.[0]?.message?.content || "";
     case "mistral-text":
       return body.outputs?.[0]?.text || "";
     case "openai-text":
@@ -287,6 +305,7 @@ export function getModelFromBody(req: Request, resBody: Record<string, any>) {
   switch (format) {
     case "openai":
     case "openai-text":
+    case "openai-responses":
       return resBody.model;
     case "mistral-ai":
     case "mistral-text":
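The `openai-responses` branch added to `getCompletionFromBody` walks the `output` array looking for a `message` item and concatenates its `output_text` parts. A minimal standalone sketch of that logic, assuming the response shape this patch expects; the type aliases are illustrative, not part of the codebase:

```ts
type ResponsesContentItem = { type: string; text?: string };
type ResponsesOutputItem = { type: string; content?: ResponsesContentItem[] };

// Mirrors the extraction added to getCompletionFromBody: find the first
// "message" item and join all of its "output_text" fragments.
function extractCompletion(body: { output?: ResponsesOutputItem[] }): string {
  for (const item of body.output ?? []) {
    if (item.type === "message" && Array.isArray(item.content)) {
      return item.content
        .filter((c) => c.type === "output_text")
        .map((c) => c.text ?? "")
        .join("");
    }
  }
  return "";
}

// Example: one message carrying two text fragments.
const text = extractCompletion({
  output: [
    {
      type: "message",
      content: [
        { type: "output_text", text: "Hello, " },
        { type: "output_text", text: "world." },
      ],
    },
  ],
});
console.log(text); // "Hello, world."
```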
diff --git a/src/proxy/middleware/request/mutators/add-key.ts b/src/proxy/middleware/request/mutators/add-key.ts
index 4907a98..43bb4de 100644
--- a/src/proxy/middleware/request/mutators/add-key.ts
+++ b/src/proxy/middleware/request/mutators/add-key.ts
@@ -51,6 +51,9 @@ export const addKey: ProxyReqMutator = (manager) => {
     case "openai-image":
       assignedKey = keyPool.get("dall-e-3", service);
       break;
+    case "openai-responses":
+      assignedKey = keyPool.get(body.model, service);
+      break;
     case "openai":
       throw new Error(
         `Outbound API ${outboundApi} is not supported for ${inboundApi}`
diff --git a/src/proxy/middleware/request/mutators/finalize-body.ts b/src/proxy/middleware/request/mutators/finalize-body.ts
index 658510d..5e5e946 100644
--- a/src/proxy/middleware/request/mutators/finalize-body.ts
+++ b/src/proxy/middleware/request/mutators/finalize-body.ts
@@ -13,6 +13,51 @@ export const finalizeBody: ProxyReqMutator = (manager) => {
   if (req.outboundApi === "anthropic-chat") {
     delete req.body.prompt;
   }
+  // For OpenAI Responses API, ensure messages is in the correct format
+  if (req.outboundApi === "openai-responses") {
+    // Format messages for the Responses API
+    if (req.body.messages) {
+      req.log.info("Formatting messages for Responses API in finalizeBody");
+      // The Responses API expects input to be an array, not an object
+      req.body.input = req.body.messages;
+      delete req.body.messages;
+    } else if (req.body.input && req.body.input.messages) {
+      req.log.info("Reformatting input.messages for Responses API in finalizeBody");
+      // If input already exists but contains a messages object, replace input with the messages array
+      req.body.input = req.body.input.messages;
+    }
+
+    // Final check to ensure max_completion_tokens is converted to max_output_tokens
+    if (req.body.max_completion_tokens) {
+      req.log.info("Converting max_completion_tokens to max_output_tokens in finalizeBody");
+      if (!req.body.max_output_tokens) {
+        req.body.max_output_tokens = req.body.max_completion_tokens;
+      }
+      delete req.body.max_completion_tokens;
+    }
+
+    // Final check to ensure max_tokens is converted to max_output_tokens
+    if (req.body.max_tokens) {
+      req.log.info("Converting max_tokens to max_output_tokens in finalizeBody");
+      if (!req.body.max_output_tokens) {
+        req.body.max_output_tokens = req.body.max_tokens;
+      }
+      delete req.body.max_tokens;
+    }
+
+    // Remove all parameters not supported by Responses API
+    const unsupportedParams = [
+      'frequency_penalty',
+      'presence_penalty',
+    ];
+
+    for (const param of unsupportedParams) {
+      if (req.body[param] !== undefined) {
+        req.log.info(`Removing unsupported parameter for Responses API: ${param}`);
+        delete req.body[param];
+      }
+    }
+  }
   const serialized =
     typeof req.body === "string" ? req.body : JSON.stringify(req.body);
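`finalizeBody` repeats the same `messages` to `input` and token-parameter rewrites that other stages perform, so the net effect on a plain JSON body is easy to state. A simplified standalone sketch, not the actual mutator (which works on `req` in place):

```ts
// Rewrites a chat-completions-style body into the shape this patch sends
// to /v1/responses: bare input array, max_output_tokens, no penalty params.
function toResponsesBody(body: Record<string, any>): Record<string, any> {
  const out = { ...body };
  if (out.messages) {
    out.input = out.messages;
    delete out.messages;
  } else if (out.input?.messages) {
    out.input = out.input.messages;
  }
  for (const key of ["max_completion_tokens", "max_tokens"]) {
    if (out[key] && !out.max_output_tokens) out.max_output_tokens = out[key];
    delete out[key];
  }
  for (const key of ["frequency_penalty", "presence_penalty"]) delete out[key];
  return out;
}

console.log(
  toResponsesBody({
    model: "o1-pro",
    messages: [{ role: "user", content: "hi" }],
    max_tokens: 256,
    frequency_penalty: 0.5,
  })
);
// { model: "o1-pro", input: [ { role: "user", content: "hi" } ],
//   max_output_tokens: 256 }
```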
diff --git a/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts b/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
index e75cd2e..5144135 100644
--- a/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
+++ b/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
@@ -31,6 +31,7 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
   applyMistralPromptFixes(req);
   applyGoogleAIKeyTransforms(req);
+  applyOpenAIResponsesTransform(req);
 
   // Native prompts are those which were already provided by the client in the
   // target API format. We don't need to transform them.
@@ -56,6 +57,58 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
   );
 };
 
+// Handle OpenAI Responses API transformation
+function applyOpenAIResponsesTransform(req: Request): void {
+  if (req.outboundApi === "openai-responses") {
+    req.log.info("Transforming request to OpenAI Responses API format");
+
+    // Store the original body for reference if needed
+    const originalBody = { ...req.body };
+
+    // Map standard OpenAI chat completions format to Responses API format
+    // The main differences are:
+    // 1. Endpoint is /v1/responses instead of /v1/chat/completions
+    // 2. 'messages' field moves to 'input.messages'
+
+    // Move messages to input.messages
+    if (req.body.messages && !req.body.input) {
+      req.body.input = {
+        messages: req.body.messages
+      };
+      delete req.body.messages;
+    }
+
+    // Keep all the original properties of the request but ensure compatibility
+    // with Responses API specifics
+    if (!req.body.previousResponseId && req.body.conversation_id) {
+      req.body.previousResponseId = req.body.conversation_id;
+      delete req.body.conversation_id;
+    }
+
+    // Convert max_tokens to max_output_tokens if present and not already set
+    if (req.body.max_tokens && !req.body.max_output_tokens) {
+      req.body.max_output_tokens = req.body.max_tokens;
+      delete req.body.max_tokens;
+    }
+
+    // Set the correct tools format if needed
+    if (req.body.tools) {
+      // Tools structure is maintained but might need conversion if non-standard
+      if (!req.body.tools.some((tool: any) => tool.type === "function" || tool.type === "web_search")) {
+        req.body.tools = req.body.tools.map((tool: any) => ({
+          ...tool,
+          type: tool.type || "function"
+        }));
+      }
+    }
+
+    req.log.info({
+      originalModel: originalBody.model,
+      newFormat: "openai-responses"
+    }, "Successfully transformed request to Responses API format");
+  }
+}
+
 // handles weird cases that don't fit into our abstractions
 function applyMistralPromptFixes(req: Request): void {
   if (req.inboundApi === "mistral-ai") {
diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
index ebb3afe..f5a21a2 100644
--- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts
+++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
@@ -28,6 +28,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
   switch (req.outboundApi) {
     case "openai":
     case "openai-text":
+    case "openai-responses":
       proxyMax = OPENAI_MAX_CONTEXT;
       break;
     case "anthropic-chat":
@@ -86,6 +87,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
     modelMax = 200000;
   } else if (model.match(/^o1-mini(-\d{4}-\d{2}-\d{2})?$/)) {
     modelMax = 128000;
+  } else if (model.match(/^o1-pro(-\d{4}-\d{2}-\d{2})?$/)) {
+    modelMax = 200000;
   } else if (model.match(/^o1-preview(-\d{4}-\d{2}-\d{2})?$/)) {
     modelMax = 128000;
   } else if (model.match(/gpt-3.5-turbo/)) {
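The new context-size branch keys off the same model-id pattern used throughout this patch: `o1-pro` with an optional `-YYYY-MM-DD` date stamp. A quick standalone check of the regex:

```ts
const O1_PRO = /^o1-pro(-\d{4}-\d{2}-\d{2})?$/;

// Snapshot ids match; other o1 variants and malformed suffixes do not.
for (const id of ["o1-pro", "o1-pro-2025-03-19", "o1-preview", "o1-pro-x"]) {
  console.log(id, O1_PRO.test(id));
}
// o1-pro true
// o1-pro-2025-03-19 true
// o1-preview false
// o1-pro-x false
```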
diff --git a/src/proxy/middleware/response/streaming/sse-message-transformer.ts b/src/proxy/middleware/response/streaming/sse-message-transformer.ts
index daf5c6a..a33a420 100644
--- a/src/proxy/middleware/response/streaming/sse-message-transformer.ts
+++ b/src/proxy/middleware/response/streaming/sse-message-transformer.ts
@@ -158,6 +158,8 @@ function getTransformer(
       : mistralAIToOpenAI;
     case "openai-image":
       throw new Error(`SSE transformation not supported for ${responseApi}`);
+    case "openai-responses":
+      throw new Error(`SSE transformation not supported for ${responseApi}`);
     default:
       assertNever(responseApi);
   }
diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts
index 6f97b8c..a0087ab 100644
--- a/src/proxy/openai.ts
+++ b/src/proxy/openai.ts
@@ -121,6 +121,9 @@ const openaiResponseHandler: ProxyResHandlerWithBody = async (
   if (req.outboundApi === "openai-text" && req.inboundApi === "openai") {
     req.log.info("Transforming Turbo-Instruct response to Chat format");
     newBody = transformTurboInstructResponse(body);
+  } else if (req.outboundApi === "openai-responses" && req.inboundApi === "openai") {
+    req.log.info("Transforming Responses API response to Chat format");
+    newBody = transformResponsesApiResponse(body);
   }
 
   res.status(200).json({ ...newBody, proxy: body.proxy });
@@ -143,6 +146,135 @@ function transformTurboInstructResponse(
   return transformed;
 }
 
+function transformResponsesApiResponse(
+  responsesBody: Record<string, any>
+): Record<string, any> {
+  // If the response is already in chat completion format, return it as is
+  if (responsesBody.choices && responsesBody.choices[0]?.message) {
+    return responsesBody;
+  }
+
+  // Create a compatible format for clients expecting chat completions format
+  const transformed: Record<string, any> = {
+    id: responsesBody.id || `chatcmpl-${Date.now()}`,
+    object: "chat.completion",
+    created: responsesBody.created_at || Math.floor(Date.now() / 1000),
+    model: responsesBody.model || "o1-pro",
+    choices: [],
+    usage: responsesBody.usage || {
+      prompt_tokens: 0,
+      completion_tokens: 0,
+      total_tokens: 0
+    }
+  };
+
+  // Extract content from the Responses API format - multiple possible structures
+
+  // Structure 1: output array with message objects
+  if (responsesBody.output && Array.isArray(responsesBody.output)) {
+    // Look for a message type in the output array
+    let messageOutput = null;
+    for (const output of responsesBody.output) {
+      if (output.type === "message") {
+        messageOutput = output;
+        break;
+      }
+    }
+
+    if (messageOutput) {
+      if (messageOutput.content && Array.isArray(messageOutput.content) && messageOutput.content.length > 0) {
+        // Handle text content
+        let content = "";
+        const toolCalls: any[] = [];
+
+        for (const contentItem of messageOutput.content) {
+          if (contentItem.type === "output_text") {
+            content += contentItem.text;
+          } else if (contentItem.type === "tool_calls" && Array.isArray(contentItem.tool_calls)) {
+            toolCalls.push(...contentItem.tool_calls);
+          }
+        }
+
+        const message: Record<string, any> = {
+          role: messageOutput.role || "assistant",
+          content: content
+        };
+
+        if (toolCalls.length > 0) {
+          message.tool_calls = toolCalls;
+        }
+
+        transformed.choices.push({
+          index: 0,
+          message,
+          finish_reason: "stop"
+        });
+      } else if (typeof messageOutput.content === 'string') {
+        // Simple string content
+        transformed.choices.push({
+          index: 0,
+          message: {
+            role: messageOutput.role || "assistant",
+            content: messageOutput.content
+          },
+          finish_reason: "stop"
+        });
+      }
+    }
+  }
+
+  // Structure 2: response object with content
+  else if (responsesBody.response && responsesBody.response.content) {
+    transformed.choices.push({
+      index: 0,
+      message: {
+        role: "assistant",
+        content: typeof responsesBody.response.content === 'string'
+          ? responsesBody.response.content
+          : JSON.stringify(responsesBody.response.content)
+      },
+      finish_reason: responsesBody.response.finish_reason || "stop"
+    });
+  }
+
+  // Structure 3: look for 'content' field directly
+  else if (responsesBody.content) {
+    transformed.choices.push({
+      index: 0,
+      message: {
+        role: "assistant",
+        content: typeof responsesBody.content === 'string'
+          ? responsesBody.content
+          : JSON.stringify(responsesBody.content)
+      },
+      finish_reason: "stop"
+    });
+  }
+
+  // If we couldn't extract content, create a basic response
+  if (transformed.choices.length === 0) {
+    transformed.choices.push({
+      index: 0,
+      message: {
+        role: "assistant",
+        content: ""
+      },
+      finish_reason: "stop"
+    });
+  }
+
+  // Copy usage information if available
+  if (responsesBody.usage) {
+    transformed.usage = {
+      prompt_tokens: responsesBody.usage.input_tokens || 0,
+      completion_tokens: responsesBody.usage.output_tokens || 0,
+      total_tokens: responsesBody.usage.total_tokens || 0
+    };
+  }
+
+  return transformed;
+}
+
 const openaiProxy = createQueuedProxyMiddleware({
   mutations: [addKey, finalizeBody],
   target: "https://api.openai.com",
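One detail of `transformResponsesApiResponse` worth calling out is the usage mapping: the Responses API reports `input_tokens`/`output_tokens`, while chat-completions clients expect `prompt_tokens`/`completion_tokens`. A standalone sketch of just that mapping:

```ts
interface ResponsesUsage {
  input_tokens?: number;
  output_tokens?: number;
  total_tokens?: number;
}
interface ChatUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
}

// Same field mapping as the "Copy usage information" block above.
function mapUsage(u: ResponsesUsage): ChatUsage {
  return {
    prompt_tokens: u.input_tokens || 0,
    completion_tokens: u.output_tokens || 0,
    total_tokens: u.total_tokens || 0,
  };
}

console.log(mapUsage({ input_tokens: 12, output_tokens: 40, total_tokens: 52 }));
// { prompt_tokens: 12, completion_tokens: 40, total_tokens: 52 }
```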
@@ -154,6 +286,13 @@ const openaiEmbeddingsProxy = createQueuedProxyMiddleware({
   target: "https://api.openai.com",
 });
 
+// New proxy middleware for the Responses API
+const openaiResponsesProxy = createQueuedProxyMiddleware({
+  mutations: [addKey, finalizeBody],
+  target: "https://api.openai.com",
+  blockingResponseHandler: openaiResponseHandler,
+});
+
 const openaiRouter = Router();
 openaiRouter.get("/v1/models", handleModelRequest);
 // Native text completion endpoint, only for turbo-instruct.
@@ -202,17 +341,83 @@ const setupChunkedTransfer: RequestHandler = (req, res, next) => {
   next();
 };
 
+// Functions to handle model-specific API routing
+function shouldUseResponsesApi(model: string): boolean {
+  return model === "o1-pro" || model.startsWith("o1-pro-");
+}
+
+// Preprocessor to redirect requests to the responses API
+const routeToResponsesApi: RequestPreprocessor = (req) => {
+  if (shouldUseResponsesApi(req.body.model)) {
+    req.log.info(`Routing ${req.body.model} to OpenAI Responses API`);
+    req.url = "/v1/responses";
+    req.outboundApi = "openai-responses";
+  }
+};
+
 // General chat completion endpoint. Turbo-instruct is not supported here.
 openaiRouter.post(
   "/v1/chat/completions",
   ipLimiter,
   createPreprocessorMiddleware(
     { inApi: "openai", outApi: "openai", service: "openai" },
-    { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] }
+    {
+      afterTransform: [
+        fixupMaxTokens,
+        setO1ReasoningEffort,
+        routeToResponsesApi
+      ]
+    }
   ),
   setupChunkedTransfer,
+  (req, _res, next) => {
+    // Route to the responses endpoint if needed
+    if (req.outboundApi === "openai-responses") {
+      // Ensure messages is moved to input properly
+      req.log.info("Final check for Responses API format in chat completions");
+      if (req.body.messages) {
+        req.log.info("Moving 'messages' to 'input' for Responses API");
+        req.body.input = req.body.messages;
+        delete req.body.messages;
+      } else if (req.body.input && req.body.input.messages) {
+        req.log.info("Reformatting input.messages for Responses API");
+        req.body.input = req.body.input.messages;
+      }
+
+      return openaiResponsesProxy(req, _res, next);
+    }
+    next();
+  },
   openaiProxy
 );
+
+// New endpoint for OpenAI Responses API
+openaiRouter.post(
+  "/v1/responses",
+  ipLimiter,
+  createPreprocessorMiddleware(
+    { inApi: "openai", outApi: "openai-responses", service: "openai" },
+    { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] }
+  ),
+  // Add final check to ensure the body is in the correct format for Responses API
+  (req, _res, next) => {
+    req.log.info("Final check for Responses API format");
+
+    // Ensure messages is properly formatted for input
+    if (req.body.messages) {
+      req.log.info("Moving 'messages' to 'input' for Responses API");
+      req.body.input = req.body.messages;
+      delete req.body.messages;
+    } else if (req.body.input && req.body.input.messages) {
+      req.log.info("Reformatting input.messages for Responses API");
+      req.body.input = req.body.input.messages;
+    }
+
+    next();
+  },
+  openaiResponsesProxy
+);
+
 // Embeddings endpoint.
 openaiRouter.post(
   "/v1/embeddings",
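The routing rule is intentionally narrow: only `o1-pro`, bare or with a suffix, is diverted to `/v1/responses`; every other model keeps the normal chat-completions path. A standalone check:

```ts
// Same predicate as shouldUseResponsesApi above.
function shouldUseResponsesApi(model: string): boolean {
  return model === "o1-pro" || model.startsWith("o1-pro-");
}

for (const m of ["o1-pro", "o1-pro-2025-03-19", "o1", "gpt-4.1"]) {
  console.log(m, shouldUseResponsesApi(m) ? "/v1/responses" : "/v1/chat/completions");
}
// o1-pro            /v1/responses
// o1-pro-2025-03-19 /v1/responses
// o1                /v1/chat/completions
// gpt-4.1           /v1/chat/completions
```

Note that `startsWith("o1-pro-")` is looser than the `^o1-pro(-\d{4}-\d{2}-\d{2})?$` pattern used in validate-context-size.ts: any `o1-pro-*` id gets routed, not just date-stamped snapshots.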
@@ -226,10 +431,30 @@ function forceModel(model: string): RequestPreprocessor {
 }
 
 function fixupMaxTokens(req: Request) {
-  if (!req.body.max_completion_tokens) {
-    req.body.max_completion_tokens = req.body.max_tokens;
+  // For Responses API, use max_output_tokens instead of max_completion_tokens
+  if (req.outboundApi === "openai-responses") {
+    if (!req.body.max_output_tokens) {
+      req.body.max_output_tokens = req.body.max_tokens || req.body.max_completion_tokens;
+    }
+    // Remove the other token params to avoid API errors
+    delete req.body.max_tokens;
+    delete req.body.max_completion_tokens;
+
+    // Remove other parameters not supported by Responses API
+    const unsupportedParams = ['frequency_penalty', 'presence_penalty'];
+    for (const param of unsupportedParams) {
+      if (req.body[param] !== undefined) {
+        req.log.info(`Removing unsupported parameter for Responses API: ${param}`);
+        delete req.body[param];
+      }
+    }
+  } else {
+    // Original behavior for other APIs
+    if (!req.body.max_completion_tokens) {
+      req.body.max_completion_tokens = req.body.max_tokens;
+    }
+    delete req.body.max_tokens;
   }
-  delete req.body.max_tokens;
 }
 
 // Models that support 'reasoning_effort'
@@ -238,7 +463,7 @@ function isO1Model(model: string): boolean {
   // - starts with 'o' followed by number (o1, o3, o4, etc.)
   // - optionally followed by suffix like -mini or -preview
   // - optionally followed by a date stamp
-  return /^o\d+(-mini|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model);
+  return /^o\d+(-mini|-pro|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model);
 }
 
 // most frontends don't currently support custom reasoning effort for o1
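The reworked `fixupMaxTokens` gives the three token-limit parameters a clear precedence for the Responses API: an explicit `max_output_tokens` wins, then `max_tokens`, then `max_completion_tokens`, and the losing spellings are always deleted. A simplified sketch of that precedence; the real function also strips penalty params and handles the non-Responses case:

```ts
function normalizeTokenLimit(body: Record<string, any>): number | undefined {
  // Mirrors fixupMaxTokens for openai-responses: keep the first limit
  // found, expose it as max_output_tokens, drop the other spellings.
  if (!body.max_output_tokens) {
    body.max_output_tokens = body.max_tokens || body.max_completion_tokens;
  }
  delete body.max_tokens;
  delete body.max_completion_tokens;
  return body.max_output_tokens;
}

console.log(normalizeTokenLimit({ max_tokens: 1024 })); // 1024
console.log(normalizeTokenLimit({ max_output_tokens: 512, max_tokens: 1024 })); // 512
```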
diff --git a/src/shared/api-schemas/index.ts b/src/shared/api-schemas/index.ts
index 8ccd5de..dadce1f 100644
--- a/src/shared/api-schemas/index.ts
+++ b/src/shared/api-schemas/index.ts
@@ -17,6 +17,10 @@ import {
   OpenAIV1ImagesGenerationSchema,
   transformOpenAIToOpenAIImage,
 } from "./openai-image";
+import {
+  OpenAIV1ResponsesSchema,
+  transformOpenAIToOpenAIResponses,
+} from "./openai-responses";
 import {
   GoogleAIV1GenerateContentSchema,
   transformOpenAIToGoogleAI,
@@ -52,6 +56,7 @@ export const API_REQUEST_TRANSFORMERS: TransformerMap = {
   "openai->anthropic-text": transformOpenAIToAnthropicText,
   "openai->openai-text": transformOpenAIToOpenAIText,
   "openai->openai-image": transformOpenAIToOpenAIImage,
+  "openai->openai-responses": transformOpenAIToOpenAIResponses,
   "openai->google-ai": transformOpenAIToGoogleAI,
   "mistral-ai->mistral-text": transformMistralChatToText,
 };
@@ -62,6 +67,7 @@ export const API_REQUEST_VALIDATORS: Record<APIFormat, ZodType<any>> = {
   openai: OpenAIV1ChatCompletionSchema,
   "openai-text": OpenAIV1TextCompletionSchema,
   "openai-image": OpenAIV1ImagesGenerationSchema,
+  "openai-responses": OpenAIV1ResponsesSchema,
   "google-ai": GoogleAIV1GenerateContentSchema,
   "mistral-ai": MistralAIV1ChatCompletionsSchema,
   "mistral-text": MistralAIV1TextCompletionsSchema,
diff --git a/src/shared/api-schemas/openai-responses.ts b/src/shared/api-schemas/openai-responses.ts
new file mode 100644
index 0000000..972ef34
--- /dev/null
+++ b/src/shared/api-schemas/openai-responses.ts
@@ -0,0 +1,61 @@
+import { z } from "zod";
+import { Request } from "express";
+import { OpenAIChatMessage, OpenAIV1ChatCompletionSchema } from "./openai";
+
+// Schema for the OpenAI Responses API based on the chat completion schema
+// with some additional fields specific to the Responses API
+export const OpenAIV1ResponsesSchema = z.object({
+  model: z.string(),
+  input: z.object({
+    messages: z.array(z.any())
+  }).optional(),
+  previousResponseId: z.string().optional(),
+  max_output_tokens: z.number().int().positive().optional(),
+  temperature: z.number().min(0).max(2).optional(),
+  top_p: z.number().min(0).max(1).optional(),
+  n: z.number().int().positive().optional(),
+  stream: z.boolean().optional(),
+  stop: z.union([z.string(), z.array(z.string())]).optional(),
+  presence_penalty: z.number().min(-2).max(2).optional(),
+  frequency_penalty: z.number().min(-2).max(2).optional(),
+  user: z.string().optional(),
+  tools: z.array(z.any()).optional(),
+  reasoning_effort: z.enum(["low", "medium", "high"]).optional(),
+});
+
+// Allow transforming from OpenAI Chat to Responses format
+export async function transformOpenAIToOpenAIResponses(
+  req: Request
+): Promise<Record<string, any>> {
+  const body = { ...req.body };
+
+  // Move 'messages' to 'input.messages' as required by the Responses API
+  if (body.messages && !body.input) {
+    body.input = {
+      messages: body.messages
+    };
+    delete body.messages;
+  }
+
+  // Convert max_tokens to max_output_tokens if present and not set
+  if (body.max_tokens && !body.max_output_tokens) {
+    body.max_output_tokens = body.max_tokens;
+    delete body.max_tokens;
+  }
+
+  // Map conversation_id to previousResponseId if present
+  if (body.conversation_id && !body.previousResponseId) {
+    body.previousResponseId = body.conversation_id;
+    delete body.conversation_id;
+  }
+
+  // Ensure tools have the right format if present
+  if (body.tools) {
+    body.tools = body.tools.map((tool: any) => ({
+      ...tool,
+      type: tool.type || "function"
+    }));
+  }
+
+  return body;
+}
\ No newline at end of file
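A quick look at how the new schema behaves. This sketch assumes `zod` is installed and inlines a trimmed copy of the schema rather than importing the module:

```ts
import { z } from "zod";

// Trimmed copy of OpenAIV1ResponsesSchema (full schema above).
const ResponsesSchema = z.object({
  model: z.string(),
  input: z.object({ messages: z.array(z.any()) }).optional(),
  max_output_tokens: z.number().int().positive().optional(),
});

const ok = ResponsesSchema.safeParse({
  model: "o1-pro",
  input: { messages: [{ role: "user", content: "hi" }] },
  max_output_tokens: 300,
});
console.log(ok.success); // true

const bad = ResponsesSchema.safeParse({ model: "o1-pro", max_output_tokens: -1 });
console.log(bad.success); // false (max_output_tokens must be positive)
```

Note the schema accepts `input` as `{ messages: [...] }`, the intermediate shape produced by `transformOpenAIToOpenAIResponses`; `finalizeBody` later flattens it to the bare array sent upstream.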
diff --git a/src/shared/key-management/index.ts b/src/shared/key-management/index.ts
index 580f2b3..40ad934 100644
--- a/src/shared/key-management/index.ts
+++ b/src/shared/key-management/index.ts
@@ -6,6 +6,7 @@ export type APIFormat =
   | "openai"
   | "openai-text"
   | "openai-image"
+  | "openai-responses" // New OpenAI Responses API for o1-pro model
   | "anthropic-chat" // Anthropic's newer messages array format
   | "anthropic-text" // Legacy flat string prompt format
   | "google-ai"
diff --git a/src/shared/key-management/openai/provider.ts b/src/shared/key-management/openai/provider.ts
index 7b978ba..92fa8b7 100644
--- a/src/shared/key-management/openai/provider.ts
+++ b/src/shared/key-management/openai/provider.ts
@@ -119,6 +119,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
       "gpt41-nanoTokens": 0,
       "o1Tokens": 0,
       "o1-miniTokens": 0,
+      "o1-proTokens": 0,
       "o3-miniTokens": 0,
       "o3Tokens": 0,
       "o4-miniTokens": 0,
diff --git a/src/shared/models.ts b/src/shared/models.ts
index 2a8dfbc..d67085b 100644
--- a/src/shared/models.ts
+++ b/src/shared/models.ts
@@ -30,6 +30,7 @@ export type OpenAIModelFamily =
   | "gpt45"
   | "o1"
   | "o1-mini"
+  | "o1-pro"
   | "o3-mini"
   | "o3"
   | "o4-mini"
@@ -78,6 +79,7 @@ export const MODEL_FAMILIES = ((
   "gpt41-nano",
   "o1",
   "o1-mini",
+  "o1-pro",
   "o3-mini",
   "o3",
   "o4-mini",
@@ -111,6 +113,7 @@ export const MODEL_FAMILIES = ((
   "azure-dall-e",
   "azure-o1",
   "azure-o1-mini",
+  "azure-o1-pro",
   "azure-o3-mini",
   "azure-o3",
   "azure-o4-mini",
@@ -146,6 +149,7 @@ export const MODEL_FAMILY_SERVICE: {
   "gpt41-nano": "openai",
   "o1": "openai",
   "o1-mini": "openai",
+  "o1-pro": "openai",
   "o3-mini": "openai",
   "o3": "openai",
   "o4-mini": "openai",
@@ -172,6 +176,7 @@ export const MODEL_FAMILY_SERVICE: {
   "azure-dall-e": "azure",
   "azure-o1": "azure",
   "azure-o1-mini": "azure",
+  "azure-o1-pro": "azure",
   "azure-o3-mini": "azure",
   "azure-o3": "azure",
   "azure-o4-mini": "azure",
@@ -206,6 +211,7 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
   "^text-embedding-ada-002$": "turbo",
   "^dall-e-\\d{1}$": "dall-e",
   "^o1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o1-mini",
+  "^o1-pro(-\\d{4}-\\d{2}-\\d{2})?$": "o1-pro",
   "^o1(-\\d{4}-\\d{2}-\\d{2})?$": "o1",
   "^o3-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o3-mini",
   "^o3(-\\d{4}-\\d{2}-\\d{2})?$": "o3",
@@ -346,6 +352,9 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
     case "mistral-text":
       modelFamily = getMistralAIModelFamily(model);
       break;
+    case "openai-responses":
+      modelFamily = getOpenAIModelFamily(model);
+      break;
     default:
       assertNever(req.outboundApi);
   }
diff --git a/src/shared/stats.ts b/src/shared/stats.ts
index de4d76d..02222cc 100644
--- a/src/shared/stats.ts
+++ b/src/shared/stats.ts
@@ -38,6 +38,11 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
     case "gpt4-turbo":
       cost = 0.00001;
       break;
+    case "azure-o1-pro":
+    case "o1-pro":
+      // OpenAI o1-pro pricing $150/1M input tokens and $600/1M output tokens
+      cost = 0.000325;
+      break;
     case "azure-o1":
     case "o1":
       // Currently we do not track output tokens separately, and O1 uses
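The blended rate deserves a sanity check: $150/1M input tokens is $0.00015 per token and $600/1M output is $0.0006 per token, so a single per-token figure is only exact for one particular input/output mix. The chosen 0.000325 corresponds to roughly 61% input and 39% output tokens:

```ts
const INPUT_USD_PER_TOKEN = 150 / 1_000_000;  // 0.00015
const OUTPUT_USD_PER_TOKEN = 600 / 1_000_000; // 0.0006

// Blended per-token cost for a given fraction of input tokens.
const blended = (inputShare: number) =>
  inputShare * INPUT_USD_PER_TOKEN + (1 - inputShare) * OUTPUT_USD_PER_TOKEN;

console.log(blended(0.611).toFixed(6)); // 0.000325
console.log(blended(0.5).toFixed(6));   // 0.000375 (a 50/50 mix would cost more)
```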
diff --git a/src/shared/tokenization/tokenizer.ts b/src/shared/tokenization/tokenizer.ts
index 65b9db8..d831102 100644
--- a/src/shared/tokenization/tokenizer.ts
+++ b/src/shared/tokenization/tokenizer.ts
@@ -108,6 +108,7 @@ export async function countTokens({
       };
     case "openai":
     case "openai-text":
+    case "openai-responses":
       return {
         ...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),
         tokenization_duration_ms: getElapsedMs(time),
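The tokenizer change simply reuses the standard OpenAI counter for the new format. For reference, a minimal standalone equivalent using the `tiktoken` npm package (assumed installed; recent versions ship the `o200k_base` encoding used by the o-series models):

```ts
import { get_encoding } from "tiktoken";

const enc = get_encoding("o200k_base");
const tokens = enc.encode("How many tokens is this prompt?");
console.log(tokens.length);
enc.free(); // tiktoken's WASM encoders must be freed explicitly
```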