o1-pro test
@@ -25,6 +25,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
   gpt45: "GPT-4.5",
   o1: "OpenAI o1",
   "o1-mini": "OpenAI o1 mini",
+  "o1-pro": "OpenAI o1 pro",
   "o3-mini": "OpenAI o3 mini",
   "o3": "OpenAI o3",
   "o4-mini": "OpenAI o4 mini",
@@ -57,6 +58,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
   "azure-gpt41-nano": "Azure GPT-4.1 Nano",
   "azure-o1": "Azure o1",
   "azure-o1-mini": "Azure o1 mini",
+  "azure-o1-pro": "Azure o1 pro",
   "azure-o3-mini": "Azure o3 mini",
   "azure-o3": "Azure o3",
   "azure-o4-mini": "Azure o4 mini",

@@ -12,6 +12,7 @@ const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
 const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
 const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
 const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
+const OPENAI_RESPONSES_ENDPOINT = "/v1/responses";
 const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
 const ANTHROPIC_MESSAGES_ENDPOINT = "/v1/messages";
 const ANTHROPIC_SONNET_COMPAT_ENDPOINT = "/v1/sonnet";
@@ -25,6 +26,7 @@ export function isTextGenerationRequest(req: Request) {
     [
       OPENAI_CHAT_COMPLETION_ENDPOINT,
       OPENAI_TEXT_COMPLETION_ENDPOINT,
+      OPENAI_RESPONSES_ENDPOINT,
       ANTHROPIC_COMPLETION_ENDPOINT,
       ANTHROPIC_MESSAGES_ENDPOINT,
       ANTHROPIC_SONNET_COMPAT_ENDPOINT,
@@ -236,6 +238,22 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
       // - choices[0].message.content
       // - choices[0].message with no content if model is invoking a tool
       return body.choices?.[0]?.message?.content || "";
+    case "openai-responses":
+      // Handle the original Responses API format
+      if (body.output && Array.isArray(body.output)) {
+        // Look for a message type in the output array
+        for (const item of body.output) {
+          if (item.type === "message" && item.content && Array.isArray(item.content)) {
+            // Extract text content from each content item
+            return item.content
+              .filter((contentItem: any) => contentItem.type === "output_text")
+              .map((contentItem: any) => contentItem.text)
+              .join("");
+          }
+        }
+      }
+      // If we've been transformed to chat completion format already
+      return body.choices?.[0]?.message?.content || "";
     case "mistral-text":
       return body.outputs?.[0]?.text || "";
     case "openai-text":
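
// Sketch (illustrative, not part of the commit): a minimal body in the Responses API
// shape the new "openai-responses" branch above expects, and the equivalent extraction
// in isolation for a single-message output. The payload itself is hypothetical.
const sampleResponsesBody = {
  output: [
    {
      type: "message",
      content: [
        { type: "output_text", text: "Hello, " },
        { type: "output_text", text: "world" },
      ],
    },
  ],
};
const firstMessage = sampleResponsesBody.output.find((item) => item.type === "message");
const completion = (firstMessage?.content ?? [])
  .filter((c) => c.type === "output_text")
  .map((c) => c.text)
  .join(""); // "Hello, world"
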
@@ -287,6 +305,7 @@ export function getModelFromBody(req: Request, resBody: Record<string, any>) {
   switch (format) {
     case "openai":
     case "openai-text":
+    case "openai-responses":
       return resBody.model;
     case "mistral-ai":
     case "mistral-text":

@@ -51,6 +51,9 @@ export const addKey: ProxyReqMutator = (manager) => {
     case "openai-image":
       assignedKey = keyPool.get("dall-e-3", service);
       break;
+    case "openai-responses":
+      assignedKey = keyPool.get(body.model, service);
+      break;
     case "openai":
       throw new Error(
         `Outbound API ${outboundApi} is not supported for ${inboundApi}`

@@ -13,6 +13,51 @@ export const finalizeBody: ProxyReqMutator = (manager) => {
   if (req.outboundApi === "anthropic-chat") {
     delete req.body.prompt;
   }
+  // For OpenAI Responses API, ensure messages is in the correct format
+  if (req.outboundApi === "openai-responses") {
+    // Format messages for the Responses API
+    if (req.body.messages) {
+      req.log.info("Formatting messages for Responses API in finalizeBody");
+      // The Responses API expects input to be an array, not an object
+      req.body.input = req.body.messages;
+      delete req.body.messages;
+    } else if (req.body.input && req.body.input.messages) {
+      req.log.info("Reformatting input.messages for Responses API in finalizeBody");
+      // If input already exists but contains a messages object, replace input with the messages array
+      req.body.input = req.body.input.messages;
+    }
+
+    // Final check to ensure max_completion_tokens is converted to max_output_tokens
+    if (req.body.max_completion_tokens) {
+      req.log.info("Converting max_completion_tokens to max_output_tokens in finalizeBody");
+      if (!req.body.max_output_tokens) {
+        req.body.max_output_tokens = req.body.max_completion_tokens;
+      }
+      delete req.body.max_completion_tokens;
+    }
+
+    // Final check to ensure max_tokens is converted to max_output_tokens
+    if (req.body.max_tokens) {
+      req.log.info("Converting max_tokens to max_output_tokens in finalizeBody");
+      if (!req.body.max_output_tokens) {
+        req.body.max_output_tokens = req.body.max_tokens;
+      }
+      delete req.body.max_tokens;
+    }
+
+    // Remove all parameters not supported by Responses API
+    const unsupportedParams = [
+      'frequency_penalty',
+      'presence_penalty',
+    ];
+
+    for (const param of unsupportedParams) {
+      if (req.body[param] !== undefined) {
+        req.log.info(`Removing unsupported parameter for Responses API: ${param}`);
+        delete req.body[param];
+      }
+    }
+  }

   const serialized =
     typeof req.body === "string" ? req.body : JSON.stringify(req.body);
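
// Sketch (illustrative, not part of the commit): the net effect of the block above on a
// typical chat-completions-shaped body bound for the Responses API. Values are made up.
const bodyBefore = {
  model: "o1-pro",
  messages: [{ role: "user", content: "hi" }],
  max_completion_tokens: 1024,
  frequency_penalty: 0.5,
};
const bodyAfter = {
  model: "o1-pro",
  input: [{ role: "user", content: "hi" }], // messages -> input, as a bare array
  max_output_tokens: 1024, // max_completion_tokens -> max_output_tokens
  // frequency_penalty dropped: listed in unsupportedParams above
};
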
@@ -31,6 +31,7 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {

   applyMistralPromptFixes(req);
   applyGoogleAIKeyTransforms(req);
+  applyOpenAIResponsesTransform(req);

   // Native prompts are those which were already provided by the client in the
   // target API format. We don't need to transform them.
@@ -56,6 +57,58 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
   );
 };

+// Handle OpenAI Responses API transformation
+function applyOpenAIResponsesTransform(req: Request): void {
+  if (req.outboundApi === "openai-responses") {
+    req.log.info("Transforming request to OpenAI Responses API format");
+
+    // Store the original body for reference if needed
+    const originalBody = { ...req.body };
+
+    // Map standard OpenAI chat completions format to Responses API format
+    // The main differences are:
+    // 1. Endpoint is /v1/responses instead of /v1/chat/completions
+    // 2. 'messages' field moves to 'input.messages'
+
+    // Move messages to input.messages
+    if (req.body.messages && !req.body.input) {
+      req.body.input = {
+        messages: req.body.messages
+      };
+      delete req.body.messages;
+    }
+
+    // Keep all the original properties of the request but ensure compatibility
+    // with Responses API specifics
+    if (!req.body.previousResponseId && req.body.conversation_id) {
+      req.body.previousResponseId = req.body.conversation_id;
+      delete req.body.conversation_id;
+    }
+
+    // Convert max_tokens to max_output_tokens if present and not already set
+    if (req.body.max_tokens && !req.body.max_output_tokens) {
+      req.body.max_output_tokens = req.body.max_tokens;
+      delete req.body.max_tokens;
+    }
+
+    // Set the correct tools format if needed
+    if (req.body.tools) {
+      // Tools structure is maintained but might need conversion if non-standard
+      if (!req.body.tools.some((tool: any) => tool.type === "function" || tool.type === "web_search")) {
+        req.body.tools = req.body.tools.map((tool: any) => ({
+          ...tool,
+          type: tool.type || "function"
+        }));
+      }
+    }
+
+    req.log.info({
+      originalModel: originalBody.model,
+      newFormat: "openai-responses"
+    }, "Successfully transformed request to Responses API format");
+  }
+}
+
 // handles weird cases that don't fit into our abstractions
 function applyMistralPromptFixes(req: Request): void {
   if (req.inboundApi === "mistral-ai") {
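
// Sketch (illustrative): the tools fixup above only defaults missing types when *no*
// entry already carries a recognized type, so a mixed list like this passes through
// unchanged — the untyped "lookup" tool is not given type "function".
const mixedTools: any[] = [{ name: "lookup" }, { type: "web_search" }];
const hasRecognizedType = mixedTools.some(
  (tool) => tool.type === "function" || tool.type === "web_search"
); // true, so the map(...) defaulting branch is skipped entirely
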
@@ -28,6 +28,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
   switch (req.outboundApi) {
     case "openai":
     case "openai-text":
+    case "openai-responses":
       proxyMax = OPENAI_MAX_CONTEXT;
       break;
     case "anthropic-chat":
@@ -86,6 +87,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
     modelMax = 200000;
   } else if (model.match(/^o1-mini(-\d{4}-\d{2}-\d{2})?$/)) {
     modelMax = 128000;
+  } else if (model.match(/^o1-pro(-\d{4}-\d{2}-\d{2})?$/)) {
+    modelMax = 200000;
   } else if (model.match(/^o1-preview(-\d{4}-\d{2}-\d{2})?$/)) {
     modelMax = 128000;
   } else if (model.match(/gpt-3.5-turbo/)) {
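
// Sketch (illustrative): the added branch keys the 200k limit off this anchored
// pattern, so date-stamped ids resolve the same way. The dated id is hypothetical.
const o1ProPattern = /^o1-pro(-\d{4}-\d{2}-\d{2})?$/;
o1ProPattern.test("o1-pro");            // true  -> modelMax = 200000
o1ProPattern.test("o1-pro-2025-03-19"); // true  -> modelMax = 200000
o1ProPattern.test("o1-preview");        // false -> falls through to the 128000 branch
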
@@ -158,6 +158,8 @@ function getTransformer(
       : mistralAIToOpenAI;
     case "openai-image":
       throw new Error(`SSE transformation not supported for ${responseApi}`);
+    case "openai-responses":
+      throw new Error(`SSE transformation not supported for ${responseApi}`);
     default:
       assertNever(responseApi);
   }

@@ -121,6 +121,9 @@ const openaiResponseHandler: ProxyResHandlerWithBody = async (
   if (req.outboundApi === "openai-text" && req.inboundApi === "openai") {
     req.log.info("Transforming Turbo-Instruct response to Chat format");
     newBody = transformTurboInstructResponse(body);
+  } else if (req.outboundApi === "openai-responses" && req.inboundApi === "openai") {
+    req.log.info("Transforming Responses API response to Chat format");
+    newBody = transformResponsesApiResponse(body);
   }

   res.status(200).json({ ...newBody, proxy: body.proxy });
@@ -143,6 +146,135 @@ function transformTurboInstructResponse(
   return transformed;
 }

+function transformResponsesApiResponse(
+  responsesBody: Record<string, any>
+): Record<string, any> {
+  // If the response is already in chat completion format, return it as is
+  if (responsesBody.choices && responsesBody.choices[0]?.message) {
+    return responsesBody;
+  }
+
+  // Create a compatible format for clients expecting chat completions format
+  const transformed: Record<string, any> = {
+    id: responsesBody.id || `chatcmpl-${Date.now()}`,
+    object: "chat.completion",
+    created: responsesBody.created_at || Math.floor(Date.now() / 1000),
+    model: responsesBody.model || "o1-pro",
+    choices: [],
+    usage: responsesBody.usage || {
+      prompt_tokens: 0,
+      completion_tokens: 0,
+      total_tokens: 0
+    }
+  };
+
+  // Extract content from the Responses API format - multiple possible structures
+
+  // Structure 1: output array with message objects
+  if (responsesBody.output && Array.isArray(responsesBody.output)) {
+    // Look for a message type in the output array
+    let messageOutput = null;
+    for (const output of responsesBody.output) {
+      if (output.type === "message") {
+        messageOutput = output;
+        break;
+      }
+    }
+
+    if (messageOutput) {
+      if (messageOutput.content && Array.isArray(messageOutput.content) && messageOutput.content.length > 0) {
+        // Handle text content
+        let content = "";
+        const toolCalls: any[] = [];
+
+        for (const contentItem of messageOutput.content) {
+          if (contentItem.type === "output_text") {
+            content += contentItem.text;
+          } else if (contentItem.type === "tool_calls" && Array.isArray(contentItem.tool_calls)) {
+            toolCalls.push(...contentItem.tool_calls);
+          }
+        }
+
+        const message: Record<string, any> = {
+          role: messageOutput.role || "assistant",
+          content: content
+        };
+
+        if (toolCalls.length > 0) {
+          message.tool_calls = toolCalls;
+        }
+
+        transformed.choices.push({
+          index: 0,
+          message,
+          finish_reason: "stop"
+        });
+      } else if (typeof messageOutput.content === 'string') {
+        // Simple string content
+        transformed.choices.push({
+          index: 0,
+          message: {
+            role: messageOutput.role || "assistant",
+            content: messageOutput.content
+          },
+          finish_reason: "stop"
+        });
+      }
+    }
+  }
+
+  // Structure 2: response object with content
+  else if (responsesBody.response && responsesBody.response.content) {
+    transformed.choices.push({
+      index: 0,
+      message: {
+        role: "assistant",
+        content: typeof responsesBody.response.content === 'string'
+          ? responsesBody.response.content
+          : JSON.stringify(responsesBody.response.content)
+      },
+      finish_reason: responsesBody.response.finish_reason || "stop"
+    });
+  }
+
+  // Structure 3: look for 'content' field directly
+  else if (responsesBody.content) {
+    transformed.choices.push({
+      index: 0,
+      message: {
+        role: "assistant",
+        content: typeof responsesBody.content === 'string'
+          ? responsesBody.content
+          : JSON.stringify(responsesBody.content)
+      },
+      finish_reason: "stop"
+    });
+  }
+
+  // If we couldn't extract content, create a basic response
+  if (transformed.choices.length === 0) {
+    transformed.choices.push({
+      index: 0,
+      message: {
+        role: "assistant",
+        content: ""
+      },
+      finish_reason: "stop"
+    });
+  }
+
+  // Copy usage information if available
+  if (responsesBody.usage) {
+    transformed.usage = {
+      prompt_tokens: responsesBody.usage.input_tokens || 0,
+      completion_tokens: responsesBody.usage.output_tokens || 0,
+      total_tokens: responsesBody.usage.total_tokens || 0
+    };
+  }
+
+  return transformed;
+}
+
 const openaiProxy = createQueuedProxyMiddleware({
   mutations: [addKey, finalizeBody],
   target: "https://api.openai.com",
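
// Sketch (illustrative, not part of the commit): a hypothetical Responses API body and
// the chat-completion shape transformResponsesApiResponse would produce via Structure 1.
const exampleIn = {
  id: "resp_123",
  created_at: 1700000000,
  model: "o1-pro",
  output: [
    { type: "message", role: "assistant", content: [{ type: "output_text", text: "42" }] },
  ],
  usage: { input_tokens: 10, output_tokens: 3, total_tokens: 13 },
};
const exampleOut = {
  id: "resp_123",
  object: "chat.completion",
  created: 1700000000,
  model: "o1-pro",
  choices: [
    { index: 0, message: { role: "assistant", content: "42" }, finish_reason: "stop" },
  ],
  usage: { prompt_tokens: 10, completion_tokens: 3, total_tokens: 13 },
};
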
@@ -154,6 +286,13 @@ const openaiEmbeddingsProxy = createQueuedProxyMiddleware({
   target: "https://api.openai.com",
 });

+// New proxy middleware for the Responses API
+const openaiResponsesProxy = createQueuedProxyMiddleware({
+  mutations: [addKey, finalizeBody],
+  target: "https://api.openai.com",
+  blockingResponseHandler: openaiResponseHandler,
+});
+
 const openaiRouter = Router();
 openaiRouter.get("/v1/models", handleModelRequest);
 // Native text completion endpoint, only for turbo-instruct.
@@ -202,17 +341,83 @@ const setupChunkedTransfer: RequestHandler = (req, res, next) => {
   next();
 };

+// Functions to handle model-specific API routing
+function shouldUseResponsesApi(model: string): boolean {
+  return model === "o1-pro" || model.startsWith("o1-pro-");
+}
+
+// Preprocessor to redirect requests to the responses API
+const routeToResponsesApi: RequestPreprocessor = (req) => {
+  if (shouldUseResponsesApi(req.body.model)) {
+    req.log.info(`Routing ${req.body.model} to OpenAI Responses API`);
+    req.url = "/v1/responses";
+    req.outboundApi = "openai-responses";
+  }
+};
+
 // General chat completion endpoint. Turbo-instruct is not supported here.
 openaiRouter.post(
   "/v1/chat/completions",
   ipLimiter,
   createPreprocessorMiddleware(
     { inApi: "openai", outApi: "openai", service: "openai" },
-    { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] }
+    {
+      afterTransform: [
+        fixupMaxTokens,
+        setO1ReasoningEffort,
+        routeToResponsesApi
+      ]
+    }
   ),
+  setupChunkedTransfer,
+  (req, _res, next) => {
+    // Route to the responses endpoint if needed
+    if (req.outboundApi === "openai-responses") {
+      // Ensure messages is moved to input properly
+      req.log.info("Final check for Responses API format in chat completions");
+      if (req.body.messages) {
+        req.log.info("Moving 'messages' to 'input' for Responses API");
+        req.body.input = req.body.messages;
+        delete req.body.messages;
+      } else if (req.body.input && req.body.input.messages) {
+        req.log.info("Reformatting input.messages for Responses API");
+        req.body.input = req.body.input.messages;
+      }
+
+      return openaiResponsesProxy(req, _res, next);
+    }
+    next();
+  },
   openaiProxy
 );

+// New endpoint for OpenAI Responses API
+openaiRouter.post(
+  "/v1/responses",
+  ipLimiter,
+  createPreprocessorMiddleware(
+    { inApi: "openai", outApi: "openai-responses", service: "openai" },
+    { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] }
+  ),
+  // Add final check to ensure the body is in the correct format for Responses API
+  (req, _res, next) => {
+    req.log.info("Final check for Responses API format");
+
+    // Ensure messages is properly formatted for input
+    if (req.body.messages) {
+      req.log.info("Moving 'messages' to 'input' for Responses API");
+      req.body.input = req.body.messages;
+      delete req.body.messages;
+    } else if (req.body.input && req.body.input.messages) {
+      req.log.info("Reformatting input.messages for Responses API");
+      req.body.input = req.body.input.messages;
+    }
+
+    next();
+  },
+  openaiResponsesProxy
+);
+
 // Embeddings endpoint.
 openaiRouter.post(
   "/v1/embeddings",
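
// Sketch (illustrative): the request flow the hunk above sets up for o1-pro.
// 1. A client POSTs /v1/chat/completions with body.model === "o1-pro".
// 2. routeToResponsesApi (run afterTransform) rewrites req.url to "/v1/responses" and
//    flips req.outboundApi to "openai-responses".
// 3. The inline handler moves body.messages into body.input, then dispatches to
//    openaiResponsesProxy instead of falling through to openaiProxy.
// Every other model skips step 2 and takes the normal openaiProxy path unchanged.
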
@@ -226,10 +431,30 @@ function forceModel(model: string): RequestPreprocessor {
 }

 function fixupMaxTokens(req: Request) {
-  if (!req.body.max_completion_tokens) {
-    req.body.max_completion_tokens = req.body.max_tokens;
+  // For Responses API, use max_output_tokens instead of max_completion_tokens
+  if (req.outboundApi === "openai-responses") {
+    if (!req.body.max_output_tokens) {
+      req.body.max_output_tokens = req.body.max_tokens || req.body.max_completion_tokens;
+    }
+    // Remove the other token params to avoid API errors
+    delete req.body.max_tokens;
+    delete req.body.max_completion_tokens;
+
+    // Remove other parameters not supported by Responses API
+    const unsupportedParams = ['frequency_penalty', 'presence_penalty'];
+    for (const param of unsupportedParams) {
+      if (req.body[param] !== undefined) {
+        req.log.info(`Removing unsupported parameter for Responses API: ${param}`);
+        delete req.body[param];
+      }
+    }
+  } else {
+    // Original behavior for other APIs
+    if (!req.body.max_completion_tokens) {
+      req.body.max_completion_tokens = req.body.max_tokens;
+    }
+    delete req.body.max_tokens;
   }
-  delete req.body.max_tokens;
 }

 // Models that support 'reasoning_effort'
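
// Sketch (illustrative): the revised fixupMaxTokens on the same input under each path,
// e.g. a body of { max_tokens: 512, frequency_penalty: 0.5 }:
//   outboundApi === "openai-responses" -> { max_output_tokens: 512 }
//     (max_tokens/max_completion_tokens deleted, penalties stripped)
//   any other outboundApi -> { max_completion_tokens: 512, frequency_penalty: 0.5 }
//     (only max_tokens is renamed; penalties are left alone)
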
@@ -238,7 +463,7 @@ function isO1Model(model: string): boolean {
   // - starts with 'o' followed by number (o1, o3, o4, etc.)
   // - optionally followed by suffix like -mini or -preview
   // - optionally followed by a date stamp
-  return /^o\d+(-mini|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model);
+  return /^o\d+(-mini|-pro|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model);
 }

 // most frontends don't currently support custom reasoning effort for o1
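
// Sketch (illustrative): what the widened alternation changes. The dated id is hypothetical.
const reasoningPattern = /^o\d+(-mini|-pro|-preview)?(-\d{4}-\d{2}-\d{2})?$/;
reasoningPattern.test("o1-pro");             // true — previously false, so o1-pro now gets reasoning_effort
reasoningPattern.test("o3-mini-2025-01-31"); // true
reasoningPattern.test("gpt-4o");             // false
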
@@ -17,6 +17,10 @@ import {
   OpenAIV1ImagesGenerationSchema,
   transformOpenAIToOpenAIImage,
 } from "./openai-image";
+import {
+  OpenAIV1ResponsesSchema,
+  transformOpenAIToOpenAIResponses,
+} from "./openai-responses";
 import {
   GoogleAIV1GenerateContentSchema,
   transformOpenAIToGoogleAI,
@@ -52,6 +56,7 @@ export const API_REQUEST_TRANSFORMERS: TransformerMap = {
   "openai->anthropic-text": transformOpenAIToAnthropicText,
   "openai->openai-text": transformOpenAIToOpenAIText,
   "openai->openai-image": transformOpenAIToOpenAIImage,
+  "openai->openai-responses": transformOpenAIToOpenAIResponses,
   "openai->google-ai": transformOpenAIToGoogleAI,
   "mistral-ai->mistral-text": transformMistralChatToText,
 };
@@ -62,6 +67,7 @@ export const API_REQUEST_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
   openai: OpenAIV1ChatCompletionSchema,
   "openai-text": OpenAIV1TextCompletionSchema,
   "openai-image": OpenAIV1ImagesGenerationSchema,
+  "openai-responses": OpenAIV1ResponsesSchema,
   "google-ai": GoogleAIV1GenerateContentSchema,
   "mistral-ai": MistralAIV1ChatCompletionsSchema,
   "mistral-text": MistralAIV1TextCompletionsSchema,

@@ -0,0 +1,61 @@
+import { z } from "zod";
+import { Request } from "express";
+import { OpenAIChatMessage, OpenAIV1ChatCompletionSchema } from "./openai";
+
+// Schema for the OpenAI Responses API based on the chat completion schema
+// with some additional fields specific to the Responses API
+export const OpenAIV1ResponsesSchema = z.object({
+  model: z.string(),
+  input: z.object({
+    messages: z.array(z.any())
+  }).optional(),
+  previousResponseId: z.string().optional(),
+  max_output_tokens: z.number().int().positive().optional(),
+  temperature: z.number().min(0).max(2).optional(),
+  top_p: z.number().min(0).max(1).optional(),
+  n: z.number().int().positive().optional(),
+  stream: z.boolean().optional(),
+  stop: z.union([z.string(), z.array(z.string())]).optional(),
+  presence_penalty: z.number().min(-2).max(2).optional(),
+  frequency_penalty: z.number().min(-2).max(2).optional(),
+  user: z.string().optional(),
+  tools: z.array(z.any()).optional(),
+  reasoning_effort: z.enum(["low", "medium", "high"]).optional(),
+});
+
+// Allow transforming from OpenAI Chat to Responses format
+export async function transformOpenAIToOpenAIResponses(
+  req: Request
+): Promise<z.infer<typeof OpenAIV1ResponsesSchema>> {
+  const body = { ...req.body };
+
+  // Move 'messages' to 'input.messages' as required by the Responses API
+  if (body.messages && !body.input) {
+    body.input = {
+      messages: body.messages
+    };
+    delete body.messages;
+  }
+
+  // Convert max_tokens to max_output_tokens if present and not set
+  if (body.max_tokens && !body.max_output_tokens) {
+    body.max_output_tokens = body.max_tokens;
+    delete body.max_tokens;
+  }
+
+  // Map conversation_id to previousResponseId if present
+  if (body.conversation_id && !body.previousResponseId) {
+    body.previousResponseId = body.conversation_id;
+    delete body.conversation_id;
+  }
+
+  // Ensure tools have the right format if present
+  if (body.tools) {
+    body.tools = body.tools.map((tool: any) => ({
+      ...tool,
+      type: tool.type || "function"
+    }));
+  }
+
+  return body;
+}
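
// Sketch (illustrative): exercising the new schema on a hypothetical payload. Field
// names mirror the definitions above; zod's safeParse reports success or failure.
const candidate = {
  model: "o1-pro",
  input: { messages: [{ role: "user", content: "hi" }] },
  max_output_tokens: 256,
  reasoning_effort: "high",
};
OpenAIV1ResponsesSchema.safeParse(candidate).success; // true
OpenAIV1ResponsesSchema.safeParse({ input: {} }).success; // false — model is required (and input.messages is missing)
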
@@ -6,6 +6,7 @@ export type APIFormat =
   | "openai"
   | "openai-text"
   | "openai-image"
+  | "openai-responses" // New OpenAI Responses API for o1-pro model
   | "anthropic-chat" // Anthropic's newer messages array format
   | "anthropic-text" // Legacy flat string prompt format
   | "google-ai"

@@ -119,6 +119,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
       "gpt41-nanoTokens": 0,
       "o1Tokens": 0,
       "o1-miniTokens": 0,
+      "o1-proTokens": 0,
       "o3-miniTokens": 0,
       "o3Tokens": 0,
       "o4-miniTokens": 0,

@@ -30,6 +30,7 @@ export type OpenAIModelFamily =
   | "gpt45"
   | "o1"
   | "o1-mini"
+  | "o1-pro"
   | "o3-mini"
   | "o3"
   | "o4-mini"
@@ -78,6 +79,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
   "gpt41-nano",
   "o1",
   "o1-mini",
+  "o1-pro",
   "o3-mini",
   "o3",
   "o4-mini",
@@ -111,6 +113,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
   "azure-dall-e",
   "azure-o1",
   "azure-o1-mini",
+  "azure-o1-pro",
   "azure-o3-mini",
   "azure-o3",
   "azure-o4-mini",
@@ -146,6 +149,7 @@ export const MODEL_FAMILY_SERVICE: {
   "gpt41-nano": "openai",
   "o1": "openai",
   "o1-mini": "openai",
+  "o1-pro": "openai",
   "o3-mini": "openai",
   "o3": "openai",
   "o4-mini": "openai",
@@ -172,6 +176,7 @@ export const MODEL_FAMILY_SERVICE: {
   "azure-dall-e": "azure",
   "azure-o1": "azure",
   "azure-o1-mini": "azure",
+  "azure-o1-pro": "azure",
   "azure-o3-mini": "azure",
   "azure-o3": "azure",
   "azure-o4-mini": "azure",
@@ -206,6 +211,7 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
   "^text-embedding-ada-002$": "turbo",
   "^dall-e-\\d{1}$": "dall-e",
   "^o1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o1-mini",
+  "^o1-pro(-\\d{4}-\\d{2}-\\d{2})?$": "o1-pro",
   "^o1(-\\d{4}-\\d{2}-\\d{2})?$": "o1",
   "^o3-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o3-mini",
   "^o3(-\\d{4}-\\d{2}-\\d{2})?$": "o3",
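
// Sketch (illustrative): the map's patterns are fully anchored, so the new o1-pro entry
// cannot collide with the bare o1 entry even though both ids share a prefix.
/^o1(-\d{4}-\d{2}-\d{2})?$/.test("o1-pro");                // false — "o1-pro" never falls into the o1 family
/^o1-pro(-\d{4}-\d{2}-\d{2})?$/.test("o1-pro-2025-03-19"); // true (hypothetical dated id)
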
@@ -346,6 +352,9 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
     case "mistral-text":
       modelFamily = getMistralAIModelFamily(model);
       break;
+    case "openai-responses":
+      modelFamily = getOpenAIModelFamily(model);
+      break;
     default:
       assertNever(req.outboundApi);
   }

@@ -38,6 +38,11 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
     case "gpt4-turbo":
       cost = 0.00001;
       break;
+    case "azure-o1-pro":
+    case "o1-pro":
+      // OpenAI o1-pro pricing $150/1M input tokens and $600/1M output tokens
+      cost = 0.000325;
+      break;
     case "azure-o1":
     case "o1":
       // Currently we do not track output tokens separately, and O1 uses
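
// Sketch (illustrative): the proxy tracks one blended token count (see the o1 comment
// above about not separating output tokens), so 0.000325 prices every tracked o1-pro
// token at $325 per million — between the quoted $150/1M input and $600/1M output rates.
const O1_PRO_COST_PER_TOKEN = 0.000325;
O1_PRO_COST_PER_TOKEN * 20_000; // 6.5 USD for a hypothetical 20k-token request
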
@@ -108,6 +108,7 @@ export async function countTokens({
       };
     case "openai":
     case "openai-text":
+    case "openai-responses":
       return {
         ...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),
         tokenization_duration_ms: getElapsedMs(time),