o1-pro test

This commit is contained in:
reanon
2025-04-17 11:33:58 +00:00
parent 8081d9516d
commit 5eb4858c69
15 changed files with 441 additions and 5 deletions
+2
View File
@@ -25,6 +25,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
gpt45: "GPT-4.5",
o1: "OpenAI o1",
"o1-mini": "OpenAI o1 mini",
"o1-pro": "OpenAI o1 pro",
"o3-mini": "OpenAI o3 mini",
"o3": "OpenAI o3",
"o4-mini": "OpenAI o4 mini",
@@ -57,6 +58,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"azure-gpt41-nano": "Azure GPT-4.1 Nano",
"azure-o1": "Azure o1",
"azure-o1-mini": "Azure o1 mini",
"azure-o1-pro": "Azure o1 pro",
"azure-o3-mini": "Azure o3 mini",
"azure-o3": "Azure o3",
"azure-o4-mini": "Azure o4 mini",
+19
View File
@@ -12,6 +12,7 @@ const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
const OPENAI_RESPONSES_ENDPOINT = "/v1/responses";
const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
const ANTHROPIC_MESSAGES_ENDPOINT = "/v1/messages";
const ANTHROPIC_SONNET_COMPAT_ENDPOINT = "/v1/sonnet";
@@ -25,6 +26,7 @@ export function isTextGenerationRequest(req: Request) {
[
OPENAI_CHAT_COMPLETION_ENDPOINT,
OPENAI_TEXT_COMPLETION_ENDPOINT,
OPENAI_RESPONSES_ENDPOINT,
ANTHROPIC_COMPLETION_ENDPOINT,
ANTHROPIC_MESSAGES_ENDPOINT,
ANTHROPIC_SONNET_COMPAT_ENDPOINT,
@@ -236,6 +238,22 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
// - choices[0].message.content
// - choices[0].message with no content if model is invoking a tool
return body.choices?.[0]?.message?.content || "";
case "openai-responses":
// Handle the original Responses API format
if (body.output && Array.isArray(body.output)) {
// Look for a message type in the output array
for (const item of body.output) {
if (item.type === "message" && item.content && Array.isArray(item.content)) {
// Extract text content from each content item
return item.content
.filter((contentItem: any) => contentItem.type === "output_text")
.map((contentItem: any) => contentItem.text)
.join("");
}
}
}
// If we've been transformed to chat completion format already
return body.choices?.[0]?.message?.content || "";
case "mistral-text":
return body.outputs?.[0]?.text || "";
case "openai-text":
@@ -287,6 +305,7 @@ export function getModelFromBody(req: Request, resBody: Record<string, any>) {
switch (format) {
case "openai":
case "openai-text":
case "openai-responses":
return resBody.model;
case "mistral-ai":
case "mistral-text":
@@ -51,6 +51,9 @@ export const addKey: ProxyReqMutator = (manager) => {
case "openai-image":
assignedKey = keyPool.get("dall-e-3", service);
break;
case "openai-responses":
assignedKey = keyPool.get(body.model, service);
break;
case "openai":
throw new Error(
`Outbound API ${outboundApi} is not supported for ${inboundApi}`
@@ -13,6 +13,51 @@ export const finalizeBody: ProxyReqMutator = (manager) => {
if (req.outboundApi === "anthropic-chat") {
delete req.body.prompt;
}
// For OpenAI Responses API, ensure messages is in the correct format
if (req.outboundApi === "openai-responses") {
// Format messages for the Responses API
if (req.body.messages) {
req.log.info("Formatting messages for Responses API in finalizeBody");
// The Responses API expects input to be an array, not an object
req.body.input = req.body.messages;
delete req.body.messages;
} else if (req.body.input && req.body.input.messages) {
req.log.info("Reformatting input.messages for Responses API in finalizeBody");
// If input already exists but contains a messages object, replace input with the messages array
req.body.input = req.body.input.messages;
}
// Final check to ensure max_completion_tokens is converted to max_output_tokens
if (req.body.max_completion_tokens) {
req.log.info("Converting max_completion_tokens to max_output_tokens in finalizeBody");
if (!req.body.max_output_tokens) {
req.body.max_output_tokens = req.body.max_completion_tokens;
}
delete req.body.max_completion_tokens;
}
// Final check to ensure max_tokens is converted to max_output_tokens
if (req.body.max_tokens) {
req.log.info("Converting max_tokens to max_output_tokens in finalizeBody");
if (!req.body.max_output_tokens) {
req.body.max_output_tokens = req.body.max_tokens;
}
delete req.body.max_tokens;
}
// Remove all parameters not supported by Responses API
const unsupportedParams = [
'frequency_penalty',
'presence_penalty',
];
for (const param of unsupportedParams) {
if (req.body[param] !== undefined) {
req.log.info(`Removing unsupported parameter for Responses API: ${param}`);
delete req.body[param];
}
}
}
const serialized =
typeof req.body === "string" ? req.body : JSON.stringify(req.body);
@@ -31,6 +31,7 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
applyMistralPromptFixes(req);
applyGoogleAIKeyTransforms(req);
applyOpenAIResponsesTransform(req);
// Native prompts are those which were already provided by the client in the
// target API format. We don't need to transform them.
@@ -56,6 +57,58 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
);
};
// Handle OpenAI Responses API transformation
function applyOpenAIResponsesTransform(req: Request): void {
  if (req.outboundApi !== "openai-responses") {
    return;
  }
  req.log.info("Transforming request to OpenAI Responses API format");

  // Shallow copy kept only so we can report the pre-transform model below.
  const originalBody = { ...req.body };
  const body = req.body;

  // The Responses API nests the chat messages under 'input' instead of a
  // top-level 'messages' array (endpoint is /v1/responses, not
  // /v1/chat/completions).
  if (body.messages && !body.input) {
    body.input = { messages: body.messages };
    delete body.messages;
  }

  // Carry an existing conversation id over as previousResponseId.
  if (!body.previousResponseId && body.conversation_id) {
    body.previousResponseId = body.conversation_id;
    delete body.conversation_id;
  }

  // Translate max_tokens to max_output_tokens unless the latter is already set.
  if (body.max_tokens && !body.max_output_tokens) {
    body.max_output_tokens = body.max_tokens;
    delete body.max_tokens;
  }

  // Default missing tool types to "function" — but only when no tool in the
  // list already declares a recognized type (matches original behavior).
  if (body.tools) {
    const hasRecognizedType = body.tools.some(
      (tool: any) => tool.type === "function" || tool.type === "web_search"
    );
    if (!hasRecognizedType) {
      body.tools = body.tools.map((tool: any) => ({
        ...tool,
        type: tool.type || "function",
      }));
    }
  }

  req.log.info(
    {
      originalModel: originalBody.model,
      newFormat: "openai-responses",
    },
    "Successfully transformed request to Responses API format"
  );
}
// handles weird cases that don't fit into our abstractions
function applyMistralPromptFixes(req: Request): void {
if (req.inboundApi === "mistral-ai") {
@@ -28,6 +28,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
switch (req.outboundApi) {
case "openai":
case "openai-text":
case "openai-responses":
proxyMax = OPENAI_MAX_CONTEXT;
break;
case "anthropic-chat":
@@ -86,6 +87,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 200000;
} else if (model.match(/^o1-mini(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 128000;
} else if (model.match(/^o1-pro(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 200000;
} else if (model.match(/^o1-preview(-\d{4}-\d{2}-\d{2})?$/)) {
modelMax = 128000;
} else if (model.match(/gpt-3.5-turbo/)) {
@@ -158,6 +158,8 @@ function getTransformer(
: mistralAIToOpenAI;
case "openai-image":
throw new Error(`SSE transformation not supported for ${responseApi}`);
case "openai-responses":
throw new Error(`SSE transformation not supported for ${responseApi}`);
default:
assertNever(responseApi);
}
+230 -5
View File
@@ -121,6 +121,9 @@ const openaiResponseHandler: ProxyResHandlerWithBody = async (
if (req.outboundApi === "openai-text" && req.inboundApi === "openai") {
req.log.info("Transforming Turbo-Instruct response to Chat format");
newBody = transformTurboInstructResponse(body);
} else if (req.outboundApi === "openai-responses" && req.inboundApi === "openai") {
req.log.info("Transforming Responses API response to Chat format");
newBody = transformResponsesApiResponse(body);
}
res.status(200).json({ ...newBody, proxy: body.proxy });
@@ -143,6 +146,135 @@ function transformTurboInstructResponse(
return transformed;
}
/**
 * Converts an OpenAI Responses API response body into chat-completion
 * format so clients that called /v1/chat/completions get the shape they
 * expect.
 *
 * Handles three observed upstream shapes:
 *   1. an `output` array containing a `message` item (documented format)
 *   2. a `response` object carrying `content`
 *   3. a bare top-level `content` field
 * Falls back to an empty assistant message when no content is found.
 */
function transformResponsesApiResponse(
  responsesBody: Record<string, any>
): Record<string, any> {
  // Already chat-completion shaped? Pass through untouched.
  if (responsesBody.choices && responsesBody.choices[0]?.message) {
    return responsesBody;
  }

  // Skeleton of a chat.completion response; usage is refined at the end.
  const transformed: Record<string, any> = {
    id: responsesBody.id || `chatcmpl-${Date.now()}`,
    object: "chat.completion",
    created: responsesBody.created_at || Math.floor(Date.now() / 1000),
    model: responsesBody.model || "o1-pro",
    choices: [],
    usage: responsesBody.usage || {
      prompt_tokens: 0,
      completion_tokens: 0,
      total_tokens: 0
    }
  };

  // Structure 1: output array with message objects
  if (responsesBody.output && Array.isArray(responsesBody.output)) {
    const messageOutput = responsesBody.output.find(
      (output: any) => output.type === "message"
    );

    if (messageOutput) {
      if (
        messageOutput.content &&
        Array.isArray(messageOutput.content) &&
        messageOutput.content.length > 0
      ) {
        // Concatenate text parts; collect any tool calls alongside them.
        let content = "";
        const toolCalls: any[] = [];
        for (const contentItem of messageOutput.content) {
          if (contentItem.type === "output_text") {
            content += contentItem.text;
          } else if (
            contentItem.type === "tool_calls" &&
            Array.isArray(contentItem.tool_calls)
          ) {
            toolCalls.push(...contentItem.tool_calls);
          }
        }

        const message: Record<string, any> = {
          role: messageOutput.role || "assistant",
          content: content
        };
        if (toolCalls.length > 0) {
          message.tool_calls = toolCalls;
        }

        transformed.choices.push({
          index: 0,
          message,
          // FIX: chat-completion clients expect "tool_calls" when the model
          // invoked a tool; previously this always reported "stop".
          finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop"
        });
      } else if (typeof messageOutput.content === "string") {
        // Simple string content
        transformed.choices.push({
          index: 0,
          message: {
            role: messageOutput.role || "assistant",
            content: messageOutput.content
          },
          finish_reason: "stop"
        });
      }
    }
  }
  // Structure 2: response object with content
  else if (responsesBody.response && responsesBody.response.content) {
    transformed.choices.push({
      index: 0,
      message: {
        role: "assistant",
        content:
          typeof responsesBody.response.content === "string"
            ? responsesBody.response.content
            : JSON.stringify(responsesBody.response.content)
      },
      finish_reason: responsesBody.response.finish_reason || "stop"
    });
  }
  // Structure 3: look for 'content' field directly
  else if (responsesBody.content) {
    transformed.choices.push({
      index: 0,
      message: {
        role: "assistant",
        content:
          typeof responsesBody.content === "string"
            ? responsesBody.content
            : JSON.stringify(responsesBody.content)
      },
      finish_reason: "stop"
    });
  }

  // If we couldn't extract content, create a basic response
  if (transformed.choices.length === 0) {
    transformed.choices.push({
      index: 0,
      message: { role: "assistant", content: "" },
      finish_reason: "stop"
    });
  }

  // Map Responses API usage names (input/output_tokens) onto the
  // chat-completion names (prompt/completion_tokens).
  if (responsesBody.usage) {
    const promptTokens = responsesBody.usage.input_tokens || 0;
    const completionTokens = responsesBody.usage.output_tokens || 0;
    transformed.usage = {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      // FIX: previously defaulted to 0 when total_tokens was absent; derive
      // it from the parts so downstream accounting stays correct.
      total_tokens:
        responsesBody.usage.total_tokens || promptTokens + completionTokens
    };
  }

  return transformed;
}
const openaiProxy = createQueuedProxyMiddleware({
mutations: [addKey, finalizeBody],
target: "https://api.openai.com",
@@ -154,6 +286,13 @@ const openaiEmbeddingsProxy = createQueuedProxyMiddleware({
target: "https://api.openai.com",
});
// New proxy middleware for the Responses API.
// Same upstream target and request mutators (key assignment + body
// finalization) as the regular OpenAI proxy; the blocking response handler
// lets openaiResponseHandler convert Responses API bodies back into
// chat-completion format before replying to the client. SSE streaming is
// not supported for the openai-responses format (the SSE transformer
// throws for it), so responses are handled as blocking only.
const openaiResponsesProxy = createQueuedProxyMiddleware({
  mutations: [addKey, finalizeBody],
  target: "https://api.openai.com",
  blockingResponseHandler: openaiResponseHandler,
});
const openaiRouter = Router();
openaiRouter.get("/v1/models", handleModelRequest);
// Native text completion endpoint, only for turbo-instruct.
@@ -202,17 +341,83 @@ const setupChunkedTransfer: RequestHandler = (req, res, next) => {
next();
};
// Functions to handle model-specific API routing

/** True when `model` is "o1-pro" or any dated o1-pro snapshot ("o1-pro-..."). */
function shouldUseResponsesApi(model: string): boolean {
  return /^o1-pro(-|$)/.test(model);
}
// Preprocessor to redirect requests to the responses API.
// Runs after the standard transforms; when the requested model needs the
// Responses API (o1-pro family), it rewrites the outbound URL and format so
// downstream middleware dispatches to the Responses proxy instead.
const routeToResponsesApi: RequestPreprocessor = (req) => {
  const model = req.body.model;
  if (!shouldUseResponsesApi(model)) {
    return;
  }
  req.log.info(`Routing ${model} to OpenAI Responses API`);
  req.url = "/v1/responses";
  req.outboundApi = "openai-responses";
};
// General chat completion endpoint. Turbo-instruct is not supported here.
openaiRouter.post(
  "/v1/chat/completions",
  ipLimiter,
  // Standard openai->openai preprocessing. routeToResponsesApi runs last in
  // afterTransform so it can flip outboundApi to "openai-responses" for
  // o1-pro models after the token and reasoning-effort fixups have applied.
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai", service: "openai" },
    {
      afterTransform: [
        fixupMaxTokens,
        setO1ReasoningEffort,
        routeToResponsesApi
      ]
    }
  ),
  setupChunkedTransfer,
  // Dispatcher: requests re-routed to the Responses API take the dedicated
  // proxy; everything else falls through to the regular OpenAI proxy.
  (req, _res, next) => {
    // Route to the responses endpoint if needed
    if (req.outboundApi === "openai-responses") {
      // Ensure messages is moved to input properly
      req.log.info("Final check for Responses API format in chat completions");
      if (req.body.messages) {
        req.log.info("Moving 'messages' to 'input' for Responses API");
        // NOTE(review): this assigns the raw messages ARRAY to `input`,
        // whereas transformOpenAIToOpenAIResponses wraps it as
        // { messages: [...] }; the else-branch below and finalizeBody both
        // flatten to the array form — confirm the array shape is intended.
        req.body.input = req.body.messages;
        delete req.body.messages;
      } else if (req.body.input && req.body.input.messages) {
        req.log.info("Reformatting input.messages for Responses API");
        req.body.input = req.body.input.messages;
      }
      return openaiResponsesProxy(req, _res, next);
    }
    next();
  },
  openaiProxy
);
// New endpoint for OpenAI Responses API.
// Accepts chat-completion style bodies and converts them to the Responses
// format via the openai->openai-responses transformer before proxying.
// NOTE(review): the inline middleware below duplicates the messages->input
// reshaping also done in finalizeBody — presumably defense in depth; the
// duplication could be consolidated.
openaiRouter.post(
  "/v1/responses",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai-responses", service: "openai" },
    { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] }
  ),
  // Add final check to ensure the body is in the correct format for Responses API
  (req, _res, next) => {
    req.log.info("Final check for Responses API format");
    // Ensure messages is properly formatted for input
    if (req.body.messages) {
      req.log.info("Moving 'messages' to 'input' for Responses API");
      req.body.input = req.body.messages;
      delete req.body.messages;
    } else if (req.body.input && req.body.input.messages) {
      req.log.info("Reformatting input.messages for Responses API");
      // Flatten { messages: [...] } down to the bare array.
      req.body.input = req.body.input.messages;
    }
    next();
  },
  openaiResponsesProxy
);
// Embeddings endpoint.
openaiRouter.post(
"/v1/embeddings",
@@ -226,10 +431,30 @@ function forceModel(model: string): RequestPreprocessor {
}
function fixupMaxTokens(req: Request) {
if (!req.body.max_completion_tokens) {
req.body.max_completion_tokens = req.body.max_tokens;
// For Responses API, use max_output_tokens instead of max_completion_tokens
if (req.outboundApi === "openai-responses") {
if (!req.body.max_output_tokens) {
req.body.max_output_tokens = req.body.max_tokens || req.body.max_completion_tokens;
}
// Remove the other token params to avoid API errors
delete req.body.max_tokens;
delete req.body.max_completion_tokens;
// Remove other parameters not supported by Responses API
const unsupportedParams = ['frequency_penalty', 'presence_penalty'];
for (const param of unsupportedParams) {
if (req.body[param] !== undefined) {
req.log.info(`Removing unsupported parameter for Responses API: ${param}`);
delete req.body[param];
}
}
} else {
// Original behavior for other APIs
if (!req.body.max_completion_tokens) {
req.body.max_completion_tokens = req.body.max_tokens;
}
delete req.body.max_tokens;
}
delete req.body.max_tokens;
}
// Models that support 'reasoning_effort'
@@ -238,7 +463,7 @@ function isO1Model(model: string): boolean {
// - starts with 'o' followed by number (o1, o3, o4, etc.)
// - optionally followed by suffix like -mini or -preview
// - optionally followed by a date stamp
return /^o\d+(-mini|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model);
return /^o\d+(-mini|-pro|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model);
}
// most frontends don't currently support custom reasoning effort for o1
+6
View File
@@ -17,6 +17,10 @@ import {
OpenAIV1ImagesGenerationSchema,
transformOpenAIToOpenAIImage,
} from "./openai-image";
import {
OpenAIV1ResponsesSchema,
transformOpenAIToOpenAIResponses,
} from "./openai-responses";
import {
GoogleAIV1GenerateContentSchema,
transformOpenAIToGoogleAI,
@@ -52,6 +56,7 @@ export const API_REQUEST_TRANSFORMERS: TransformerMap = {
"openai->anthropic-text": transformOpenAIToAnthropicText,
"openai->openai-text": transformOpenAIToOpenAIText,
"openai->openai-image": transformOpenAIToOpenAIImage,
"openai->openai-responses": transformOpenAIToOpenAIResponses,
"openai->google-ai": transformOpenAIToGoogleAI,
"mistral-ai->mistral-text": transformMistralChatToText,
};
@@ -62,6 +67,7 @@ export const API_REQUEST_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
openai: OpenAIV1ChatCompletionSchema,
"openai-text": OpenAIV1TextCompletionSchema,
"openai-image": OpenAIV1ImagesGenerationSchema,
"openai-responses": OpenAIV1ResponsesSchema,
"google-ai": GoogleAIV1GenerateContentSchema,
"mistral-ai": MistralAIV1ChatCompletionsSchema,
"mistral-text": MistralAIV1TextCompletionsSchema,
@@ -0,0 +1,61 @@
import { z } from "zod";
import { Request } from "express";
import { OpenAIChatMessage, OpenAIV1ChatCompletionSchema } from "./openai";
// Schema for the OpenAI Responses API based on the chat completion schema
// with some additional fields specific to the Responses API.
// NOTE(review): `input` is validated here as { messages: [...] }, but
// finalizeBody later flattens it to a bare array before sending upstream —
// confirm which shape the upstream endpoint actually requires.
export const OpenAIV1ResponsesSchema = z.object({
  // Target model, e.g. "o1-pro" or a dated snapshot.
  model: z.string(),
  // Prompt payload; individual messages are intentionally left loosely typed.
  input: z.object({
    messages: z.array(z.any())
  }).optional(),
  // Continuation of a prior response (mapped from conversation_id).
  // NOTE(review): OpenAI documents this parameter in snake_case as
  // `previous_response_id` — confirm upstream accepts the camelCase form.
  previousResponseId: z.string().optional(),
  // Responses-API analogue of max_tokens / max_completion_tokens.
  max_output_tokens: z.number().int().positive().optional(),
  temperature: z.number().min(0).max(2).optional(),
  top_p: z.number().min(0).max(1).optional(),
  n: z.number().int().positive().optional(),
  stream: z.boolean().optional(),
  stop: z.union([z.string(), z.array(z.string())]).optional(),
  // Accepted here for client compatibility, but finalizeBody strips both
  // penalties as unsupported by the Responses API.
  presence_penalty: z.number().min(-2).max(2).optional(),
  frequency_penalty: z.number().min(-2).max(2).optional(),
  user: z.string().optional(),
  tools: z.array(z.any()).optional(),
  reasoning_effort: z.enum(["low", "medium", "high"]).optional(),
});
/**
 * Allow transforming from OpenAI Chat to Responses format.
 * Works on a shallow copy of req.body and returns the converted object:
 * relocates `messages` under `input`, renames the token limit and
 * conversation-id fields, and defaults tool types to "function".
 */
export async function transformOpenAIToOpenAIResponses(
  req: Request
): Promise<z.infer<typeof OpenAIV1ResponsesSchema>> {
  const body = { ...req.body };

  // The Responses API nests chat messages under 'input.messages'.
  if (body.messages && !body.input) {
    body.input = { messages: body.messages };
    delete body.messages;
  }

  // 'max_tokens' becomes 'max_output_tokens' unless one was already given.
  if (body.max_tokens && !body.max_output_tokens) {
    body.max_output_tokens = body.max_tokens;
    delete body.max_tokens;
  }

  // 'conversation_id' maps onto 'previousResponseId'.
  if (body.conversation_id && !body.previousResponseId) {
    body.previousResponseId = body.conversation_id;
    delete body.conversation_id;
  }

  // Every tool gets an explicit type, defaulting to "function".
  if (body.tools) {
    body.tools = body.tools.map((tool: any) => ({
      ...tool,
      type: tool.type || "function",
    }));
  }

  return body;
}
+1
View File
@@ -6,6 +6,7 @@ export type APIFormat =
| "openai"
| "openai-text"
| "openai-image"
| "openai-responses" // New OpenAI Responses API for o1-pro model
| "anthropic-chat" // Anthropic's newer messages array format
| "anthropic-text" // Legacy flat string prompt format
| "google-ai"
@@ -119,6 +119,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt41-nanoTokens": 0,
"o1Tokens": 0,
"o1-miniTokens": 0,
"o1-proTokens": 0,
"o3-miniTokens": 0,
"o3Tokens": 0,
"o4-miniTokens": 0,
+9
View File
@@ -30,6 +30,7 @@ export type OpenAIModelFamily =
| "gpt45"
| "o1"
| "o1-mini"
| "o1-pro"
| "o3-mini"
| "o3"
| "o4-mini"
@@ -78,6 +79,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"gpt41-nano",
"o1",
"o1-mini",
"o1-pro",
"o3-mini",
"o3",
"o4-mini",
@@ -111,6 +113,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"azure-dall-e",
"azure-o1",
"azure-o1-mini",
"azure-o1-pro",
"azure-o3-mini",
"azure-o3",
"azure-o4-mini",
@@ -146,6 +149,7 @@ export const MODEL_FAMILY_SERVICE: {
"gpt41-nano": "openai",
"o1": "openai",
"o1-mini": "openai",
"o1-pro": "openai",
"o3-mini": "openai",
"o3": "openai",
"o4-mini": "openai",
@@ -172,6 +176,7 @@ export const MODEL_FAMILY_SERVICE: {
"azure-dall-e": "azure",
"azure-o1": "azure",
"azure-o1-mini": "azure",
"azure-o1-pro": "azure",
"azure-o3-mini": "azure",
"azure-o3": "azure",
"azure-o4-mini": "azure",
@@ -206,6 +211,7 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
"^text-embedding-ada-002$": "turbo",
"^dall-e-\\d{1}$": "dall-e",
"^o1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o1-mini",
"^o1-pro(-\\d{4}-\\d{2}-\\d{2})?$": "o1-pro",
"^o1(-\\d{4}-\\d{2}-\\d{2})?$": "o1",
"^o3-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o3-mini",
"^o3(-\\d{4}-\\d{2}-\\d{2})?$": "o3",
@@ -346,6 +352,9 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
case "mistral-text":
modelFamily = getMistralAIModelFamily(model);
break;
case "openai-responses":
modelFamily = getOpenAIModelFamily(model);
break;
default:
assertNever(req.outboundApi);
}
+5
View File
@@ -38,6 +38,11 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
case "gpt4-turbo":
cost = 0.00001;
break;
case "azure-o1-pro":
case "o1-pro":
// Blended per-token rate; o1-pro lists at $150/1M input and $600/1M output
// tokens, but output tokens are not tracked separately here
cost = 0.000325;
break;
case "azure-o1":
case "o1":
// Currently we do not track output tokens separately, and O1 uses
+1
View File
@@ -108,6 +108,7 @@ export async function countTokens({
};
case "openai":
case "openai-text":
case "openai-responses":
return {
...(await getOpenAITokenCount(prompt ?? completion, req.body.model)),
tokenization_duration_ms: getElapsedMs(time),