import { Request } from "express"; import { z } from "zod"; import { config } from "../../../../config"; import { isTextGenerationRequest, isImageGenerationRequest, } from "../../common"; import { RequestPreprocessor } from "../index"; import { APIFormat } from "../../../../shared/key-management"; const CLAUDE_OUTPUT_MAX = config.maxOutputTokensAnthropic; const OPENAI_OUTPUT_MAX = config.maxOutputTokensOpenAI; // TODO: move schemas to shared // https://console.anthropic.com/docs/api/reference#-v1-complete export const AnthropicV1CompleteSchema = z .object({ model: z.string().max(100), prompt: z.string({ required_error: "No prompt found. Are you sending an OpenAI-formatted request to the Claude endpoint?", }), max_tokens_to_sample: z.coerce .number() .int() .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)), stop_sequences: z.array(z.string().max(500)).optional(), stream: z.boolean().optional().default(false), temperature: z.coerce.number().optional().default(1), top_k: z.coerce.number().optional(), top_p: z.coerce.number().optional(), }) .strip(); // https://platform.openai.com/docs/api-reference/chat/create const OpenAIV1ChatContentArraySchema = z.array( z.union([ z.object({ type: z.literal("text"), text: z.string() }), z.object({ type: z.literal("image_url"), image_url: z.object({ url: z.string().url(), detail: z.enum(["low", "auto", "high"]).optional().default("auto"), }), }), ]) ); export const OpenAIV1ChatCompletionSchema = z .object({ model: z.string().max(100), messages: z.array( z.object({ role: z.enum(["system", "user", "assistant"]), content: z.union([z.string(), OpenAIV1ChatContentArraySchema]), name: z.string().optional(), }), { required_error: "No `messages` found. Ensure you've set the correct completion endpoint.", invalid_type_error: "Messages were not formatted correctly. Refer to the OpenAI Chat API documentation for more information.", } ), temperature: z.number().optional().default(1), top_p: z.number().optional().default(1), n: z .literal(1, { errorMap: () => ({ message: "You may only request a single completion at a time.", }), }) .optional(), stream: z.boolean().optional().default(false), stop: z .union([z.string().max(500), z.array(z.string().max(500))]) .optional(), max_tokens: z.coerce .number() .int() .nullish() .default(16) .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)), frequency_penalty: z.number().optional().default(0), presence_penalty: z.number().optional().default(0), logit_bias: z.any().optional(), user: z.string().max(500).optional(), seed: z.number().int().optional(), logprobs: z.boolean().optional().default(false), top_logprobs: z.number().int().optional(), }) .strip(); export type OpenAIChatMessage = z.infer< typeof OpenAIV1ChatCompletionSchema >["messages"][0]; const OpenAIV1TextCompletionSchema = z .object({ model: z .string() .max(100) .regex( /^gpt-3.5-turbo-instruct/, "Model must start with 'gpt-3.5-turbo-instruct'" ), prompt: z.string({ required_error: "No `prompt` found. Ensure you've set the correct completion endpoint.", }), logprobs: z.number().int().nullish().default(null), echo: z.boolean().optional().default(false), best_of: z.literal(1).optional(), stop: z .union([z.string().max(500), z.array(z.string().max(500)).max(4)]) .optional(), suffix: z.string().max(1000).optional(), }) .strip() .merge(OpenAIV1ChatCompletionSchema.omit({ messages: true, logprobs: true })); // https://platform.openai.com/docs/api-reference/images/create const OpenAIV1ImagesGenerationSchema = z .object({ prompt: z.string().max(4000), model: z.string().max(100).optional(), quality: z.enum(["standard", "hd"]).optional().default("standard"), n: z.number().int().min(1).max(4).optional().default(1), response_format: z.enum(["url", "b64_json"]).optional(), size: z .enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]) .optional() .default("1024x1024"), style: z.enum(["vivid", "natural"]).optional().default("vivid"), user: z.string().max(500).optional(), }) .strip(); // https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent const GoogleAIV1GenerateContentSchema = z .object({ model: z.string().max(100), //actually specified in path but we need it for the router stream: z.boolean().optional().default(false), // also used for router contents: z.array( z.object({ parts: z.array(z.object({ text: z.string() })), role: z.enum(["user", "model"]), }) ), tools: z.array(z.object({})).max(0).optional(), safetySettings: z.array(z.object({})).max(0).optional(), generationConfig: z.object({ temperature: z.number().optional(), maxOutputTokens: z.coerce .number() .int() .optional() .default(16) .transform((v) => Math.min(v, 1024)), // TODO: Add config candidateCount: z.literal(1).optional(), topP: z.number().optional(), topK: z.number().optional(), stopSequences: z.array(z.string().max(500)).max(5).optional(), }), }) .strip(); export type GoogleAIChatMessage = z.infer< typeof GoogleAIV1GenerateContentSchema >["contents"][0]; // https://docs.mistral.ai/api#operation/createChatCompletion const MistralAIV1ChatCompletionsSchema = z.object({ model: z.string(), messages: z.array( z.object({ role: z.enum(["system", "user", "assistant"]), content: z.string(), }) ), temperature: z.number().optional().default(0.7), top_p: z.number().optional().default(1), max_tokens: z.coerce .number() .int() .nullish() .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)), stream: z.boolean().optional().default(false), safe_prompt: z.boolean().optional().default(false), random_seed: z.number().int().optional(), }); export type MistralAIChatMessage = z.infer< typeof MistralAIV1ChatCompletionsSchema >["messages"][0]; const VALIDATORS: Record> = { anthropic: AnthropicV1CompleteSchema, openai: OpenAIV1ChatCompletionSchema, "openai-text": OpenAIV1TextCompletionSchema, "openai-image": OpenAIV1ImagesGenerationSchema, "google-ai": GoogleAIV1GenerateContentSchema, "mistral-ai": MistralAIV1ChatCompletionsSchema, }; /** Transforms an incoming request body to one that matches the target API. */ export const transformOutboundPayload: RequestPreprocessor = async (req) => { const sameService = req.inboundApi === req.outboundApi; const alreadyTransformed = req.retryCount > 0; const notTransformable = !isTextGenerationRequest(req) && !isImageGenerationRequest(req); if (alreadyTransformed || notTransformable) return; if (sameService) { const result = VALIDATORS[req.inboundApi].safeParse(req.body); if (!result.success) { req.log.error( { issues: result.error.issues, body: req.body }, "Request validation failed" ); throw result.error; } req.body = result.data; return; } if (req.inboundApi === "openai" && req.outboundApi === "anthropic") { req.body = openaiToAnthropic(req); return; } if (req.inboundApi === "openai" && req.outboundApi === "google-ai") { req.body = openaiToGoogleAI(req); return; } if (req.inboundApi === "openai" && req.outboundApi === "openai-text") { req.body = openaiToOpenaiText(req); return; } if (req.inboundApi === "openai" && req.outboundApi === "openai-image") { req.body = openaiToOpenaiImage(req); return; } throw new Error( `'${req.inboundApi}' -> '${req.outboundApi}' request proxying is not supported. Make sure your client is configured to use the correct API.` ); }; function openaiToAnthropic(req: Request) { const { body } = req; const result = OpenAIV1ChatCompletionSchema.safeParse(body); if (!result.success) { req.log.warn( { issues: result.error.issues, body }, "Invalid OpenAI-to-Anthropic request" ); throw result.error; } req.headers["anthropic-version"] = "2023-06-01"; const { messages, ...rest } = result.data; const prompt = openAIMessagesToClaudePrompt(messages); let stops = rest.stop ? Array.isArray(rest.stop) ? rest.stop : [rest.stop] : []; // Recommended by Anthropic stops.push("\n\nHuman:"); // Helps with jailbreak prompts that send fake system messages and multi-bot // chats that prefix bot messages with "System: Respond as ". stops.push("\n\nSystem:"); // Remove duplicates stops = [...new Set(stops)]; return { model: rest.model, prompt: prompt, max_tokens_to_sample: rest.max_tokens, stop_sequences: stops, stream: rest.stream, temperature: rest.temperature, top_p: rest.top_p, }; } function openaiToOpenaiText(req: Request) { const { body } = req; const result = OpenAIV1ChatCompletionSchema.safeParse(body); if (!result.success) { req.log.warn( { issues: result.error.issues, body }, "Invalid OpenAI-to-OpenAI-text request" ); throw result.error; } const { messages, ...rest } = result.data; const prompt = flattenOpenAIChatMessages(messages); let stops = rest.stop ? Array.isArray(rest.stop) ? rest.stop : [rest.stop] : []; stops.push("\n\nUser:"); stops = [...new Set(stops)]; const transformed = { ...rest, prompt: prompt, stop: stops }; return OpenAIV1TextCompletionSchema.parse(transformed); } // Takes the last chat message and uses it verbatim as the image prompt. function openaiToOpenaiImage(req: Request) { const { body } = req; const result = OpenAIV1ChatCompletionSchema.safeParse(body); if (!result.success) { req.log.warn( { issues: result.error.issues, body }, "Invalid OpenAI-to-OpenAI-image request" ); throw result.error; } const { messages } = result.data; const prompt = messages.filter((m) => m.role === "user").pop()?.content; if (Array.isArray(prompt)) { throw new Error("Image generation prompt must be a text message."); } if (body.stream) { throw new Error( "Streaming is not supported for image generation requests." ); } // Some frontends do weird things with the prompt, like prefixing it with a // character name or wrapping the entire thing in quotes. We will look for // the index of "Image:" and use everything after that as the prompt. const index = prompt?.toLowerCase().indexOf("image:"); if (index === -1 || !prompt) { throw new Error( `Start your prompt with 'Image:' followed by a description of the image you want to generate (received: ${prompt}).` ); } // TODO: Add some way to specify parameters via chat message const transformed = { model: body.model.includes("dall-e") ? body.model : "dall-e-3", quality: "standard", size: "1024x1024", response_format: "url", prompt: prompt.slice(index! + 6).trim(), }; return OpenAIV1ImagesGenerationSchema.parse(transformed); } function openaiToGoogleAI( req: Request ): z.infer { const { body } = req; const result = OpenAIV1ChatCompletionSchema.safeParse({ ...body, model: "gpt-3.5-turbo", }); if (!result.success) { req.log.warn( { issues: result.error.issues, body }, "Invalid OpenAI-to-Google AI request" ); throw result.error; } const { messages, ...rest } = result.data; const foundNames = new Set(); const contents = messages .map((m) => { const role = m.role === "assistant" ? "model" : "user"; // Detects character names so we can set stop sequences for them as Gemini // is prone to continuing as the next character. // If names are not available, we'll still try to prefix the message // with generic names so we can set stops for them but they don't work // as well as real names. const text = flattenOpenAIMessageContent(m.content); const propName = m.name?.trim(); const textName = m.role === "system" ? "" : text.match(/^(.{0,50}?): /)?.[1]?.trim(); const name = propName || textName || (role === "model" ? "Character" : "User"); foundNames.add(name); // Prefixing messages with their character name seems to help avoid // Gemini trying to continue as the next character, or at the very least // ensures it will hit the stop sequence. Otherwise it will start a new // paragraph and switch perspectives. // The response will be very likely to include this prefix so frontends // will need to strip it out. const textPrefix = textName ? "" : `${name}: `; return { parts: [{ text: textPrefix + text }], role: m.role === "assistant" ? ("model" as const) : ("user" as const), }; }) .reduce((acc, msg) => { const last = acc[acc.length - 1]; if (last?.role === msg.role) { last.parts[0].text += "\n\n" + msg.parts[0].text; } else { acc.push(msg); } return acc; }, []); let stops = rest.stop ? Array.isArray(rest.stop) ? rest.stop : [rest.stop] : []; stops.push(...Array.from(foundNames).map((name) => `\n${name}:`)); stops = [...new Set(stops)].slice(0, 5); return { model: "gemini-pro", stream: rest.stream, contents, tools: [], generationConfig: { maxOutputTokens: rest.max_tokens, stopSequences: stops, topP: rest.top_p, topK: 40, // openai schema doesn't have this, google ai defaults to 40 temperature: rest.temperature, }, safetySettings: [ { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_HATE_SPEECH", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" }, ], }; } export function openAIMessagesToClaudePrompt(messages: OpenAIChatMessage[]) { return ( messages .map((m) => { let role: string = m.role; if (role === "assistant") { role = "Assistant"; } else if (role === "system") { role = "System"; } else if (role === "user") { role = "Human"; } const name = m.name?.trim(); const content = flattenOpenAIMessageContent(m.content); // https://console.anthropic.com/docs/prompt-design // `name` isn't supported by Anthropic but we can still try to use it. return `\n\n${role}: ${name ? `(as ${name}) ` : ""}${content}`; }) .join("") + "\n\nAssistant:" ); } function flattenOpenAIChatMessages(messages: OpenAIChatMessage[]) { // Temporary to allow experimenting with prompt strategies const PROMPT_VERSION: number = 1; switch (PROMPT_VERSION) { case 1: return ( messages .map((m) => { // Claude-style human/assistant turns let role: string = m.role; if (role === "assistant") { role = "Assistant"; } else if (role === "system") { role = "System"; } else if (role === "user") { role = "User"; } return `\n\n${role}: ${flattenOpenAIMessageContent(m.content)}`; }) .join("") + "\n\nAssistant:" ); case 2: return messages .map((m) => { // Claude without prefixes (except system) and no Assistant priming let role: string = ""; if (role === "system") { role = "System: "; } return `\n\n${role}${flattenOpenAIMessageContent(m.content)}`; }) .join(""); default: throw new Error(`Unknown prompt version: ${PROMPT_VERSION}`); } } function flattenOpenAIMessageContent( content: OpenAIChatMessage["content"] ): string { return Array.isArray(content) ? content .map((contentItem) => { if ("text" in contentItem) return contentItem.text; if ("image_url" in contentItem) return "[ Uploaded Image Omitted ]"; }) .join("\n") : content; }