refactors api transformers and adds oai->anthropic chat api translation

2024-03-08 20:59:19 -06:00
parent 8d84f289b2
commit fab404b232
12 changed files with 440 additions and 142 deletions
@@ -1,11 +1,11 @@
 import { z } from "zod";
-import { Request } from "express";
 import { config } from "../../config";
 import {
  flattenOpenAIMessageContent,
  OpenAIChatMessage,
  OpenAIV1ChatCompletionSchema,
 } from "./openai";
+import { APIFormatTransformer } from "./index";

 const CLAUDE_OUTPUT_MAX = config.maxOutputTokensAnthropic;

@@ -69,9 +69,7 @@ export type AnthropicChatMessage = z.infer<
  typeof AnthropicV1MessagesSchema
 >["messages"][0];

-export function openAIMessagesToClaudeTextPrompt(
-  messages: OpenAIChatMessage[]
-) {
+function openAIMessagesToClaudeTextPrompt(messages: OpenAIChatMessage[]) {
  return (
    messages
      .map((m) => {
@@ -93,7 +91,44 @@ export function openAIMessagesToClaudeTextPrompt(
  );
 }

-export function openAIToAnthropicText(req: Request) {
+/**
+ * Request transformer: OpenAI Chat Completions body -> Anthropic Messages
+ * body.
+ *
+ * The incoming body is validated against `OpenAIV1ChatCompletionSchema`; on
+ * failure the Zod issues are logged and the Zod error is thrown. On success
+ * the `anthropic-version` request header is set and the OpenAI messages are
+ * converted with `openAIMessagesToClaudeChatPrompt`.
+ *
+ * Only parameters with a counterpart on both sides are forwarded: Anthropic
+ * supports top_k, but OpenAI does not; OpenAI supports frequency_penalty,
+ * presence_penalty, logit_bias, n, seed, and function calls, but Anthropic
+ * does not.
+ */
+export const transformOpenAIToAnthropicChat: APIFormatTransformer<
+  typeof AnthropicV1MessagesSchema
+> = async (req) => {
+  const { body } = req;
+  const parsed = OpenAIV1ChatCompletionSchema.safeParse(body);
+  if (!parsed.success) {
+    req.log.warn(
+      { issues: parsed.error.issues, body },
+      "Invalid OpenAI-to-Anthropic Chat request"
+    );
+    throw parsed.error;
+  }
+
+  req.headers["anthropic-version"] = "2023-06-01";
+
+  const {
+    messages,
+    model,
+    max_tokens,
+    stream,
+    temperature,
+    top_p,
+    stop,
+    user,
+  } = parsed.data;
+  const converted = openAIMessagesToClaudeChatPrompt(messages);
+
+  // OpenAI accepts a single stop string or a list; Anthropic takes a list.
+  const stopSequences = typeof stop === "string" ? [stop] : stop;
+  // `metadata` is only attached when the client supplied a user identifier.
+  const metadata = user ? { metadata: { user_id: user } } : {};
+
+  return {
+    system: converted.system,
+    messages: converted.messages,
+    model,
+    max_tokens,
+    stream,
+    temperature,
+    top_p,
+    stop_sequences: stopSequences,
+    ...metadata,
+  };
+};
+
+export const transformOpenAIToAnthropicText: APIFormatTransformer<
+  typeof AnthropicV1TextSchema
+> = async (req) => {
  const { body } = req;
  const result = OpenAIV1ChatCompletionSchema.safeParse(body);
  if (!result.success) {
@@ -131,13 +166,15 @@ export function openAIToAnthropicText(req: Request) {
    temperature: rest.temperature,
    top_p: rest.top_p,
  };
-}
+};

 /**
 * Converts an older Anthropic Text Completion prompt to the newer Messages API
 * by splitting the flat text into messages.
 */
-export function anthropicTextToAnthropicChat(req: Request) {
+export const transformAnthropicTextToAnthropicChat: APIFormatTransformer<
+  typeof AnthropicV1MessagesSchema
+> = async (req) => {
  const { body } = req;
  const result = AnthropicV1TextSchema.safeParse(body);
  if (!result.success) {
@@ -163,8 +200,8 @@ export function anthropicTextToAnthropicChat(req: Request) {
  while (remaining) {
    const isHuman = remaining.startsWith("\n\nHuman:");

-    // TODO: Are multiple consecutive human or assistant messages allowed?
-    // Currently we will enforce alternating turns.
+    // Multiple messages from the same role are not permitted in Messages API.
+    // We collect all messages until the next message from the opposite role.
    const thisRole = isHuman ? "\n\nHuman:" : "\n\nAssistant:";
    const nextRole = isHuman ? "\n\nAssistant:" : "\n\nHuman:";
    const nextIndex = remaining.indexOf(nextRole);
@@ -199,7 +236,7 @@ export function anthropicTextToAnthropicChat(req: Request) {
    max_tokens: max_tokens_to_sample,
    ...rest,
  };
-}
+};

 function validateAnthropicTextPrompt(prompt: string) {
  if (!prompt.includes("\n\nHuman:") || !prompt.includes("\n\nAssistant:")) {
@@ -236,3 +273,167 @@ export function flattenAnthropicMessages(
    })
    .join("\n\n");
 }
+
+/**
+ * The `content` type of an Anthropic chat message with the `string` member
+ * removed from the union via `Exclude`, i.e. the union of all content types
+ * without the `string` shorthand for `text` content.
+ */
+type AnthropicChatMessageContentWithoutString = Exclude<
+  AnthropicChatMessage["content"],
+  string
+>;
+/**
+ * Represents a message with all shorthand `string` content expanded: an
+ * `AnthropicChatMessage` whose `content` is narrowed to the non-string form,
+ * so the converter in this file can `push` further content entries onto it
+ * when merging consecutive messages from the same role.
+ */
+type ConvertedAnthropicChatMessage = AnthropicChatMessage & {
+  content: AnthropicChatMessageContentWithoutString;
+};
+
+/**
+ * Converts a list of OpenAI chat messages into the `system` prompt and
+ * `messages` array of an Anthropic Messages request.
+ *
+ * @param messages OpenAI chat messages, in conversation order.
+ * @returns `system`: the leading system-like messages joined by newlines
+ *   (trailing whitespace trimmed); `messages`: the remaining messages with
+ *   consecutive same-role messages merged.
+ */
+function openAIMessagesToClaudeChatPrompt(messages: OpenAIChatMessage[]): {
+  messages: AnthropicChatMessage[];
+  system: string;
+} {
+  // Similar formats, but Claude doesn't use `name` property and doesn't have
+  // a `system` role.  Also, Claude does not allow consecutive messages from
+  // the same role, so we need to merge them.
+  // 1. Collect all system messages up to the first non-system message and set
+  // that as the `system` prompt.
+  // 2. Iterate through messages and:
+  //   - If the message is from system, reassign it to assistant with System:
+  //     prefix.
+  //   - If message is from same role as previous, append it to the previous
+  //     message rather than creating a new one.
+  //   - Otherwise, create a new message and prefix with `name` if present.
+
+  // TODO: When a Claude message has multiple `text` contents, does the internal
+  // message flattening insert newlines between them?  If not, we may need to
+  // do that here...
+
+  let firstNonSystem = -1;
+  const result: { messages: ConvertedAnthropicChatMessage[]; system: string } =
+    { messages: [], system: "" };
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i];
+    const isSystem = isSystemOpenAIRole(msg.role);
+
+    if (firstNonSystem === -1 && isSystem) {
+      // Still merging initial system messages into the system prompt
+      result.system += getFirstTextContent(msg.content) + "\n";
+      continue;
+    }
+
+    if (firstNonSystem === -1 && !isSystem) {
+      // Encountered the first non-system message
+      firstNonSystem = i;
+
+      if (msg.role === "assistant") {
+        // There is an annoying rule that the first message must be from the user.
+        // This is commonly not the case with roleplay prompts that start with a
+        // block of system messages followed by an assistant message. We will try
+        // to reconcile this by splicing the last line of the system prompt into
+        // a beginning user message -- this is *commonly* ST's [Start a new chat]
+        // nudge, which works okay as a user message.
+        const split = splitOffLastNonEmptyLine(result.system);
+
+        let text: string;
+        if (split) {
+          text = split.lastLine;
+          // Remove last line from system so it doesn't get duplicated
+          result.system = split.head;
+        } else {
+          // This is a bad prompt; there's no system content to move to user and
+          // it starts with assistant. We don't have any good options.
+          text = "[ Joining chat... ]";
+        }
+
+        result.messages.push({
+          role: "user",
+          content: [{ type: "text", text }],
+        });
+      }
+    }
+
+    const last = result.messages[result.messages.length - 1];
+    // I have to handle tools as system messages to be exhaustive here but the
+    // experience will be bad.
+    const role = isSystemOpenAIRole(msg.role) ? "assistant" : msg.role;
+
+    // Here we will lose the original name if it was a system message, but that
+    // is generally okay because the system message is usually a prompt and not
+    // a character in the chat.
+    const name = msg.role === "system" ? "System" : msg.name?.trim();
+    const content = convertOpenAIContent(msg.content);
+
+    // Prepend the display name to the first text content in the current message
+    // if it exists. We don't need to add the name to every content block.
+    if (name?.length) {
+      const firstTextContent = content.find((c) => c.type === "text");
+      if (firstTextContent && "text" in firstTextContent) {
+        // This mutates the element in `content`.
+        firstTextContent.text = `${name}: ${firstTextContent.text}`;
+      }
+    }
+
+    // Merge messages if necessary. If two assistant roles are consecutive but
+    // had different names, the final converted assistant message will have
+    // multiple characters in it, but the name prefixes should assist the model
+    // in differentiating between speakers.
+    if (last && last.role === role) {
+      last.content.push(...content);
+    } else {
+      result.messages.push({ role, content });
+    }
+  }
+
+  result.system = result.system.trimEnd();
+  return result;
+}
+
+/**
+ * Splits `text` at the start of its last non-empty line.
+ *
+ * Scans backwards from the end of the string, so the result is always taken
+ * from the real end of `text` and the cost is linear in its length. Both
+ * `\n` and `\r` count as line breaks.
+ *
+ * @returns `head` (everything before the last non-empty line, line breaks
+ *   included) and `lastLine` (that line without its line breaks), or `null`
+ *   when `text` is empty or consists only of line breaks.
+ */
+function splitOffLastNonEmptyLine(
+  text: string
+): { head: string; lastLine: string } | null {
+  const isLineBreak = (ch: string) => ch === "\n" || ch === "\r";
+
+  // Skip the trailing line breaks.
+  let end = text.length;
+  while (end > 0 && isLineBreak(text[end - 1])) end--;
+  if (end === 0) return null;
+
+  // Walk back to the beginning of the line that ends at `end`.
+  let start = end;
+  while (start > 0 && !isLineBreak(text[start - 1])) start--;
+
+  return { head: text.slice(0, start), lastLine: text.slice(start, end) };
+}
+
+/**
+ * Type guard for the OpenAI roles this module treats as system-like:
+ * `system`, `function` and `tool`.
+ */
+function isSystemOpenAIRole(
+  role: OpenAIChatMessage["role"]
+): role is "system" | "function" | "tool" {
+  switch (role) {
+    case "system":
+    case "function":
+    case "tool":
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Returns the text of a message: the content itself when it is a plain
+ * string, otherwise the `text` of the first part that has one. A fixed
+ * placeholder is returned when no part carries text.
+ */
+function getFirstTextContent(content: OpenAIChatMessage["content"]) {
+  if (typeof content !== "string") {
+    const firstTextPart = content.find((part) => "text" in part);
+    return firstTextPart && "text" in firstTextPart
+      ? firstTextPart.text
+      : "[ No text content in this message ]";
+  }
+  return content;
+}
+
+/**
+ * Converts OpenAI message content into a list of Anthropic content entries.
+ *
+ * - String content becomes a single `text` entry (trailing whitespace
+ *   trimmed).
+ * - `text` parts become `text` entries (trailing whitespace trimmed).
+ * - `image_url` parts holding a base64 `data:` URL become `image` entries
+ *   with a base64 source. Any other URL cannot be expressed as a base64
+ *   source, so it is replaced by a text placeholder containing the first
+ *   200 characters of the URL.
+ * - Any other part becomes a text placeholder naming its `type`.
+ */
+function convertOpenAIContent(
+  content: OpenAIChatMessage["content"]
+): AnthropicChatMessageContentWithoutString {
+  if (typeof content === "string") {
+    return [{ type: "text", text: content.trimEnd() }];
+  }
+
+  return content.map((c) => {
+    if ("text" in c) {
+      return { type: "text", text: c.text.trimEnd() };
+    } else if ("image_url" in c) {
+      const url = c.image_url.url;
+      // String splitting never throws, so the URL has to be validated
+      // explicitly; otherwise a plain http(s) URL would yield an image entry
+      // with a bogus media type and no data.
+      const parsed = parseBase64DataUrl(url);
+      if (!parsed) {
+        return {
+          type: "text",
+          text: `[ Unsupported image URL: ${url.slice(0, 200)} ]`,
+        };
+      }
+      return {
+        type: "image",
+        source: {
+          type: "base64",
+          media_type: parsed.mediaType,
+          data: parsed.data,
+        },
+      };
+    } else {
+      const type = String((c as any)?.type);
+      return { type: "text", text: `[ Unsupported content type: ${type} ]` };
+    }
+  });
+}
+
+/**
+ * Parses a `data:<media type>[;param...];base64,<data>` URL.
+ *
+ * @returns the media type and the base64 payload, or `null` when `url` is not
+ *   a base64 data URL or has an empty media type or payload.
+ */
+function parseBase64DataUrl(
+  url: string
+): { mediaType: string; data: string } | null {
+  const scheme = "data:";
+  const commaIndex = url.indexOf(",");
+  if (commaIndex === -1) return null;
+  if (url.slice(0, scheme.length).toLowerCase() !== scheme) return null;
+
+  // The header sits between the scheme and the first comma, e.g.
+  // `image/png;base64`.
+  const [mediaType, ...params] = url
+    .slice(scheme.length, commaIndex)
+    .split(";");
+  const data = url.slice(commaIndex + 1);
+  if (!mediaType || !params.includes("base64") || !data) return null;
+
+  return { mediaType, data };
+}
@@ -1,9 +1,9 @@
 import { z } from "zod";
-import { Request } from "express";
 import {
  flattenOpenAIMessageContent,
  OpenAIV1ChatCompletionSchema,
 } from "./openai";
+import { APIFormatTransformer } from "./index";

 // https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent
 export const GoogleAIV1GenerateContentSchema = z
@@ -14,7 +14,7 @@ export const GoogleAIV1GenerateContentSchema = z
      z.object({
        parts: z.array(z.object({ text: z.string() })),
        role: z.enum(["user", "model"]),
-      }),
+      })
    ),
    tools: z.array(z.object({})).max(0).optional(),
    safetySettings: z.array(z.object({})).max(0).optional(),
@@ -37,9 +37,9 @@ export type GoogleAIChatMessage = z.infer<
  typeof GoogleAIV1GenerateContentSchema
 >["contents"][0];

-export function openAIToGoogleAI(
-  req: Request,
-): z.infer<typeof GoogleAIV1GenerateContentSchema> {
+export const transformOpenAIToGoogleAI: APIFormatTransformer<
+  typeof GoogleAIV1GenerateContentSchema
+> = async (req) => {
  const { body } = req;
  const result = OpenAIV1ChatCompletionSchema.safeParse({
    ...body,
@@ -48,7 +48,7 @@ export function openAIToGoogleAI(
  if (!result.success) {
    req.log.warn(
      { issues: result.error.issues, body },
-      "Invalid OpenAI-to-Google AI request",
+      "Invalid OpenAI-to-Google AI request"
    );
    throw result.error;
  }
@@ -121,4 +121,4 @@ export function openAIToGoogleAI(
      { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
    ],
  };
-}
+};
@@ -1,18 +1,57 @@
+import type { Request } from "express";
 import { z } from "zod";
 import { APIFormat } from "../key-management";
-import { AnthropicV1TextSchema, AnthropicV1MessagesSchema } from "./anthropic";
+import {
+  AnthropicV1TextSchema,
+  AnthropicV1MessagesSchema,
+  transformAnthropicTextToAnthropicChat,
+  transformOpenAIToAnthropicText,
+  transformOpenAIToAnthropicChat,
+} from "./anthropic";
 import { OpenAIV1ChatCompletionSchema } from "./openai";
-import { OpenAIV1TextCompletionSchema } from "./openai-text";
-import { OpenAIV1ImagesGenerationSchema } from "./openai-image";
-import { GoogleAIV1GenerateContentSchema } from "./google-ai";
+import {
+  OpenAIV1TextCompletionSchema,
+  transformOpenAIToOpenAIText,
+} from "./openai-text";
+import {
+  OpenAIV1ImagesGenerationSchema,
+  transformOpenAIToOpenAIImage,
+} from "./openai-image";
+import {
+  GoogleAIV1GenerateContentSchema,
+  transformOpenAIToGoogleAI,
+} from "./google-ai";
 import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai";

 export { OpenAIChatMessage } from "./openai";
-export { AnthropicChatMessage, flattenAnthropicMessages } from "./anthropic";
+export {
+  AnthropicChatMessage,
+  AnthropicV1TextSchema,
+  AnthropicV1MessagesSchema,
+  flattenAnthropicMessages,
+} from "./anthropic";
 export { GoogleAIChatMessage } from "./google-ai";
 export { MistralAIChatMessage } from "./mistral-ai";

-export const API_SCHEMA_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
+/**
+ * A request transformer: receives the Express request and resolves to a body
+ * of the type inferred from the Zod schema `Z`.
+ */
+export type APIFormatTransformer<Z extends z.ZodType<any, any>> = (
+  req: Request
+) => Promise<z.infer<Z>>;
+
+/** Lookup key naming a translation, written as `<source>-><target>`. */
+type APIPair = `${APIFormat}->${APIFormat}`;
+
+/** Transformers keyed by format pair; not every pair has an entry. */
+type TransformerMap = Partial<Record<APIPair, APIFormatTransformer<any>>>;
+
+/** The request transformers available for each source/target format pair. */
+export const API_REQUEST_TRANSFORMERS: TransformerMap = {
+  "anthropic-text->anthropic-chat": transformAnthropicTextToAnthropicChat,
+  "openai->anthropic-chat": transformOpenAIToAnthropicChat,
+  "openai->anthropic-text": transformOpenAIToAnthropicText,
+  "openai->openai-text": transformOpenAIToOpenAIText,
+  "openai->openai-image": transformOpenAIToOpenAIImage,
+  "openai->google-ai": transformOpenAIToGoogleAI,
+};
+
+export const API_REQUEST_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
  "anthropic-chat": AnthropicV1MessagesSchema,
  "anthropic-text": AnthropicV1TextSchema,
  openai: OpenAIV1ChatCompletionSchema,
@@ -1,6 +1,6 @@
 import { z } from "zod";
-import { Request } from "express";
 import { OpenAIV1ChatCompletionSchema } from "./openai";
+import { APIFormatTransformer } from "./index";

 // https://platform.openai.com/docs/api-reference/images/create
 export const OpenAIV1ImagesGenerationSchema = z
@@ -20,47 +20,49 @@ export const OpenAIV1ImagesGenerationSchema = z
  .strip();

 // Takes the last chat message and uses it verbatim as the image prompt.
-export function openAIToOpenAIImage(req: Request) {
-    const { body } = req;
-    const result = OpenAIV1ChatCompletionSchema.safeParse(body);
-    if (!result.success) {
-        req.log.warn(
-          { issues: result.error.issues, body },
-          "Invalid OpenAI-to-OpenAI-image request",
-        );
-        throw result.error;
-    }
+export const transformOpenAIToOpenAIImage: APIFormatTransformer<
+  typeof OpenAIV1ImagesGenerationSchema
+> = async (req) => {
+  const { body } = req;
+  const result = OpenAIV1ChatCompletionSchema.safeParse(body);
+  if (!result.success) {
+    req.log.warn(
+      { issues: result.error.issues, body },
+      "Invalid OpenAI-to-OpenAI-image request"
+    );
+    throw result.error;
+  }

-    const { messages } = result.data;
-    const prompt = messages.filter((m) => m.role === "user").pop()?.content;
-    if (Array.isArray(prompt)) {
-        throw new Error("Image generation prompt must be a text message.");
-    }
+  const { messages } = result.data;
+  const prompt = messages.filter((m) => m.role === "user").pop()?.content;
+  if (Array.isArray(prompt)) {
+    throw new Error("Image generation prompt must be a text message.");
+  }

-    if (body.stream) {
-        throw new Error(
-          "Streaming is not supported for image generation requests.",
-        );
-    }
+  if (body.stream) {
+    throw new Error(
+      "Streaming is not supported for image generation requests."
+    );
+  }

-    // Some frontends do weird things with the prompt, like prefixing it with a
-    // character name or wrapping the entire thing in quotes. We will look for
-    // the index of "Image:" and use everything after that as the prompt.
+  // Some frontends do weird things with the prompt, like prefixing it with a
+  // character name or wrapping the entire thing in quotes. We will look for
+  // the index of "Image:" and use everything after that as the prompt.

-    const index = prompt?.toLowerCase().indexOf("image:");
-    if (index === -1 || !prompt) {
-        throw new Error(
-          `Start your prompt with 'Image:' followed by a description of the image you want to generate (received: ${prompt}).`,
-        );
-    }
+  const index = prompt?.toLowerCase().indexOf("image:");
+  if (index === -1 || !prompt) {
+    throw new Error(
+      `Start your prompt with 'Image:' followed by a description of the image you want to generate (received: ${prompt}).`
+    );
+  }

-    // TODO: Add some way to specify parameters via chat message
-    const transformed = {
-        model: body.model.includes("dall-e") ? body.model : "dall-e-3",
-        quality: "standard",
-        size: "1024x1024",
-        response_format: "url",
-        prompt: prompt.slice(index! + 6).trim(),
-    };
-    return OpenAIV1ImagesGenerationSchema.parse(transformed);
-}
+  // TODO: Add some way to specify parameters via chat message
+  const transformed = {
+    model: body.model.includes("dall-e") ? body.model : "dall-e-3",
+    quality: "standard",
+    size: "1024x1024",
+    response_format: "url",
+    prompt: prompt.slice(index! + 6).trim(),
+  };
+  return OpenAIV1ImagesGenerationSchema.parse(transformed);
+};
@@ -3,7 +3,7 @@ import {
  flattenOpenAIChatMessages,
  OpenAIV1ChatCompletionSchema,
 } from "./openai";
-import { Request } from "express";
+import { APIFormatTransformer } from "./index";

 export const OpenAIV1TextCompletionSchema = z
  .object({
@@ -29,7 +29,9 @@ export const OpenAIV1TextCompletionSchema = z
  .strip()
  .merge(OpenAIV1ChatCompletionSchema.omit({ messages: true, logprobs: true }));

-export function openAIToOpenAIText(req: Request) {
+export const transformOpenAIToOpenAIText: APIFormatTransformer<
+  typeof OpenAIV1TextCompletionSchema
+> = async (req) => {
  const { body } = req;
  const result = OpenAIV1ChatCompletionSchema.safeParse(body);
  if (!result.success) {
@@ -53,4 +55,4 @@ export function openAIToOpenAIText(req: Request) {

  const transformed = { ...rest, prompt: prompt, stop: stops };
  return OpenAIV1TextCompletionSchema.parse(transformed);
-}
+};
@@ -338,12 +338,13 @@ function refreshAllQuotas() {
 // store to sync it with Firebase when it changes. Will refactor to abstract
 // persistence layer later so we can support multiple stores.
 let firebaseTimeout: NodeJS.Timeout | undefined;
+const USERS_REF = process.env.FIREBASE_USERS_REF_NAME ?? "users";

 async function initFirebase() {
  log.info("Connecting to Firebase...");
  const app = getFirebaseApp();
  const db = admin.database(app);
-  const usersRef = db.ref("users");
+  const usersRef = db.ref(USERS_REF);
  const snapshot = await usersRef.once("value");
  const users: Record<string, User> | null = snapshot.val();
  firebaseTimeout = setInterval(flushUsers, 20 * 1000);
@@ -362,7 +363,7 @@ async function initFirebase() {
 async function flushUsers() {
  const app = getFirebaseApp();
  const db = admin.database(app);
-  const usersRef = db.ref("users");
+  const usersRef = db.ref(USERS_REF);
  const updates: Record<string, User> = {};
  const deletions = [];