Automatically add Anthropic "\n\nHuman:" preamble when necessary (khanon/oai-reverse-proxy!23)

2023-06-09 07:16:40 +00:00
parent 4f2a12ef14
commit 96cf4a0e2d
8 changed files with 122 additions and 16 deletions
@@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit";
 import { handleProxyError } from "./middleware/common";
 import {
  addKey,
+  addAnthropicPreamble,
  createPreprocessorMiddleware,
  finalizeBody,
  languageFilter,
@@ -69,6 +70,7 @@ const rewriteAnthropicRequest = (
 ) => {
  const rewriterPipeline = [
    addKey,
+    addAnthropicPreamble,
    languageFilter,
    limitOutputTokens,
    finalizeBody,
@@ -0,0 +1,32 @@
+import { AnthropicKey, Key } from "../../../key-management";
+import { isCompletionRequest } from "../common";
+import { ProxyRequestMiddleware } from ".";
+
+/**
+ * Some keys require the prompt to start with `\n\nHuman:`, which can only be
+ * discovered by sending a request and seeing it fail. For keys marked as such,
+ * the preamble is prepended here unless the prompt already starts with it.
+ */
+export const addAnthropicPreamble: ProxyRequestMiddleware = (
+  _proxyReq,
+  req
+) => {
+  if (!isCompletionRequest(req) || req.key?.service !== "anthropic") {
+    return;
+  }
+  assertAnthropicKey(req.key);
+
+  const preamble = "\n\nHuman:";
+  const prompt = req.body.prompt;
+  if (!req.key.requiresPreamble || prompt.startsWith(preamble)) {
+    return; // Nothing to add, so leave the prompt alone and do not log.
+  }
+  req.log.debug({ key: req.key.hash, preamble }, "Adding preamble to prompt");
+  req.body.prompt = preamble + prompt;
+};
+
+/** Narrows `key` to `AnthropicKey`, throwing if it belongs to another service. */
+function assertAnthropicKey(key: Key): asserts key is AnthropicKey {
+  if (key.service === "anthropic") return;
+  throw new Error(`Expected an Anthropic key, got '${key.service}'`);
+}
@@ -9,6 +9,7 @@ export { transformOutboundPayload } from "./transform-outbound-payload";

 // HPM middleware (runs on onProxyReq, cannot be async)
 export { addKey } from "./add-key";
+export { addAnthropicPreamble } from "./add-anthropic-preamble";
 export { finalizeBody } from "./finalize-body";
 export { languageFilter } from "./language-filter";
 export { limitCompletions } from "./limit-completions";
@@ -153,19 +153,10 @@ function openaiToAnthropic(body: any, req: Request) {
  // Remove duplicates
  stops = [...new Set(stops)];

-  // TEMP: More shitty anthropic API hacks
-  // If you receive a 400 Bad Request error from Anthropic complaining about
-  // "prompt must start with a '\n\nHuman: ' turn", enable this setting.
-  // I will try to fix this when I can identify why it only happens sometimes.
-  let preamble = "";
-  if (process.env.CLAUDE_ADD_HUMAN_PREAMBLE) {
-    preamble = "\n\nHuman: Hello Claude.";
-  }
-
  return {
    ...rest,
    model,
-    prompt: preamble + prompt,
+    prompt: prompt,
    max_tokens_to_sample: rest.max_tokens,
    stop_sequences: stops,
  };
@@ -135,7 +135,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => {
 function reenqueueRequest(req: Request) {
  req.log.info(
    { key: req.key?.hash, retryCount: req.retryCount },
-    `Re-enqueueing request due to rate-limit error`
+    `Re-enqueueing request due to retryable error`
  );
  req.retryCount++;
  enqueue(req);
@@ -262,7 +262,11 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (

  if (statusCode === 400) {
    // Bad request (likely prompt is too long)
-    errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
+    if (req.outboundApi === "openai") {
+      errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
+    } else if (req.outboundApi === "anthropic") {
+      maybeHandleMissingPreambleError(req, errorPayload);
+    }
  } else if (statusCode === 401) {
    // Key is invalid or was revoked
    keyPool.disable(req.key!);
@@ -271,7 +275,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
    // OpenAI uses this for a bunch of different rate-limiting scenarios.
    if (req.outboundApi === "openai") {
      handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
-    } else {
+    } else if (req.outboundApi === "anthropic") {
      handleAnthropicRateLimitError(req, errorPayload);
    }
  } else if (statusCode === 404) {
@@ -305,6 +309,48 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
  throw new Error(errorPayload.error?.message);
 };

+/**
+ * Workaround for Anthropic keys that apply stricter prompt validation than
+ * others: they reject any prompt that does not begin with `\n\nHuman:`. The
+ * requirement is undocumented, and it is unclear why only some keys enforce
+ * it (possibly to stop a key being used for generic text completion).
+ * When the upstream error matches that exact rejection, the key is flagged as
+ * requiring the preamble. If queueing is enabled the request is re-enqueued
+ * and a RetryableError is thrown; otherwise a proxy note asks the client to
+ * retry. Any other error just gets a generic proxy note.
+ * The exact error is:
+ * ```
+ * {
+ *   "error": {
+ *     "type": "invalid_request_error",
+ *     "message": "prompt must start with \"\n\nHuman:\" turn"
+ *   }
+ * }
+ * ```
+ */
+function maybeHandleMissingPreambleError(
+  req: Request,
+  errorPayload: Record<string, any>
+) {
+  const isMissingPreamble =
+    errorPayload.error?.type === "invalid_request_error" &&
+    errorPayload.error?.message === 'prompt must start with "\n\nHuman:" turn';
+  if (!isMissingPreamble) {
+    errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
+    return;
+  }
+  req.log.warn(
+    { key: req.key?.hash },
+    "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
+  );
+  keyPool.update(req.key!, { requiresPreamble: true });
+  if (config.queueMode !== "none") {
+    reenqueueRequest(req);
+    throw new RetryableError("Claude request re-enqueued to add preamble.");
+  }
+  errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
+}
+
 function handleAnthropicRateLimitError(
  req: Request,
  errorPayload: Record<string, any>