Implement support for Anthropic keys and Claude API (khanon/oai-reverse-proxy!15)

2023-05-29 17:08:08 +00:00
parent 03aaa6daad
commit 2d93463247
23 changed files with 1530 additions and 656 deletions
@@ -17,7 +17,7 @@

 import type { Handler, Request } from "express";
 import { config, DequeueMode } from "../config";
-import { keyPool } from "../key-management";
+import { keyPool, SupportedModel } from "../key-management";
 import { logger } from "../logger";
 import { AGNAI_DOT_CHAT_IP } from "./rate-limit";

@@ -78,7 +78,7 @@ export function enqueue(req: Request) {
  // If the request opted into streaming, we need to register a heartbeat
  // handler to keep the connection alive while it waits in the queue. We
  // deregister the handler when the request is dequeued.
-  if (req.body.stream) {
+  if (req.body.stream === "true" || req.body.stream === true) {
    const res = req.res!;
    if (!res.headersSent) {
      initStreaming(req);
@@ -91,7 +91,7 @@ export function enqueue(req: Request) {
        const avgWait = Math.round(getEstimatedWaitTime() / 1000);
        const currentDuration = Math.round((Date.now() - req.startTime) / 1000);
        const debugMsg = `queue length: ${queue.length}; elapsed time: ${currentDuration}s; avg wait: ${avgWait}s`;
-        req.res!.write(buildFakeSseMessage("heartbeat", debugMsg));
+        req.res!.write(buildFakeSseMessage("heartbeat", debugMsg, req));
      }
    }, 10000);
  }
@@ -118,12 +118,24 @@ export function enqueue(req: Request) {
  }
 }

-export function dequeue(model: string): Request | undefined {
-  // TODO: This should be set by some middleware that checks the request body.
-  const modelQueue =
-    model === "gpt-4"
-      ? queue.filter((req) => req.body.model?.startsWith("gpt-4"))
-      : queue.filter((req) => !req.body.model?.startsWith("gpt-4"));
+export function dequeue(model: SupportedModel): Request | undefined {
+  const modelQueue = queue.filter((req) => {
+    const reqProvider = req.originalUrl.startsWith("/proxy/anthropic")
+      ? "anthropic"
+      : "openai";
+
+    // This sucks, but the `req.body.model` on Anthropic requests via the
+    // OpenAI-compat endpoint isn't actually claude-*, it's a fake gpt value.
+    // TODO: refactor model/service detection
+
+    if (model.startsWith("claude")) {
+      return reqProvider === "anthropic";
+    }
+    if (model.startsWith("gpt-4")) {
+      return reqProvider === "openai" && req.body.model?.startsWith("gpt-4");
+    }
+    return reqProvider === "openai" && req.body.model?.startsWith("gpt-3");
+  });

  if (modelQueue.length === 0) {
    return undefined;
@@ -172,6 +184,7 @@ function processQueue() {
  // the others, because we only track one rate limit per key.
  const gpt4Lockout = keyPool.getLockoutPeriod("gpt-4");
  const turboLockout = keyPool.getLockoutPeriod("gpt-3.5-turbo");
+  const claudeLockout = keyPool.getLockoutPeriod("claude-v1");

  const reqs: (Request | undefined)[] = [];
  if (gpt4Lockout === 0) {
@@ -180,6 +193,9 @@ function processQueue() {
  if (turboLockout === 0) {
    reqs.push(dequeue("gpt-3.5-turbo"));
  }
+  if (claudeLockout === 0) {
+    reqs.push(dequeue("claude-v1"));
+  }

  reqs.filter(Boolean).forEach((req) => {
    if (req?.proceed) {
@@ -266,7 +282,7 @@ export function createQueueMiddleware(proxyMiddleware: Handler): Handler {
        type: "proxy_error",
        message: err.message,
        stack: err.stack,
-        proxy_note: `Only one request per IP can be queued at a time. If you don't have another request queued, your IP may be in use by another user.`,
+        proxy_note: `Only one request can be queued at a time. If you don't have another request queued, your IP or user token might be in use by another request.`,
      });
    }
  };
@@ -281,7 +297,11 @@ function killQueuedRequest(req: Request) {
  try {
    const message = `Your request has been terminated by the proxy because it has been in the queue for more than 5 minutes. The queue is currently ${queue.length} requests long.`;
    if (res.headersSent) {
-      const fakeErrorEvent = buildFakeSseMessage("proxy queue error", message);
+      const fakeErrorEvent = buildFakeSseMessage(
+        "proxy queue error",
+        message,
+        req
+      );
      res.write(fakeErrorEvent);
      res.end();
    } else {
@@ -305,20 +325,38 @@ function initStreaming(req: Request) {
  res.write(": joining queue\n\n");
 }

-export function buildFakeSseMessage(type: string, string: string) {
-  const fakeEvent = {
-    id: "chatcmpl-" + type,
-    object: "chat.completion.chunk",
-    created: Date.now(),
-    model: "",
-    choices: [
-      {
-        delta: { content: `\`\`\`\n[${type}: ${string}]\n\`\`\`\n` },
-        index: 0,
-        finish_reason: type,
-      },
-    ],
-  };
+export function buildFakeSseMessage(
+  type: string,
+  string: string,
+  req: Request
+) {
+  let fakeEvent;
+
+  if (req.api === "anthropic") {
+    // data: {"completion": " Here is a paragraph of lorem ipsum text:\n\nLorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor inc", "stop_reason": "max_tokens", "truncated": false, "stop": null, "model": "claude-instant-v1", "log_id": "???", "exception": null}
+    fakeEvent = {
+      completion: `\`\`\`\n[${type}: ${string}]\n\`\`\`\n`,
+      stop_reason: type,
+      truncated: false, // I've never seen this be true
+      stop: null,
+      model: req.body?.model,
+      log_id: "proxy-req-" + req.id,
+    };
+  } else {
+    fakeEvent = {
+      id: "chatcmpl-" + req.id,
+      object: "chat.completion.chunk",
+      created: Date.now(),
+      model: req.body?.model,
+      choices: [
+        {
+          delta: { content: `\`\`\`\n[${type}: ${string}]\n\`\`\`\n` },
+          index: 0,
+          finish_reason: type,
+        },
+      ],
+    };
+  }
  return `data: ${JSON.stringify(fakeEvent)}\n\n`;
 }