From e813cd9d22cdbeadf398c2fff37c388cec0d6a46 Mon Sep 17 00:00:00 2001
From: nai-degen
Date: Wed, 17 Jan 2024 02:36:21 -0600
Subject: [PATCH] default claude 2.1 instead of 1.3 in openai compat endpoint
 since 1.3 is not accessible on all keys

---
 .env.example                                       |  6 ++++--
 src/proxy/anthropic.ts                             | 11 +----------
 .../preprocessors/transform-outbound-payload.ts    |  6 +-----
 .../request/preprocessors/validate-context-size.ts |  1 +
 .../response/streaming/sse-stream-adapter.ts       |  6 +-----
 src/proxy/openai.ts                                |  4 +---
 src/proxy/queue.ts                                 |  2 +-
 7 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/.env.example b/.env.example
index b523395..62abbae 100644
--- a/.env.example
+++ b/.env.example
@@ -5,6 +5,9 @@
 # All values have reasonable defaults, so you only need to change the ones you
 # want to override.
 
+# Use production mode unless you are developing locally.
+NODE_ENV=production
+
 # ------------------------------------------------------------------------------
 # General settings:
 
@@ -112,8 +115,7 @@
 
 # ------------------------------------------------------------------------------
 # Secrets and keys:
-# Do not put any passwords or API keys directly in this file.
-# For Huggingface, set them via the Secrets section in your Space's config UI.
+# For Huggingface, set them via the Secrets section in your Space's config UI. Do not set them in .env.
 # For Render, create a "secret file" called .env using the Environment tab.
 # You can add multiple API keys by separating them with a comma.
 
diff --git a/src/proxy/anthropic.ts b/src/proxy/anthropic.ts
index ab4743e..2f8a133 100644
--- a/src/proxy/anthropic.ts
+++ b/src/proxy/anthropic.ts
@@ -173,16 +173,7 @@ anthropicRouter.post(
 function maybeReassignModel(req: Request) {
   const model = req.body.model;
   if (!model.startsWith("gpt-")) return;
-
-  const bigModel = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
-  const contextSize = req.promptTokens! + req.outputTokens!;
-  if (contextSize > 8500) {
-    req.log.debug(
-      { model: bigModel, contextSize },
-      "Using Claude 100k model for OpenAI-to-Anthropic request"
-    );
-    req.body.model = bigModel;
-  }
+  req.body.model = "claude-2.1";
 }
 
 export const anthropic = anthropicRouter;
diff --git a/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts b/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
index 36502c4..e355111 100644
--- a/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
+++ b/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
@@ -279,11 +279,7 @@ function openaiToAnthropic(req: Request) {
   stops = [...new Set(stops)];
 
   return {
-    // Model may be overridden in `calculate-context-size.ts` to avoid having
-    // a circular dependency (`calculate-context-size.ts` needs an already-
-    // transformed request body to count tokens, but this function would like
-    // to know the count to select a model).
-    model: process.env.CLAUDE_SMALL_MODEL || "claude-v1",
+    model: rest.model,
     prompt: prompt,
     max_tokens_to_sample: rest.max_tokens,
     stop_sequences: stops,
diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
index aa42636..69c1061 100644
--- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts
+++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
@@ -37,6 +37,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
       break;
     case "mistral-ai":
       proxyMax = MISTRAL_AI_MAX_CONTENT;
+      break;
     case "openai-image":
       return;
     default:
diff --git a/src/proxy/middleware/response/streaming/sse-stream-adapter.ts b/src/proxy/middleware/response/streaming/sse-stream-adapter.ts
index 92ade02..31ebef7 100644
--- a/src/proxy/middleware/response/streaming/sse-stream-adapter.ts
+++ b/src/proxy/middleware/response/streaming/sse-stream-adapter.ts
@@ -85,9 +85,6 @@ export class SSEStreamAdapter extends Transform {
       }
     } else {
       const { bytes } = payload;
-      // technically this is a transformation but we don't really distinguish
-      // between aws claude and anthropic claude at the APIFormat level, so
-      // these will short circuit the message transformer
       return [
         "event: completion",
         `data: ${Buffer.from(bytes, "base64").toString("utf8")}`,
@@ -95,8 +92,7 @@ export class SSEStreamAdapter extends Transform {
     }
   }
 
-  // Google doesn't use event streams and just sends elements in an array over
-  // a long-lived HTTP connection. Needs stream-json to parse the array.
+  /** Processes an incoming array element from the Google AI JSON stream. */
   protected processGoogleValue(value: any): string | null {
     try {
       const candidates = value.candidates ?? [{}];
diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts
index 48eb07e..a20c6d3 100644
--- a/src/proxy/openai.ts
+++ b/src/proxy/openai.ts
@@ -163,9 +163,7 @@ const openaiProxy = createQueueMiddleware({
     selfHandleResponse: true,
     logger,
     on: {
-      proxyReq: createOnProxyReqHandler({
-        pipeline: [addKey, finalizeBody],
-      }),
+      proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody], }),
       proxyRes: createOnProxyResHandler([openaiResponseHandler]),
       error: handleProxyError,
     },
diff --git a/src/proxy/queue.ts b/src/proxy/queue.ts
index 4c86fcf..b980ff4 100644
--- a/src/proxy/queue.ts
+++ b/src/proxy/queue.ts
@@ -527,7 +527,7 @@ function monitorHeartbeat(req: Request) {
   if (bytesSinceLast < minBytes) {
     req.log.warn(
       { minBytes, bytesSinceLast },
-      "Queued request is processing heartbeats enough data or server is overloaded; killing connection."
+      "Queued request is not processing heartbeats enough data or server is overloaded; killing connection."
    );
     res.destroy();
   }