default claude 2.1 instead of 1.3 in openai compat endpoint since 1.3 is not accessible on all keys

2024-01-17 02:36:21 -06:00
parent 4c2a2c1e6c
commit e813cd9d22
7 changed files with 10 additions and 26 deletions
@@ -5,6 +5,9 @@
 # All values have reasonable defaults, so you only need to change the ones you
 # want to override.

+# Use production mode unless you are developing locally.
+NODE_ENV=production
+
 # ------------------------------------------------------------------------------
 # General settings:

@@ -112,8 +115,7 @@

 # ------------------------------------------------------------------------------
 # Secrets and keys:
-# Do not put any passwords or API keys directly in this file.
-# For Huggingface, set them via the Secrets section in your Space's config UI.
+# For Huggingface, set them via the Secrets section in your Space's config UI. Do not set them in .env.
 # For Render, create a "secret file" called .env using the Environment tab.

 # You can add multiple API keys by separating them with a comma.
@@ -173,16 +173,7 @@ anthropicRouter.post(
 function maybeReassignModel(req: Request) {
  const model = req.body.model;
  if (!model.startsWith("gpt-")) return;
-
-  const bigModel = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
-  const contextSize = req.promptTokens! + req.outputTokens!;
-  if (contextSize > 8500) {
-    req.log.debug(
-      { model: bigModel, contextSize },
-      "Using Claude 100k model for OpenAI-to-Anthropic request"
-    );
-    req.body.model = bigModel;
-  }
+  req.body.model = "claude-2.1";
 }

 export const anthropic = anthropicRouter;
@@ -279,11 +279,7 @@ function openaiToAnthropic(req: Request) {
  stops = [...new Set(stops)];

  return {
-    // Model may be overridden in `calculate-context-size.ts` to avoid having
-    // a circular dependency (`calculate-context-size.ts` needs an already-
-    // transformed request body to count tokens, but this function would like
-    // to know the count to select a model).
-    model: process.env.CLAUDE_SMALL_MODEL || "claude-v1",
+    model: rest.model,
    prompt: prompt,
    max_tokens_to_sample: rest.max_tokens,
    stop_sequences: stops,
@@ -37,6 +37,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
      break;
    case "mistral-ai":
      proxyMax = MISTRAL_AI_MAX_CONTENT;
+      break;
    case "openai-image":
      return;
    default:
@@ -85,9 +85,6 @@ export class SSEStreamAdapter extends Transform {
      }
    } else {
      const { bytes } = payload;
-      // technically this is a transformation but we don't really distinguish
-      // between aws claude and anthropic claude at the APIFormat level, so
-      // these will short circuit the message transformer
      return [
        "event: completion",
        `data: ${Buffer.from(bytes, "base64").toString("utf8")}`,
@@ -95,8 +92,7 @@ export class SSEStreamAdapter extends Transform {
    }
  }

-  // Google doesn't use event streams and just sends elements in an array over
-  // a long-lived HTTP connection. Needs stream-json to parse the array.
+  /** Processes an incoming array element from the Google AI JSON stream. */
  protected processGoogleValue(value: any): string | null {
    try {
      const candidates = value.candidates ?? [{}];
@@ -163,9 +163,7 @@ const openaiProxy = createQueueMiddleware({
    selfHandleResponse: true,
    logger,
    on: {
-      proxyReq: createOnProxyReqHandler({
-        pipeline: [addKey, finalizeBody],
-      }),
+      proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody], }),
      proxyRes: createOnProxyResHandler([openaiResponseHandler]),
      error: handleProxyError,
    },
@@ -527,7 +527,7 @@ function monitorHeartbeat(req: Request) {
    if (bytesSinceLast < minBytes) {
      req.log.warn(
        { minBytes, bytesSinceLast },
-        "Queued request is processing heartbeats enough data or server is overloaded; killing connection."
+        "Queued request is not processing heartbeats enough data or server is overloaded; killing connection."
      );
      res.destroy();
    }