adds azure rate limit auto-retry

2023-12-04 01:23:55 -06:00
parent fbdea30264
commit fdd824f0e4
7 changed files with 88 additions and 55 deletions
@@ -15,6 +15,8 @@ import crypto from "crypto";
 import type { Handler, Request } from "express";
 import { keyPool } from "../shared/key-management";
 import {
+  getAwsBedrockModelFamily,
+  getAzureOpenAIModelFamily,
  getClaudeModelFamily,
  getGooglePalmModelFamily,
  getOpenAIModelFamily,
@@ -136,11 +138,10 @@ function getPartitionForRequest(req: Request): ModelFamily {
  // they should be treated as separate queues.
  const model = req.body.model ?? "gpt-3.5-turbo";

-  // Weird special case for AWS because they serve multiple models from
+  // Weird special case for AWS/Azure because they serve multiple models from
  // different vendors, even if currently only one is supported.
-  if (req.service === "aws") {
-    return "aws-claude";
-  }
+  if (req.service === "aws") return getAwsBedrockModelFamily(model);
+  if (req.service === "azure") return getAzureOpenAIModelFamily(model);

  switch (req.outboundApi) {
    case "anthropic":
@@ -221,7 +222,11 @@ function processQueue() {

  reqs.filter(Boolean).forEach((req) => {
    if (req?.proceed) {
-      req.log.info({ retries: req.retryCount }, `Dequeuing request.`);
+      const modelFamily = getPartitionForRequest(req!);
+      req.log.info({
+        retries: req.retryCount,
+        partition: modelFamily,
+      }, `Dequeuing request.`);
      req.proceed();
    }
  });
@@ -415,6 +420,7 @@ function initStreaming(req: Request) {
    // Some clients have a broken SSE parser that doesn't handle comments
    // correctly. These clients can pass ?badSseParser=true to
    // disable comments in the SSE stream.
+    res.write(getHeartbeatPayload());
    return;
  }