adds preliminary openai o1 support and some improvements to openai keychecker
@@ -68,6 +68,10 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
     modelMax = 131072;
   } else if (model.match(/^gpt-4(-\d{4})?-vision(-preview)?$/)) {
     modelMax = 131072;
+  } else if (model.match(/^o1-mini(-\d{4}-\d{2}-\d{2})?$/)) {
+    modelMax = 128000;
+  } else if (model.match(/^o1(-preview)?(-\d{4}-\d{2}-\d{2})?$/)) {
+    modelMax = 128000;
   } else if (model.match(/gpt-3.5-turbo/)) {
     modelMax = 16384;
   } else if (model.match(/gpt-4-32k/)) {
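The new branches accept both bare and date-suffixed o1 model IDs ("o1", "o1-preview", "o1-mini", or dated variants like "o1-mini-2024-09-12"), all capped at 128000 tokens of context. A standalone sketch of how the regex chain resolves a limit (the resolveContextLimit wrapper is hypothetical, not part of the diff):

    // Hypothetical helper mirroring the regex chain in the hunk above.
    function resolveContextLimit(model: string): number | undefined {
      if (model.match(/^o1-mini(-\d{4}-\d{2}-\d{2})?$/)) return 128000;
      if (model.match(/^o1(-preview)?(-\d{4}-\d{2}-\d{2})?$/)) return 128000;
      if (model.match(/gpt-3.5-turbo/)) return 16384;
      return undefined;
    }

    resolveContextLimit("o1-mini-2024-09-12"); // => 128000
    resolveContextLimit("o1-preview");         // => 128000
    resolveContextLimit("o1-like-model");      // => undefined (anchored patterns reject it)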
@@ -212,8 +212,12 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
     delete errorPayload.message;
   } else if (service === "gcp") {
     // Try to standardize the error format for GCP
-    if (errorPayload.error?.code) { // GCP Error
-      errorPayload.error = { message: errorPayload.error.message, type: errorPayload.error.status || errorPayload.error.code };
+    if (errorPayload.error?.code) {
+      // GCP Error
+      errorPayload.error = {
+        message: errorPayload.error.message,
+        type: errorPayload.error.status || errorPayload.error.code,
+      };
     }
   }
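The normalization collapses GCP's nested { code, message, status } error body into the flatter { message, type } shape used for the other services' errors. A quick sketch of the transformation, using an invented sample payload:

    // Invented sample, shaped like a GCP error response body.
    const errorPayload: any = {
      error: { code: 429, message: "Quota exceeded", status: "RESOURCE_EXHAUSTED" },
    };

    if (errorPayload.error?.code) {
      // GCP Error
      errorPayload.error = {
        message: errorPayload.error.message,
        type: errorPayload.error.status || errorPayload.error.code,
      };
    }
    // => { error: { message: "Quota exceeded", type: "RESOURCE_EXHAUSTED" } }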
@@ -231,7 +235,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
         // same 429 billing error that other models return.
         await handleOpenAIRateLimitError(req, errorPayload);
       } else {
-        errorPayload.proxy_note = `The upstream API rejected the request. Your prompt may be too long for ${req.body?.model}.`;
+        errorPayload.proxy_note = `The upstream API rejected the request. Check the error message for details.`;
       }
       break;
     case "anthropic":
@@ -293,8 +297,8 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
       errorPayload.proxy_note = `Received 403 error. Key may be invalid.`;
     }
     return;
-    case "mistral-ai":
-    case "gcp":
+    case "mistral-ai":
+    case "gcp":
       keyPool.disable(req.key!, "revoked");
       errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`;
       return;
@@ -688,15 +692,23 @@ const countResponseTokens: ProxyResHandlerWithBody = async (
     const completion = getCompletionFromBody(req, body);
     const tokens = await countTokens({ req, completion, service });

+    if (req.service === "openai" || req.service === "azure") {
+      // O1 consumes (a significant amount of) invisible tokens for the chain-
+      // of-thought reasoning. We have no way to count these other than to check
+      // the response body.
+      tokens.reasoning_tokens =
+        body.usage?.completion_tokens_details?.reasoning_tokens;
+    }
+
     req.log.debug(
-      { service, tokens, prevOutputTokens: req.outputTokens },
+      { service, prevOutputTokens: req.outputTokens, tokens },
       `Counted tokens for completion`
     );
     if (req.tokenizerInfo) {
       req.tokenizerInfo.completion_tokens = tokens;
     }

-    req.outputTokens = tokens.token_count;
+    req.outputTokens = tokens.token_count + (tokens.reasoning_tokens ?? 0);
   } catch (error) {
     req.log.warn(
       error,
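The new accounting reads the usage.completion_tokens_details.reasoning_tokens field that OpenAI reports for o1 responses: the locally computed token_count covers only the visible completion text, so the hidden chain-of-thought tokens are added back when recording output usage. A minimal sketch with an invented, trimmed response body:

    // Trimmed, invented example of an o1 chat completion response body.
    const body = {
      usage: {
        completion_tokens: 1000,
        completion_tokens_details: { reasoning_tokens: 832 },
      },
    };

    // 168 visible tokens counted from the completion text (invented value).
    const tokens: { token_count: number; reasoning_tokens?: number } = { token_count: 168 };
    tokens.reasoning_tokens = body.usage?.completion_tokens_details?.reasoning_tokens;

    const outputTokens = tokens.token_count + (tokens.reasoning_tokens ?? 0);
    // => 1000, matching the billed completion_tokens total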