From d979edbc0aa2531cce55b40c0b81a78e7b147009 Mon Sep 17 00:00:00 2001
From: nai-degen
Date: Sat, 3 Jun 2023 23:11:45 -0500
Subject: [PATCH] trying to figure out why it's selecting incorrect model

---
 src/proxy/middleware/request/add-key.ts               |  2 --
 .../middleware/request/transform-outbound-payload.ts  | 12 ++++++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/proxy/middleware/request/add-key.ts b/src/proxy/middleware/request/add-key.ts
index 08dbab3..df06858 100644
--- a/src/proxy/middleware/request/add-key.ts
+++ b/src/proxy/middleware/request/add-key.ts
@@ -41,8 +41,6 @@ export const addKey: ProxyRequestMiddleware = (proxyReq, req) => {
   // For such cases, ignore the requested model entirely.
   if (req.inboundApi === "openai" && req.outboundApi === "anthropic") {
     req.log.debug("Using an Anthropic key for an OpenAI-compatible request");
-    // We don't assign the model here, that will happen when transforming the
-    // request body.
     assignedKey = keyPool.get("claude-v1");
   } else {
     assignedKey = keyPool.get(req.body.model);
diff --git a/src/proxy/middleware/request/transform-outbound-payload.ts b/src/proxy/middleware/request/transform-outbound-payload.ts
index de87a77..7f416aa 100644
--- a/src/proxy/middleware/request/transform-outbound-payload.ts
+++ b/src/proxy/middleware/request/transform-outbound-payload.ts
@@ -8,7 +8,7 @@ import { OpenAIPromptMessage } from "../../../tokenization/openai";
 * The maximum number of tokens an Anthropic prompt can have before we switch to
 * the larger claude-100k context model.
 */
-const CLAUDE_100K_THRESHOLD = 8200;
+const CLAUDE_100K_TOKEN_THRESHOLD = 8200;
 
 // https://console.anthropic.com/docs/api/reference#-v1-complete
 const AnthropicV1CompleteSchema = z.object({
@@ -127,8 +127,16 @@ function openaiToAnthropic(body: any, req: Request) {
   const CLAUDE_BIG = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
   const CLAUDE_SMALL = process.env.CLAUDE_SMALL_MODEL || "claude-v1";
 
+  const contextTokens = Number(req.promptTokens ?? 0) + Number(rest.max_tokens ?? 0);
   const model =
-    req.promptTokens ?? 0 > CLAUDE_100K_THRESHOLD ? CLAUDE_BIG : CLAUDE_SMALL;
+    contextTokens > CLAUDE_100K_TOKEN_THRESHOLD
+      ? CLAUDE_BIG
+      : CLAUDE_SMALL;
+
+  req.log.debug(
+    { contextTokens, model, CLAUDE_100K_TOKEN_THRESHOLD },
+    "Selected Claude model"
+  );
 
   let stops = rest.stop
     ? Array.isArray(rest.stop)