From e813cd9d22cdbeadf398c2fff37c388cec0d6a46 Mon Sep 17 00:00:00 2001
From: nai-degen
Date: Wed, 17 Jan 2024 02:36:21 -0600
Subject: [PATCH] default claude 2.1 instead of 1.3 in openai compat endpoint
 since 1.3 is not accessible on all keys

---
 .env.example                                       |  6 ++++--
 src/proxy/anthropic.ts                             | 11 +----------
 .../preprocessors/transform-outbound-payload.ts    |  6 +-----
 .../request/preprocessors/validate-context-size.ts |  1 +
 .../response/streaming/sse-stream-adapter.ts       |  6 +-----
 src/proxy/openai.ts                                |  4 +---
 src/proxy/queue.ts                                 |  2 +-
 7 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/.env.example b/.env.example
index b523395..62abbae 100644
--- a/.env.example
+++ b/.env.example
@@ -5,6 +5,9 @@
 # All values have reasonable defaults, so you only need to change the ones you
 # want to override.
 
+# Use production mode unless you are developing locally.
+NODE_ENV=production
+
 # ------------------------------------------------------------------------------
 # General settings:
 
@@ -112,8 +115,7 @@
 
 # ------------------------------------------------------------------------------
 # Secrets and keys:
-# Do not put any passwords or API keys directly in this file.
-# For Huggingface, set them via the Secrets section in your Space's config UI.
+# For Huggingface, set them via the Secrets section in your Space's config UI. Do not set them in .env.
 # For Render, create a "secret file" called .env using the Environment tab.
 # You can add multiple API keys by separating them with a comma.
 
diff --git a/src/proxy/anthropic.ts b/src/proxy/anthropic.ts
index ab4743e..2f8a133 100644
--- a/src/proxy/anthropic.ts
+++ b/src/proxy/anthropic.ts
@@ -173,16 +173,7 @@ anthropicRouter.post(
 function maybeReassignModel(req: Request) {
   const model = req.body.model;
   if (!model.startsWith("gpt-")) return;
-
-  const bigModel = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
-  const contextSize = req.promptTokens! + req.outputTokens!;
-  if (contextSize > 8500) {
-    req.log.debug(
-      { model: bigModel, contextSize },
-      "Using Claude 100k model for OpenAI-to-Anthropic request"
-    );
-    req.body.model = bigModel;
-  }
+  req.body.model = "claude-2.1";
 }
 
 export const anthropic = anthropicRouter;
diff --git a/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts b/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
index 36502c4..e355111 100644
--- a/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
+++ b/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
@@ -279,11 +279,7 @@ function openaiToAnthropic(req: Request) {
   stops = [...new Set(stops)];
 
   return {
-    // Model may be overridden in `calculate-context-size.ts` to avoid having
-    // a circular dependency (`calculate-context-size.ts` needs an already-
-    // transformed request body to count tokens, but this function would like
-    // to know the count to select a model).
-    model: process.env.CLAUDE_SMALL_MODEL || "claude-v1",
+    model: rest.model,
     prompt: prompt,
     max_tokens_to_sample: rest.max_tokens,
     stop_sequences: stops,
diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
index aa42636..69c1061 100644
--- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts
+++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
@@ -37,6 +37,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
       break;
     case "mistral-ai":
       proxyMax = MISTRAL_AI_MAX_CONTENT;
+      break;
     case "openai-image":
       return;
     default:
diff --git a/src/proxy/middleware/response/streaming/sse-stream-adapter.ts b/src/proxy/middleware/response/streaming/sse-stream-adapter.ts
index 92ade02..31ebef7 100644
--- a/src/proxy/middleware/response/streaming/sse-stream-adapter.ts
+++ b/src/proxy/middleware/response/streaming/sse-stream-adapter.ts
@@ -85,9 +85,6 @@ export class SSEStreamAdapter extends Transform {
       }
     } else {
       const { bytes } = payload;
-      // technically this is a transformation but we don't really distinguish
-      // between aws claude and anthropic claude at the APIFormat level, so
-      // these will short circuit the message transformer
       return [
         "event: completion",
         `data: ${Buffer.from(bytes, "base64").toString("utf8")}`,
@@ -95,8 +92,7 @@ export class SSEStreamAdapter extends Transform {
     }
   }
 
-  // Google doesn't use event streams and just sends elements in an array over
-  // a long-lived HTTP connection. Needs stream-json to parse the array.
+  /** Processes an incoming array element from the Google AI JSON stream. */
   protected processGoogleValue(value: any): string | null {
     try {
       const candidates = value.candidates ?? [{}];
diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts
index 48eb07e..a20c6d3 100644
--- a/src/proxy/openai.ts
+++ b/src/proxy/openai.ts
@@ -163,9 +163,7 @@ const openaiProxy = createQueueMiddleware({
     selfHandleResponse: true,
     logger,
     on: {
-      proxyReq: createOnProxyReqHandler({
-        pipeline: [addKey, finalizeBody],
-      }),
+      proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody], }),
       proxyRes: createOnProxyResHandler([openaiResponseHandler]),
       error: handleProxyError,
     },
diff --git a/src/proxy/queue.ts b/src/proxy/queue.ts
index 4c86fcf..b980ff4 100644
--- a/src/proxy/queue.ts
+++ b/src/proxy/queue.ts
@@ -527,7 +527,7 @@ function monitorHeartbeat(req: Request) {
   if (bytesSinceLast < minBytes) {
     req.log.warn(
       { minBytes, bytesSinceLast },
-      "Queued request is processing heartbeats enough data or server is overloaded; killing connection."
+      "Queued request is not processing heartbeats enough data or server is overloaded; killing connection."
    );
     res.destroy();
   }