Files
simple-proxy/src/proxy/middleware/request/check-context-size.ts
T

135 lines
4.4 KiB
TypeScript

import { Request } from "express";
import { z } from "zod";
import { config } from "../../../config";
import { OpenAIPromptMessage, countTokens } from "../../../tokenization";
import { RequestPreprocessor } from ".";
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
/**
* Assigns `req.promptTokens` and `req.outputTokens` based on the request body
* and outbound API format, which combined determine the size of the context.
* If the context is too large, an error is thrown.
* This preprocessor should run after any preprocessor that transforms the
* request body.
*/
export const checkContextSize: RequestPreprocessor = async (req) => {
const service = req.outboundApi;
let result;
switch (service) {
case "openai": {
req.outputTokens = req.body.max_tokens;
const prompt: OpenAIPromptMessage[] = req.body.messages;
result = await countTokens({ req, prompt, service });
break;
}
case "anthropic": {
req.outputTokens = req.body.max_tokens_to_sample;
const prompt: string = req.body.prompt;
result = await countTokens({ req, prompt, service });
break;
}
default:
throw new Error(`Unknown outbound API: ${req.outboundApi}`);
}
req.promptTokens = result.token_count;
// TODO: Remove once token counting is stable
req.log.debug({ result: result }, "Counted prompt tokens.");
req.debug = req.debug ?? {};
req.debug = { ...req.debug, ...result };
maybeReassignModel(req);
validateContextSize(req);
};
function validateContextSize(req: Request) {
assertRequestHasTokenCounts(req);
const promptTokens = req.promptTokens;
const outputTokens = req.outputTokens;
const contextTokens = promptTokens + outputTokens;
const model = req.body.model;
const proxyMax =
(req.outboundApi === "openai" ? OPENAI_MAX_CONTEXT : CLAUDE_MAX_CONTEXT) ||
Number.MAX_SAFE_INTEGER;
let modelMax = 0;
if (model.match(/gpt-3.5-turbo-16k/)) {
modelMax = 16384;
} else if (model.match(/gpt-3.5-turbo/)) {
modelMax = 4096;
} else if (model.match(/gpt-4-32k/)) {
modelMax = 32768;
} else if (model.match(/gpt-4/)) {
modelMax = 8192;
} else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?(?:-100k)/)) {
modelMax = 100000;
} else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?$/)) {
modelMax = 9000;
} else if (model.match(/claude-2/)) {
modelMax = 100000;
} else {
// Don't really want to throw here because I don't want to have to update
// this ASAP every time a new model is released.
req.log.warn({ model }, "Unknown model, using 100k token limit.");
modelMax = 100000;
}
const finalMax = Math.min(proxyMax, modelMax);
z.number()
.int()
.max(finalMax, {
message: `Your request exceeds the context size limit for this model or proxy. (max: ${finalMax} tokens, requested: ${promptTokens} prompt + ${outputTokens} output = ${contextTokens} context tokens)`,
})
.parse(contextTokens);
req.log.debug(
{ promptTokens, outputTokens, contextTokens, modelMax, proxyMax },
"Prompt size validated"
);
req.debug.prompt_tokens = promptTokens;
req.debug.completion_tokens = outputTokens;
req.debug.max_model_tokens = modelMax;
req.debug.max_proxy_tokens = proxyMax;
}
function assertRequestHasTokenCounts(
req: Request
): asserts req is Request & { promptTokens: number; outputTokens: number } {
z.object({
promptTokens: z.number().int().min(1),
outputTokens: z.number().int().min(1),
})
.nonstrict()
.parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens });
}
/**
* For OpenAI-to-Anthropic requests, users can't specify the model, so we need
* to pick one based on the final context size. Ideally this would happen in
* the `transformOutboundPayload` preprocessor, but we don't have the context
* size at that point (and need a transformed body to calculate it).
*/
function maybeReassignModel(req: Request) {
if (req.inboundApi !== "openai" || req.outboundApi !== "anthropic") {
return;
}
const bigModel = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
const contextSize = req.promptTokens! + req.outputTokens!;
if (contextSize > 8500) {
req.log.debug(
{ model: bigModel, contextSize },
"Using Claude 100k model for OpenAI-to-Anthropic request"
);
req.body.model = bigModel;
}
// Small model is the default already set in `transformOutboundPayload`
}