Automatically add Anthropic "\n\nHuman:" preamble when necessary (khanon/oai-reverse-proxy!23)

This commit is contained in:
khanon
2023-06-09 07:16:40 +00:00
parent 4f2a12ef14
commit 96cf4a0e2d
8 changed files with 122 additions and 16 deletions
+2
View File
@@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common";
import {
addKey,
addAnthropicPreamble,
createPreprocessorMiddleware,
finalizeBody,
languageFilter,
@@ -69,6 +70,7 @@ const rewriteAnthropicRequest = (
) => {
const rewriterPipeline = [
addKey,
addAnthropicPreamble,
languageFilter,
limitOutputTokens,
finalizeBody,
@@ -0,0 +1,32 @@
import { AnthropicKey, Key } from "../../../key-management";
import { isCompletionRequest } from "../common";
import { ProxyRequestMiddleware } from ".";
/**
 * Prepends the `\n\nHuman:` preamble to the prompt of an Anthropic completion
 * request when the key assigned to the request is flagged as requiring it.
 *
 * Some keys reject prompts that do not start with `\n\nHuman:`. There is no way
 * to know this ahead of time; a key is only flagged (`requiresPreamble`) after
 * a request sent with it has failed for that reason.
 *
 * The request body is left untouched when:
 * - the request is not a completion request, or its key is not an Anthropic key;
 * - the key is not flagged as requiring the preamble;
 * - the prompt is not a string (it is passed through unmodified instead of
 *   being coerced to a string or throwing inside the proxy hook);
 * - the prompt already starts with the preamble.
 *
 * @param _proxyReq Outgoing proxy request (unused).
 * @param req Incoming request whose `body.prompt` may be rewritten in place.
 */
export const addAnthropicPreamble: ProxyRequestMiddleware = (
  _proxyReq,
  req
) => {
  if (!isCompletionRequest(req) || req.key?.service !== "anthropic") {
    return;
  }

  // Narrows `req.key` to an AnthropicKey so `requiresPreamble` is accessible.
  assertAnthropicKey(req.key);
  if (!req.key.requiresPreamble) {
    return;
  }

  const preamble = "\n\nHuman:";
  const prompt: unknown = req.body.prompt;

  // Only rewrite the prompt when there is actually something to add. This
  // avoids logging "Adding preamble" for prompts that already have it, and
  // avoids turning a missing/non-string prompt into the text "undefined".
  if (typeof prompt !== "string" || prompt.startsWith(preamble)) {
    return;
  }

  req.log.debug({ key: req.key.hash, preamble }, "Adding preamble to prompt");
  req.body.prompt = preamble + prompt;
};
/**
 * Assertion function that narrows a generic key to an Anthropic key.
 *
 * @param key Key to check.
 * @throws Error if `key.service` is anything other than `"anthropic"`.
 */
function assertAnthropicKey(key: Key): asserts key is AnthropicKey {
  const actualService = key.service;
  if (actualService === "anthropic") {
    return;
  }
  throw new Error(`Expected an Anthropic key, got '${actualService}'`);
}
+1
View File
@@ -9,6 +9,7 @@ export { transformOutboundPayload } from "./transform-outbound-payload";
// HPM middleware (runs on onProxyReq, cannot be async)
export { addKey } from "./add-key";
export { addAnthropicPreamble } from "./add-anthropic-preamble";
export { finalizeBody } from "./finalize-body";
export { languageFilter } from "./language-filter";
export { limitCompletions } from "./limit-completions";
@@ -153,19 +153,10 @@ function openaiToAnthropic(body: any, req: Request) {
// Remove duplicates
stops = [...new Set(stops)];
// TEMP: More shitty anthropic API hacks
// If you receive a 400 Bad Request error from Anthropic complaining about
// "prompt must start with a '\n\nHuman: ' turn", enable this setting.
// I will try to fix this when I can identify why it only happens sometimes.
let preamble = "";
if (process.env.CLAUDE_ADD_HUMAN_PREAMBLE) {
preamble = "\n\nHuman: Hello Claude.";
}
return {
...rest,
model,
prompt: preamble + prompt,
prompt: prompt,
max_tokens_to_sample: rest.max_tokens,
stop_sequences: stops,
};
+49 -3
View File
@@ -135,7 +135,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => {
function reenqueueRequest(req: Request) {
req.log.info(
{ key: req.key?.hash, retryCount: req.retryCount },
`Re-enqueueing request due to rate-limit error`
`Re-enqueueing request due to retryable error`
);
req.retryCount++;
enqueue(req);
@@ -262,7 +262,11 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
if (statusCode === 400) {
// Bad request (likely prompt is too long)
errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
if (req.outboundApi === "openai") {
errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
} else if (req.outboundApi === "anthropic") {
maybeHandleMissingPreambleError(req, errorPayload);
}
} else if (statusCode === 401) {
// Key is invalid or was revoked
keyPool.disable(req.key!);
@@ -271,7 +275,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
// OpenAI uses this for a bunch of different rate-limiting scenarios.
if (req.outboundApi === "openai") {
handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
} else {
} else if (req.outboundApi === "anthropic") {
handleAnthropicRateLimitError(req, errorPayload);
}
} else if (statusCode === 404) {
@@ -305,6 +309,48 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
throw new Error(errorPayload.error?.message);
};
/**
 * Handles a 400 response from Anthropic that may have been caused by a prompt
 * missing the `\n\nHuman:` prefix.
 *
 * Certain API keys appear to validate input more strictly than others and
 * reject any prompt that does not start with `\n\nHuman:`. This is not
 * documented and it is unclear why only some keys behave this way (possibly to
 * stop the key being used as a generic text completion service).
 *
 * The error being matched is exactly:
 * ```
 * {
 *   "error": {
 *     "type": "invalid_request_error",
 *     "message": "prompt must start with \"\n\nHuman:\" turn"
 *   }
 * }
 * ```
 *
 * When the payload matches, the key is updated with `requiresPreamble: true`.
 * If queueing is enabled (`config.queueMode` is not `"none"`) the request is
 * re-enqueued and a `RetryableError` is thrown; otherwise a `proxy_note`
 * telling the user to retry is written onto the payload. Any other payload
 * only gets a generic `proxy_note`.
 *
 * @param req Request that received the 400 response.
 * @param errorPayload Parsed upstream error body; `proxy_note` is set on it in
 *   place.
 * @throws RetryableError after the request has been re-enqueued.
 */
function maybeHandleMissingPreambleError(
  req: Request,
  errorPayload: Record<string, any>
) {
  const upstreamError = errorPayload.error;
  const isMissingPreamble =
    upstreamError?.type === "invalid_request_error" &&
    upstreamError?.message === 'prompt must start with "\n\nHuman:" turn';

  // Any other 400 is passed through with a generic note.
  if (!isMissingPreamble) {
    errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
    return;
  }

  req.log.warn(
    { key: req.key?.hash },
    "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
  );
  keyPool.update(req.key!, { requiresPreamble: true });

  // Without a queue there is nothing to retry with; ask the user to resend.
  if (config.queueMode === "none") {
    errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
    return;
  }

  reenqueueRequest(req);
  throw new RetryableError("Claude request re-enqueued to add preamble.");
}
function handleAnthropicRateLimitError(
req: Request,
errorPayload: Record<string, any>