Automatically add Anthropic "\n\nHuman:" preamble when necessary (khanon/oai-reverse-proxy!23)
This commit is contained in:
@@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit";
|
||||
import { handleProxyError } from "./middleware/common";
|
||||
import {
|
||||
addKey,
|
||||
addAnthropicPreamble,
|
||||
createPreprocessorMiddleware,
|
||||
finalizeBody,
|
||||
languageFilter,
|
||||
@@ -69,6 +70,7 @@ const rewriteAnthropicRequest = (
|
||||
) => {
|
||||
const rewriterPipeline = [
|
||||
addKey,
|
||||
addAnthropicPreamble,
|
||||
languageFilter,
|
||||
limitOutputTokens,
|
||||
finalizeBody,
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
import { AnthropicKey, Key } from "../../../key-management";
|
||||
import { isCompletionRequest } from "../common";
|
||||
import { ProxyRequestMiddleware } from ".";
|
||||
|
||||
/**
 * Prepends the `\n\nHuman:` turn marker to an Anthropic completion prompt
 * when the selected key has been flagged as requiring it.
 *
 * Some keys reject prompts that do not start with `\n\nHuman:`. There is no
 * way to know this up front; a key is only marked `requiresPreamble` after a
 * request made with it has failed, and this middleware then fixes up later
 * prompts sent with that key.
 *
 * The request is left untouched when:
 * - it is not a completion request, or the assigned key is not an Anthropic key;
 * - the key is not flagged as requiring the preamble;
 * - the prompt is not a string (so we never stringify `undefined`/objects
 *   into the outbound body);
 * - the prompt already starts with the preamble.
 *
 * @param _proxyReq Outgoing proxy request (unused).
 * @param req Incoming request whose `body.prompt` may be rewritten in place.
 */
export const addAnthropicPreamble: ProxyRequestMiddleware = (
  _proxyReq,
  req
) => {
  if (!isCompletionRequest(req) || req.key?.service !== "anthropic") {
    return;
  }
  assertAnthropicKey(req.key);

  if (!req.key.requiresPreamble) {
    return;
  }

  const humanTurn = "\n\nHuman:";
  const prompt: unknown = req.body.prompt;

  // A missing or non-string prompt cannot be prefixed meaningfully;
  // concatenating would send the literal text "undefined" upstream.
  if (typeof prompt !== "string") {
    return;
  }

  // Nothing to do (and nothing worth logging) if the prompt is already valid.
  if (prompt.startsWith(humanTurn)) {
    return;
  }

  req.log.debug(
    { key: req.key.hash, preamble: humanTurn },
    "Adding preamble to prompt"
  );
  req.body.prompt = humanTurn + prompt;
};
|
||||
|
||||
/**
 * Assertion helper that narrows a generic `Key` to an `AnthropicKey`.
 *
 * @param key Key to check.
 * @throws Error if `key.service` is anything other than `"anthropic"`.
 */
function assertAnthropicKey(key: Key): asserts key is AnthropicKey {
  const { service } = key;
  if (service === "anthropic") {
    return;
  }
  throw new Error(`Expected an Anthropic key, got '${service}'`);
}
|
||||
@@ -9,6 +9,7 @@ export { transformOutboundPayload } from "./transform-outbound-payload";
|
||||
|
||||
// HPM middleware (runs on onProxyReq, cannot be async)
|
||||
export { addKey } from "./add-key";
|
||||
export { addAnthropicPreamble } from "./add-anthropic-preamble";
|
||||
export { finalizeBody } from "./finalize-body";
|
||||
export { languageFilter } from "./language-filter";
|
||||
export { limitCompletions } from "./limit-completions";
|
||||
|
||||
@@ -153,19 +153,10 @@ function openaiToAnthropic(body: any, req: Request) {
|
||||
// Remove duplicates
|
||||
stops = [...new Set(stops)];
|
||||
|
||||
// TEMP: More shitty anthropic API hacks
|
||||
// If you receive a 400 Bad Request error from Anthropic complaining about
|
||||
// "prompt must start with a '\n\nHuman: ' turn", enable this setting.
|
||||
// I will try to fix this when I can identify why it only happens sometimes.
|
||||
let preamble = "";
|
||||
if (process.env.CLAUDE_ADD_HUMAN_PREAMBLE) {
|
||||
preamble = "\n\nHuman: Hello Claude.";
|
||||
}
|
||||
|
||||
return {
|
||||
...rest,
|
||||
model,
|
||||
prompt: preamble + prompt,
|
||||
prompt: prompt,
|
||||
max_tokens_to_sample: rest.max_tokens,
|
||||
stop_sequences: stops,
|
||||
};
|
||||
|
||||
@@ -135,7 +135,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => {
|
||||
function reenqueueRequest(req: Request) {
|
||||
req.log.info(
|
||||
{ key: req.key?.hash, retryCount: req.retryCount },
|
||||
`Re-enqueueing request due to rate-limit error`
|
||||
`Re-enqueueing request due to retryable error`
|
||||
);
|
||||
req.retryCount++;
|
||||
enqueue(req);
|
||||
@@ -262,7 +262,11 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||
|
||||
if (statusCode === 400) {
|
||||
// Bad request (likely prompt is too long)
|
||||
errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
|
||||
if (req.outboundApi === "openai") {
|
||||
errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
|
||||
} else if (req.outboundApi === "anthropic") {
|
||||
maybeHandleMissingPreambleError(req, errorPayload);
|
||||
}
|
||||
} else if (statusCode === 401) {
|
||||
// Key is invalid or was revoked
|
||||
keyPool.disable(req.key!);
|
||||
@@ -271,7 +275,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||
// OpenAI uses this for a bunch of different rate-limiting scenarios.
|
||||
if (req.outboundApi === "openai") {
|
||||
handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
|
||||
} else {
|
||||
} else if (req.outboundApi === "anthropic") {
|
||||
handleAnthropicRateLimitError(req, errorPayload);
|
||||
}
|
||||
} else if (statusCode === 404) {
|
||||
@@ -305,6 +309,48 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||
throw new Error(errorPayload.error?.message);
|
||||
};
|
||||
|
||||
/**
 * Handles Anthropic 400 responses caused by a missing `\n\nHuman:` prefix.
 *
 * Certain API keys appear to validate input more strictly than others and
 * reject any prompt that does not begin with `\n\nHuman:` (perhaps to stop the
 * key being used for generic text completion rather than the chat format).
 * This is undocumented, and it is unclear why only some keys behave this way.
 *
 * When the upstream error matches exactly:
 * ```
 * {
 *   "error": {
 *     "type": "invalid_request_error",
 *     "message": "prompt must start with \"\n\nHuman:\" turn"
 *   }
 * }
 * ```
 * the key is updated with `requiresPreamble: true`. If queueing is enabled the
 * request is re-enqueued and a `RetryableError` is thrown; otherwise a
 * `proxy_note` asks the client to try again. Any other error just gets a
 * generic `proxy_note`.
 *
 * @param req The failed request; its key is the one that gets updated.
 * @param errorPayload Parsed upstream error body; `proxy_note` is set on it.
 * @throws RetryableError when the request has been re-enqueued.
 */
function maybeHandleMissingPreambleError(
  req: Request,
  errorPayload: Record<string, any>
) {
  const isMissingPreamble =
    errorPayload.error?.type === "invalid_request_error" &&
    errorPayload.error?.message === 'prompt must start with "\n\nHuman:" turn';

  // Anything other than the exact preamble complaint is passed through with
  // a generic note.
  if (!isMissingPreamble) {
    errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
    return;
  }

  req.log.warn(
    { key: req.key?.hash },
    "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
  );
  keyPool.update(req.key!, { requiresPreamble: true });

  // With queueing enabled the request can be retried transparently.
  const queueingEnabled = config.queueMode !== "none";
  if (queueingEnabled) {
    reenqueueRequest(req);
    throw new RetryableError("Claude request re-enqueued to add preamble.");
  }

  errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
}
|
||||
|
||||
function handleAnthropicRateLimitError(
|
||||
req: Request,
|
||||
errorPayload: Record<string, any>
|
||||
|
||||
Reference in New Issue
Block a user