Implement AWS Bedrock support (khanon/oai-reverse-proxy!45)

This commit is contained in:
khanon
2023-10-01 01:40:18 +00:00
parent 7e681a7bef
commit fa4bf468d2
38 changed files with 1438 additions and 410 deletions
+4 -1
View File
@@ -80,7 +80,6 @@ export const addKey: ProxyRequestMiddleware = (proxyReq, req) => {
proxyReq.setHeader("X-API-Key", assignedKey.key);
break;
case "openai":
case "openai-text":
const key: OpenAIKey = assignedKey as OpenAIKey;
if (key.organizationId) {
proxyReq.setHeader("OpenAI-Organization", key.organizationId);
@@ -94,6 +93,10 @@ export const addKey: ProxyRequestMiddleware = (proxyReq, req) => {
`?key=${assignedKey.key}`
);
break;
case "aws":
throw new Error(
"add-key should not be used for AWS security credentials. Use sign-aws-request instead."
);
default:
assertNever(assignedKey.service);
}
@@ -0,0 +1,48 @@
import { RequestPreprocessor } from "./index";
import { countTokens, OpenAIPromptMessage } from "../../../shared/tokenization";
import { assertNever } from "../../../shared/utils";
/**
 * Preprocessor that measures the prompt of a request whose body is already in
 * the outbound API's format. It records the requested completion size on
 * `req.outputTokens`, the counted prompt size on `req.promptTokens`, and merges
 * the tokenizer's result into `req.debug`.
 */
export const countPromptTokens: RequestPreprocessor = async (req) => {
  const service = req.outboundApi;
  let result;

  // Each API format keeps its output limit and prompt under different keys.
  if (service === "openai") {
    req.outputTokens = req.body.max_tokens;
    const messages: OpenAIPromptMessage[] = req.body.messages;
    result = await countTokens({ req, prompt: messages, service });
  } else if (service === "openai-text") {
    req.outputTokens = req.body.max_tokens;
    const text: string = req.body.prompt;
    result = await countTokens({ req, prompt: text, service });
  } else if (service === "anthropic") {
    req.outputTokens = req.body.max_tokens_to_sample;
    const text: string = req.body.prompt;
    result = await countTokens({ req, prompt: text, service });
  } else if (service === "google-palm") {
    req.outputTokens = req.body.maxOutputTokens;
    const text: string = req.body.prompt.text;
    result = await countTokens({ req, prompt: text, service });
  } else {
    // Exhaustiveness check: every known outbound API is handled above.
    assertNever(service);
  }

  req.promptTokens = result.token_count;

  // TODO: Remove once token counting is stable
  req.log.debug({ result }, "Counted prompt tokens.");
  req.debug = { ...(req.debug ?? {}), ...result };
};
@@ -0,0 +1,26 @@
import type { ProxyRequestMiddleware } from ".";
/**
 * Applies a previously signed AWS request to the outgoing proxy request.
 *
 * Signing happens earlier in the pipeline (before the proxy middleware), so
 * this handler only copies the signed path, headers and body onto `proxyReq`.
 *
 * @throws Error if `req.signedRequest` has not been set.
 */
export const finalizeAwsRequest: ProxyRequestMiddleware = (proxyReq, req) => {
  const signed = req.signedRequest;
  if (!signed) {
    throw new Error("Expected req.signedRequest to be set");
  }

  // The path depends on the selected model and the assigned key's region.
  proxyReq.path = signed.path;

  // Amazon doesn't want extra headers, so drop everything currently on the
  // proxy request and re-add only what the signed request specifies.
  for (const headerName of proxyReq.getRawHeaderNames()) {
    proxyReq.removeHeader(headerName);
  }
  for (const [headerName, headerValue] of Object.entries(signed.headers)) {
    proxyReq.setHeader(headerName, headerValue);
  }

  // Don't use fixRequestBody here because it adds a content-length header.
  // Amazon doesn't want that and it breaks the signature.
  proxyReq.write(signed.body);
};
+10 -3
View File
@@ -2,14 +2,17 @@ import type { Request } from "express";
import type { ClientRequest } from "http";
import type { ProxyReqCallback } from "http-proxy";
// Express middleware (runs before http-proxy-middleware, can be async)
export { applyQuotaLimits } from "./apply-quota-limits";
export {
createPreprocessorMiddleware,
createEmbeddingsPreprocessorMiddleware,
} from "./preprocess";
export { checkContextSize } from "./check-context-size";
// Express middleware (runs before http-proxy-middleware, can be async)
export { applyQuotaLimits } from "./apply-quota-limits";
export { validateContextSize } from "./validate-context-size";
export { countPromptTokens } from "./count-prompt-tokens";
export { setApiFormat } from "./set-api-format";
export { signAwsRequest } from "./sign-aws-request";
export { transformOutboundPayload } from "./transform-outbound-payload";
// HPM middleware (runs on onProxyReq, cannot be async)
@@ -17,6 +20,7 @@ export { addKey, addKeyForEmbeddingsRequest } from "./add-key";
export { addAnthropicPreamble } from "./add-anthropic-preamble";
export { blockZoomerOrigins } from "./block-zoomer-origins";
export { finalizeBody } from "./finalize-body";
export { finalizeAwsRequest } from "./finalize-aws-request";
export { languageFilter } from "./language-filter";
export { limitCompletions } from "./limit-completions";
export { stripHeaders } from "./strip-headers";
@@ -50,3 +54,6 @@ export type RequestPreprocessor = (req: Request) => void | Promise<void>;
* request queue middleware.
*/
export type ProxyRequestMiddleware = ProxyReqCallback<ClientRequest, Request>;
/**
 * Builds a function that overwrites `req.body.model` with the given model
 * name, regardless of what the request originally specified.
 */
export const forceModel = (model: string) => (req: Request): undefined => {
  req.body.model = model;
  return undefined;
};
+24 -9
View File
@@ -2,24 +2,42 @@ import { RequestHandler } from "express";
import { handleInternalError } from "../common";
import {
RequestPreprocessor,
checkContextSize,
validateContextSize,
countPromptTokens,
setApiFormat,
transformOutboundPayload,
} from ".";
/**
 * Optional hooks accepted by `createPreprocessorMiddleware`. Both lists are
 * optional; a missing list is treated as empty.
 */
type RequestPreprocessorOptions = {
/**
* Functions to run before the request body is transformed between API
* formats. Use this to change the behavior of the transformation, such as for
* endpoints which can accept multiple API formats.
*/
beforeTransform?: RequestPreprocessor[];
/**
* Functions to run after the request body is transformed and token counts are
* assigned. Use this to perform validation or other actions that depend on
* the request body being in the final API format.
*/
afterTransform?: RequestPreprocessor[];
};
/**
* Returns a middleware function that processes the request body into the given
* API format, and then sequentially runs the given additional preprocessors.
*/
export const createPreprocessorMiddleware = (
apiFormat: Parameters<typeof setApiFormat>[0],
additionalPreprocessors?: RequestPreprocessor[]
{ beforeTransform, afterTransform }: RequestPreprocessorOptions = {}
): RequestHandler => {
const preprocessors: RequestPreprocessor[] = [
setApiFormat(apiFormat),
...(additionalPreprocessors ?? []),
...(beforeTransform ?? []),
transformOutboundPayload,
checkContextSize,
countPromptTokens,
...(afterTransform ?? []),
validateContextSize,
];
return async (...args) => executePreprocessors(preprocessors, args);
};
@@ -29,13 +47,10 @@ export const createPreprocessorMiddleware = (
* OpenAI's embeddings API. Tokens are not counted because embeddings requests
* are basically free.
*/
export const createEmbeddingsPreprocessorMiddleware = (
additionalPreprocessors?: RequestPreprocessor[]
): RequestHandler => {
export const createEmbeddingsPreprocessorMiddleware = (): RequestHandler => {
const preprocessors: RequestPreprocessor[] = [
setApiFormat({ inApi: "openai", outApi: "openai" }),
setApiFormat({ inApi: "openai", outApi: "openai", service: "openai" }),
(req) => void (req.promptTokens = req.outputTokens = 0),
...(additionalPreprocessors ?? []),
];
return async (...args) => executePreprocessors(preprocessors, args);
};
@@ -1,13 +1,15 @@
import { Request } from "express";
import { APIFormat } from "../../../shared/key-management";
import { APIFormat, LLMService } from "../../../shared/key-management";
import { RequestPreprocessor } from ".";
/**
 * Creates a preprocessor that tags the request with the API format it was
 * received in, the API format it will be sent out as, and the service that
 * will handle it.
 */
export const setApiFormat = (api: {
  inApi: Request["inboundApi"];
  outApi: APIFormat;
  service: LLMService;
}): RequestPreprocessor =>
  (req) => {
    const { inApi, outApi, service } = api;
    req.inboundApi = inApi;
    req.outboundApi = outApi;
    req.service = service;
  };
@@ -0,0 +1,93 @@
import express from "express";
import { Sha256 } from "@aws-crypto/sha256-js";
import { SignatureV4 } from "@smithy/signature-v4";
import { HttpRequest } from "@smithy/protocol-http";
import { keyPool } from "../../../shared/key-management";
import { RequestPreprocessor } from ".";
import { AnthropicV1CompleteSchema } from "./transform-outbound-payload";
// Hostname template for the AWS endpoint that requests are signed for and
// sent to. The literal `%REGION%` placeholder is substituted with the region
// taken from the assigned credential. Can be overridden with the AMZ_HOST
// environment variable; an unset or empty value falls back to the default.
// NOTE(review): confirm the default still matches the endpoint name AWS
// documents for Bedrock model invocation.
const AMZ_HOST =
process.env.AMZ_HOST || "invoke-bedrock.%REGION%.amazonaws.com";
/**
 * Signs an outgoing AWS request with the appropriate headers and stores the
 * signed request (path, headers and body) on `req.signedRequest` so that it
 * can be applied to the proxy request later.
 *
 * Side effects on `req`:
 * - `req.key` is assigned from the key pool.
 * - `req.isStreaming` is derived from `req.body.stream`.
 * - `req.body.prompt` is prefixed with "\n\nHuman:" when it does not already
 *   start with it.
 *
 * @throws if the body fails schema validation or the assigned key is not in
 *   the expected credential format.
 */
export const signAwsRequest: RequestPreprocessor = async (req) => {
  req.key = keyPool.get("anthropic.claude-v2");

  const { model, stream } = req.body;
  const isStreaming = stream === true || stream === "true";
  req.isStreaming = isStreaming;

  // Only touch the prompt when it is a string; a missing or malformed prompt
  // is reported by the schema validation below rather than as a TypeError.
  const humanPrefix = "\n\nHuman:";
  if (
    typeof req.body.prompt === "string" &&
    !req.body.prompt.startsWith(humanPrefix)
  ) {
    req.body.prompt = humanPrefix + req.body.prompt;
  }

  // AWS supports only a subset of Anthropic's parameters and is more strict
  // about unknown parameters.
  // TODO: This should happen in transform-outbound-payload.ts
  const strippedParams = AnthropicV1CompleteSchema.pick({
    prompt: true,
    max_tokens_to_sample: true,
    stop_sequences: true,
    temperature: true,
    top_k: true,
    top_p: true,
  }).parse(req.body);

  // Parse the credential once and reuse it for both the host and the signer.
  const credential = getCredentialParts(req);
  const host = AMZ_HOST.replace("%REGION%", credential.region);

  // The endpoint and accept header must agree with `req.isStreaming`, so both
  // are derived from the same strict check instead of the raw `stream` value
  // (where e.g. the string "false" would be truthy).
  const headers: Record<string, string> = {
    Host: host,
    "content-type": "application/json",
  };
  if (isStreaming) {
    headers["x-amzn-bedrock-accept"] = "application/json";
  } else {
    headers["accept"] = "*/*";
  }

  // Uses the AWS SDK to sign a request; the generated path and headers are
  // applied to the HPM proxy request later in the pipeline.
  const newRequest = new HttpRequest({
    method: "POST",
    protocol: "https:",
    hostname: host,
    path: `/model/${model}/invoke${isStreaming ? "-with-response-stream" : ""}`,
    headers,
    body: JSON.stringify(strippedParams),
  });

  req.signedRequest = await sign(newRequest, credential);
};
/** AWS credential parts parsed from a key string. */
type Credential = {
  accessKeyId: string;
  secretAccessKey: string;
  region: string;
};

/**
 * Splits the key assigned to the request into its AWS credential parts. The
 * key string is expected to be `accessKeyId:secretAccessKey:region`.
 *
 * @param req Request whose `key` has already been assigned.
 * @returns The parsed access key id, secret access key and region.
 * @throws Error if no key is assigned, or if any of the three parts is missing
 *   or empty (the key's hash, never the key itself, is logged in that case).
 */
function getCredentialParts(req: express.Request): Credential {
  const assignedKey = req.key;
  if (!assignedKey) {
    // Fail with a descriptive error instead of a TypeError from dereferencing
    // an unassigned key.
    throw new Error("No key is assigned to this request.");
  }

  const [accessKeyId, secretAccessKey, region] = assignedKey.key.split(":");

  if (!accessKeyId || !secretAccessKey || !region) {
    req.log.error(
      { key: assignedKey.hash },
      "AWS_CREDENTIALS isn't correctly formatted; refer to the docs"
    );
    throw new Error("The key assigned to this request is invalid.");
  }

  return { accessKeyId, secretAccessKey, region };
}
/**
 * Signs the given HTTP request with Signature V4 for the "bedrock" service,
 * using the supplied credential and its region.
 *
 * @returns The signed request produced by the signer.
 */
async function sign(request: HttpRequest, credential: Credential) {
  const signer = new SignatureV4({
    service: "bedrock",
    region: credential.region,
    credentials: {
      accessKeyId: credential.accessKeyId,
      secretAccessKey: credential.secretAccessKey,
    },
    sha256: Sha256,
  });
  return signer.sign(request);
}
@@ -10,8 +10,8 @@ const CLAUDE_OUTPUT_MAX = config.maxOutputTokensAnthropic;
const OPENAI_OUTPUT_MAX = config.maxOutputTokensOpenAI;
// https://console.anthropic.com/docs/api/reference#-v1-complete
const AnthropicV1CompleteSchema = z.object({
model: z.string().regex(/^claude-/, "Model must start with 'claude-'"),
export const AnthropicV1CompleteSchema = z.object({
model: z.string(),
prompt: z.string({
required_error:
"No prompt found. Are you sending an OpenAI-formatted request to the Claude endpoint?",
@@ -23,14 +23,14 @@ const AnthropicV1CompleteSchema = z.object({
stop_sequences: z.array(z.string()).optional(),
stream: z.boolean().optional().default(false),
temperature: z.coerce.number().optional().default(1),
top_k: z.coerce.number().optional().default(-1),
top_p: z.coerce.number().optional().default(-1),
top_k: z.coerce.number().optional(),
top_p: z.coerce.number().optional(),
metadata: z.any().optional(),
});
// https://platform.openai.com/docs/api-reference/chat/create
const OpenAIV1ChatCompletionSchema = z.object({
model: z.string().regex(/^gpt/, "Model must start with 'gpt-'"),
model: z.string(),
messages: z.array(
z.object({
role: z.enum(["system", "user", "assistant"]),
@@ -89,7 +89,7 @@ const OpenAIV1TextCompletionSchema = z
// https://developers.generativeai.google/api/rest/generativelanguage/models/generateText
const PalmV1GenerateTextSchema = z.object({
model: z.string().regex(/^\w+-bison-\d{3}$/),
model: z.string(),
prompt: z.object({ text: z.string() }),
temperature: z.number().optional(),
maxOutputTokens: z.coerce
@@ -159,7 +159,7 @@ function openaiToAnthropic(req: Request) {
const { body } = req;
const result = OpenAIV1ChatCompletionSchema.safeParse(body);
if (!result.success) {
req.log.error(
req.log.warn(
{ issues: result.error.issues, body },
"Invalid OpenAI-to-Anthropic request"
);
@@ -208,7 +208,7 @@ function openaiToOpenaiText(req: Request) {
const { body } = req;
const result = OpenAIV1ChatCompletionSchema.safeParse(body);
if (!result.success) {
req.log.error(
req.log.warn(
{ issues: result.error.issues, body },
"Invalid OpenAI-to-OpenAI-text request"
);
@@ -227,8 +227,7 @@ function openaiToOpenaiText(req: Request) {
stops = [...new Set(stops)];
const transformed = { ...rest, prompt: prompt, stop: stops };
const validated = OpenAIV1TextCompletionSchema.parse(transformed);
return validated;
return OpenAIV1TextCompletionSchema.parse(transformed);
}
function openaiToPalm(req: Request): z.infer<typeof PalmV1GenerateTextSchema> {
@@ -238,7 +237,7 @@ function openaiToPalm(req: Request): z.infer<typeof PalmV1GenerateTextSchema> {
model: "gpt-3.5-turbo",
});
if (!result.success) {
req.log.error(
req.log.warn(
{ issues: result.error.issues, body },
"Invalid OpenAI-to-Palm request"
);
@@ -1,9 +1,8 @@
import { Request } from "express";
import { z } from "zod";
import { config } from "../../../config";
import { OpenAIPromptMessage, countTokens } from "../../../shared/tokenization";
import { RequestPreprocessor } from ".";
import { assertNever } from "../../../shared/utils";
import { RequestPreprocessor } from ".";
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
@@ -16,51 +15,7 @@ const BISON_MAX_CONTEXT = 8100;
* This preprocessor should run after any preprocessor that transforms the
* request body.
*/
/**
 * Counts the prompt tokens of an already-transformed request, records the
 * token counts and tokenizer output on the request, then runs model
 * translation and context size validation.
 */
export const checkContextSize: RequestPreprocessor = async (req) => {
  const outboundFormat = req.outboundApi;
  let tokenResult;

  switch (outboundFormat) {
    case "openai": {
      req.outputTokens = req.body.max_tokens;
      const messages: OpenAIPromptMessage[] = req.body.messages;
      tokenResult = await countTokens({
        req,
        prompt: messages,
        service: outboundFormat,
      });
      break;
    }
    case "openai-text": {
      req.outputTokens = req.body.max_tokens;
      const text: string = req.body.prompt;
      tokenResult = await countTokens({
        req,
        prompt: text,
        service: outboundFormat,
      });
      break;
    }
    case "anthropic": {
      req.outputTokens = req.body.max_tokens_to_sample;
      const text: string = req.body.prompt;
      tokenResult = await countTokens({
        req,
        prompt: text,
        service: outboundFormat,
      });
      break;
    }
    case "google-palm": {
      req.outputTokens = req.body.maxOutputTokens;
      const text: string = req.body.prompt.text;
      tokenResult = await countTokens({
        req,
        prompt: text,
        service: outboundFormat,
      });
      break;
    }
    default:
      // Exhaustiveness check over the outbound API formats.
      assertNever(outboundFormat);
  }

  req.promptTokens = tokenResult.token_count;

  // TODO: Remove once token counting is stable
  req.log.debug({ result: tokenResult }, "Counted prompt tokens.");
  req.debug = { ...(req.debug ?? {}), ...tokenResult };

  // Model selection needs the token counts assigned above, and validation
  // runs after the model has been chosen.
  maybeTranslateOpenAIModel(req);
  validateContextSize(req);
};
function validateContextSize(req: Request) {
export const validateContextSize: RequestPreprocessor = async (req) => {
assertRequestHasTokenCounts(req);
const promptTokens = req.promptTokens;
const outputTokens = req.outputTokens;
@@ -125,7 +80,7 @@ function validateContextSize(req: Request) {
req.debug.completion_tokens = outputTokens;
req.debug.max_model_tokens = modelMax;
req.debug.max_proxy_tokens = proxyMax;
}
};
function assertRequestHasTokenCounts(
req: Request
@@ -137,27 +92,3 @@ function assertRequestHasTokenCounts(
.nonstrict()
.parse({ promptTokens: req.promptTokens, outputTokens: req.outputTokens });
}
/**
 * Picks the Claude model for OpenAI-format requests that are sent to
 * Anthropic. Such requests can't name a Claude model themselves, so the choice
 * is made from the final context size (prompt tokens plus requested output
 * tokens). This can't be done in `transformOutboundPayload` because the
 * context size is only known once the transformed body has been counted.
 *
 * Requests above 8500 tokens are switched to the model named by the
 * CLAUDE_BIG_MODEL environment variable (default "claude-v1-100k"); all other
 * requests keep the model that is already set on the body.
 */
function maybeTranslateOpenAIModel(req: Request) {
  const isOpenAIToAnthropic =
    req.inboundApi === "openai" && req.outboundApi === "anthropic";
  if (!isOpenAIToAnthropic) {
    return;
  }

  const contextSize = req.promptTokens! + req.outputTokens!;
  // Small model is the default already set in `transformOutboundPayload`
  if (!(contextSize > 8500)) {
    return;
  }

  const bigModel = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
  req.log.debug(
    { model: bigModel, contextSize },
    "Using Claude 100k model for OpenAI-to-Anthropic request"
  );
  req.body.model = bigModel;
}