Implements prompt logging via Google Sheets (khanon/oai-reverse-proxy!1)

This commit is contained in:
nai-degen
2023-04-15 01:21:04 +00:00
parent a767044850
commit fc3043dad0
30 changed files with 1078 additions and 80 deletions
+283
View File
@@ -0,0 +1,283 @@
import { Request, Response } from "express";
import * as http from "http";
import util from "util";
import zlib from "zlib";
import * as httpProxy from "http-proxy";
import { logger } from "../../../logger";
import { keyPool } from "../../../key-management";
import { logPrompt } from "./log-prompt";
/**
 * Request paths for which `incrementKeyUsage` calls `keyPool.incrementPrompt`
 * on the key that served the request. Matched against `req.path` exactly.
 */
export const QUOTA_ROUTES = ["/v1/chat/completions"];
/**
 * Decompression routines keyed by the `content-encoding` value they undo.
 * Each entry is the promisified form of the corresponding zlib function.
 */
const DECODER_MAP = {
  gzip: util.promisify(zlib.gunzip),
  deflate: util.promisify(zlib.inflate),
  br: util.promisify(zlib.brotliDecompress),
};

/**
 * Type guard telling whether `DECODER_MAP` has a decoder for the given
 * `content-encoding` value.
 *
 * Only the map's own keys count. A plain `in` check also walks the prototype
 * chain, so values such as "toString" or "constructor" would be reported as
 * supported and then looked up as if they were decoders.
 */
const isSupportedContentEncoding = (
  contentEncoding: string
): contentEncoding is keyof typeof DECODER_MAP => {
  return Object.prototype.hasOwnProperty.call(DECODER_MAP, contentEncoding);
};
/**
 * Signature of the handler that consumes the proxied response and produces its
 * body: either a string or an object.
 */
type DecodeResponseBodyHandler = (
proxyRes: http.IncomingMessage,
req: Request,
res: Response
) => Promise<string | Record<string, any>>;
/**
 * A single step of the proxy response pipeline. It receives the proxied
 * response, the client's request and response, and the already-decoded body.
 * Steps are awaited one after another by `createOnProxyResHandler`; a step that
 * throws prevents the remaining steps from running.
 */
export type ProxyResHandlerWithBody = (
proxyRes: http.IncomingMessage,
req: Request,
res: Response,
/**
* This will be an object if the response content-type is application/json,
* otherwise it will be a string.
*/
body: string | Record<string, any>
) => Promise<void>;
/** An ordered list of response pipeline steps; they run in array order. */
export type ProxyResMiddleware = ProxyResHandlerWithBody[];
/**
 * Builds an on.proxyRes handler. The handler first decodes the response body,
 * then runs the common steps (downstream error handling, key usage counting,
 * header copying, prompt logging) followed by the caller-supplied middleware,
 * one at a time and in order.
 *
 * If any step throws, the remaining steps are skipped. When the failing step
 * has already answered the client (as the downstream error handler does),
 * nothing more happens; otherwise the failure is logged and the client gets a
 * 500 naming the step that threw.
 */
export const createOnProxyResHandler = (middleware: ProxyResMiddleware) => {
  return async (
    proxyRes: http.IncomingMessage,
    req: Request,
    res: Response
  ) => {
    // Tracks which step is running so a failure can be attributed to it.
    let currentStepName = decodeResponseBody.name;

    try {
      const body = await decodeResponseBody(proxyRes, req, res);

      const commonSteps: ProxyResMiddleware = [
        handleDownstreamErrors,
        incrementKeyUsage,
        copyHttpHeaders,
        logPrompt,
      ];
      const pipeline: ProxyResMiddleware = [...commonSteps, ...middleware];

      for (const step of pipeline) {
        currentStepName = step.name;
        await step(proxyRes, req, res, body);
      }
    } catch (error: any) {
      // A step that already responded (e.g. a downstream error) needs no
      // further handling here.
      if (!res.headersSent) {
        const message = `Error while executing proxy response middleware: ${currentStepName} (${error.message})`;
        const logContext = {
          error: error.stack,
          thrownBy: currentStepName,
          key: req.key?.hash,
        };
        logger.error(logContext, message);

        res
          .status(500)
          .json({ error: "Internal server error", proxy_note: message });
      }
    }
  };
};
/**
 * Buffers the response from the downstream service, undoes its
 * content-encoding if one is set, and returns the body. If the content-type
 * includes application/json the body is parsed and returned as an object;
 * otherwise it is returned as a string.
 *
 * For an unsupported encoding, a body that fails to decompress, or invalid
 * JSON, a 500 is sent to the client before the returned promise rejects. A
 * failure of the response stream itself only rejects, leaving the response to
 * the caller.
 * @throws {Error} Unsupported content-encoding, undecodable body, invalid
 * application/json body, or an error emitted by the response stream
 */
const decodeResponseBody: DecodeResponseBodyHandler = async (
  proxyRes,
  req,
  res
) => {
  return new Promise<string | Record<string, any>>((resolve, reject) => {
    const chunks: Buffer[] = [];
    proxyRes.on("data", (chunk) => chunks.push(chunk));
    // Reject on a stream failure so the caller's catch block runs; without a
    // listener the promise would never settle.
    proxyRes.on("error", reject);
    proxyRes.on("end", async () => {
      let body = Buffer.concat(chunks);
      const contentEncoding = proxyRes.headers["content-encoding"];

      if (contentEncoding) {
        if (!isSupportedContentEncoding(contentEncoding)) {
          const errorMessage = `Proxy received response with unsupported content-encoding: ${contentEncoding}`;
          logger.warn({ contentEncoding, key: req.key?.hash }, errorMessage);
          res.status(500).json({ error: errorMessage, contentEncoding });
          return reject(new Error(errorMessage));
        }
        // This listener is async, so an exception escaping it would become an
        // unhandled rejection and leave the outer promise pending. Catch
        // decompression failures and reject explicitly.
        try {
          body = await DECODER_MAP[contentEncoding](body);
        } catch (error: any) {
          const errorMessage = `Proxy received response with undecodable ${contentEncoding} body: ${error.message}`;
          logger.warn({ error, key: req.key?.hash }, errorMessage);
          res.status(500).json({ error: errorMessage, contentEncoding });
          return reject(new Error(errorMessage));
        }
      }

      try {
        if (proxyRes.headers["content-type"]?.includes("application/json")) {
          return resolve(JSON.parse(body.toString()));
        }
        return resolve(body.toString());
      } catch (error: any) {
        const errorMessage = `Proxy received response with invalid JSON: ${error.message}`;
        logger.warn({ error, key: req.key?.hash }, errorMessage);
        res.status(500).json({ error: errorMessage });
        // Reject with an Error (not a bare string) so callers can rely on
        // `.message` and `.stack`.
        return reject(new Error(errorMessage));
      }
    });
  });
};
// TODO: This is too specific to OpenAI's error responses, Anthropic errors
// will need a different handler.
/**
 * Handles error responses (status >= 400) from the downstream service. If the
 * proxied response is an error, this responds to the client with an error
 * payload carrying an explanatory `proxy_note` and then throws to stop the
 * middleware stack. Responses below 400 pass through untouched.
 *
 * Side effects on the key pool: the request's key is disabled on 401 and on
 * 429 with type `insufficient_quota`; on 404 `model_not_found` a key flagged
 * as GPT-4 is downgraded.
 * @throws {Error} HTTP error status code from downstream service
 */
const handleDownstreamErrors: ProxyResHandlerWithBody = async (
  proxyRes,
  req,
  res,
  body
) => {
  const statusCode = proxyRes.statusCode || 500;
  if (statusCode < 400) {
    return;
  }

  // Subtract 1 from available keys because if this message is being shown,
  // it's because the key is about to be disabled. Clamp at zero so the client
  // is never told a negative number of keys remain.
  const availableKeys = Math.max(0, keyPool.available() - 1);
  const tryAgainMessage =
    availableKeys > 0
      ? `There are ${availableKeys} more keys available; try your request again.`
      : "There are no more keys available.";

  // `typeof null` is "object", so null (a JSON `null` body) must be excluded
  // explicitly or the property reads below would throw a TypeError.
  if (body === null || typeof body !== "object") {
    const errorMessage = "Received non-JSON error response from downstream.";
    const statusMessage = proxyRes.statusMessage || "Unknown error";
    // Likely Bad Gateway or Gateway Timeout from OpenAI's Cloudflare proxy
    logger.warn(
      { statusCode, statusMessage, key: req.key?.hash },
      errorMessage
    );
    const errorObject = {
      statusCode,
      statusMessage: proxyRes.statusMessage,
      error: errorMessage,
      proxy_note: `This is likely a temporary error with the downstream service.`,
    };
    res.status(statusCode).json(errorObject);
    throw new Error(errorMessage);
  }

  const errorPayload: Record<string, any> = body;

  logger.warn(
    {
      statusCode,
      type: errorPayload.error?.code,
      errorPayload,
      key: req.key?.hash,
    },
    `Received error response from downstream. (${proxyRes.statusMessage})`
  );

  if (statusCode === 400) {
    // Bad request (likely prompt is too long)
    errorPayload.proxy_note = `OpenAI rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
  } else if (statusCode === 401) {
    // Key is invalid or was revoked
    keyPool.disable(req.key!);
    errorPayload.proxy_note = `The OpenAI key is invalid or revoked. ${tryAgainMessage}`;
  } else if (statusCode === 429) {
    // One of:
    // - Quota exceeded (key is dead, disable it)
    // - Rate limit exceeded (key is fine, just try again)
    // - Model overloaded (their fault, just try again)
    if (errorPayload.error?.type === "insufficient_quota") {
      keyPool.disable(req.key!);
      errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
    } else {
      errorPayload.proxy_note = `This is likely a temporary error with OpenAI. Try again in a few seconds.`;
    }
  } else if (statusCode === 404) {
    // Most likely model not found. Other 404s are forwarded without a note.
    if (errorPayload.error?.code === "model_not_found") {
      if (req.key!.isGpt4) {
        keyPool.downgradeKey(req.key?.hash);
        errorPayload.proxy_note = `This key was incorrectly assigned to GPT-4. It has been downgraded to Turbo.`;
      } else {
        errorPayload.proxy_note = `No model was found for this key.`;
      }
    }
  } else {
    errorPayload.proxy_note = `Unrecognized error from OpenAI.`;
  }

  res.status(statusCode).json(errorPayload);
  // Throwing stops the remaining middleware; the client has already been
  // answered above.
  throw new Error(errorPayload.error?.message);
};
/**
 * Handles errors in the request rewriter pipeline: logs the error and answers
 * the client with a 500 JSON payload describing it. If the response headers
 * have already been sent, the response is simply ended, since a second
 * `writeHead` would throw.
 */
export const handleInternalError: httpProxy.ErrorCallback = (
  err,
  _req,
  res
) => {
  logger.error({ error: err }, "Error in proxy request pipeline.");

  const response = res as http.ServerResponse;
  if (response.headersSent) {
    // Too late to change the status or body; close out what was started
    // instead of throwing ERR_HTTP_HEADERS_SENT from inside an error handler.
    response.end();
    return;
  }

  response.writeHead(500, {
    "Content-Type": "application/json",
  });
  // NOTE(review): the error's stack trace is included in the client-facing
  // payload; confirm that exposing it is intended.
  response.end(
    JSON.stringify({
      error: {
        type: "proxy_error",
        message: err.message,
        stack: err.stack,
        proxy_note: `Reverse proxy encountered an error before it could reach the downstream API.`,
      },
    })
  );
};
/**
 * Records a prompt against the request's key when the request path is one of
 * `QUOTA_ROUTES`; does nothing for any other path.
 */
const incrementKeyUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => {
  const isQuotaRoute = QUOTA_ROUTES.includes(req.path);
  if (!isQuotaRoute) {
    return;
  }
  keyPool.incrementPrompt(req.key?.hash);
};
/**
 * Copies the proxied response's headers onto the client response, leaving out
 * the two that no longer describe the body we send.
 */
const copyHttpHeaders: ProxyResHandlerWithBody = async (
  proxyRes,
  _req,
  res
) => {
  for (const headerName of Object.keys(proxyRes.headers)) {
    // content-encoding: the body is always decoded before it is forwarded.
    // transfer-encoding: the response is usually sent with res.json(), which
    // makes express set content-length; that is not valid alongside a chunked
    // transfer-encoding and some clients reject the combination.
    const isOmitted =
      headerName === "content-encoding" || headerName === "transfer-encoding";
    if (isOmitted) {
      continue;
    }
    res.setHeader(headerName, proxyRes.headers[headerName] as string);
  }
};
@@ -0,0 +1,54 @@
import { config } from "../../../config";
import { logQueue } from "../../../prompt-logging";
import { ProxyResHandlerWithBody } from ".";
/**
 * Queues the request's prompt and the model's reply for logging. Does nothing
 * unless `config.promptLogging` is set.
 * @throws {Error} If logging is enabled and the response body is not an object
 */
export const logPrompt: ProxyResHandlerWithBody = async (
  _proxyRes,
  req,
  _res,
  responseBody
) => {
  if (config.promptLogging) {
    if (typeof responseBody !== "object") {
      throw new Error("Expected body to be an object");
    }

    const requestBody = req.body;
    const model = requestBody.model;
    const flattened = flattenMessages(requestBody.messages);
    const reply = getResponseForModel({ model, body: responseBody });

    logQueue.enqueue({
      model,
      endpoint: req.api,
      promptRaw: JSON.stringify(requestBody.messages),
      promptFlattened: flattened,
      response: reply,
    });
  }
};
/** One entry of the `messages` array in a request body, as consumed by `flattenMessages`. */
type OaiMessage = {
role: "user" | "assistant" | "system";
content: string;
};
/**
 * Renders a message list as plain text: one `role: content` entry per message,
 * joined with newlines. An empty list yields an empty string.
 */
const flattenMessages = (messages: OaiMessage[]): string => {
  const lines: string[] = [];
  for (const message of messages) {
    lines.push(`${message.role}: ${message.content}`);
  }
  return lines.join("\n");
};
/**
 * Extracts the generated text from a response body. For models whose name
 * starts with "claude" this is the trimmed `completion` field; for all others
 * it is the content of the first choice's message, returned as-is.
 */
const getResponseForModel = ({
  model,
  body,
}: {
  model: string;
  body: Record<string, any>;
}) => {
  const isClaude = model.startsWith("claude");
  // TODO: confirm if there is supposed to be a leading space
  return isClaude ? body.completion.trim() : body.choices[0].message.content;
};