import express from "express";
|
|
import { APIFormat } from "../../../shared/key-management";
|
|
import { assertNever } from "../../../shared/utils";
|
|
import { initializeSseStream } from "../../../shared/streaming";
|
|
import http from "http";
|
|
|
|
/**
|
|
* Returns a Markdown-formatted message that renders semi-nicely in most chat
|
|
* frontends. For example:
|
|
*
|
|
* **Proxy error (HTTP 404 Not Found)**
|
|
* The proxy encountered an error while trying to send your prompt to the upstream service. Further technical details are provided below.
|
|
* ***
|
|
* *The requested Claude model might not exist, or the key might not be provisioned for it.*
|
|
* ```
|
|
* {
|
|
* "type": "error",
|
|
* "error": {
|
|
* "type": "not_found_error",
|
|
* "message": "model: some-invalid-model-id",
|
|
* },
|
|
* "proxy_note": "The requested Claude model might not exist, or the key might not be provisioned for it."
|
|
* }
|
|
* ```
|
|
*/
|
|
function getMessageContent(params: {
|
|
title: string;
|
|
message: string;
|
|
obj?: Record<string, any>;
|
|
}) {
|
|
const { title, message, obj } = params;
|
|
const note = obj?.proxy_note || obj?.error?.message || "";
|
|
const header = `### **${title}**`;
|
|
const friendlyMessage = note ? `${message}\n\n----\n\n*${note}*` : message;
|
|
|
|
const serializedObj = obj
|
|
? ["```", JSON.stringify(obj, null, 2), "```"].join("\n")
|
|
: "";
|
|
|
|
const { stack } = JSON.parse(JSON.stringify(obj ?? {}));
|
|
let prettyTrace = "";
|
|
if (stack && obj) {
|
|
prettyTrace = [
|
|
"Include this trace when reporting an issue.",
|
|
"```",
|
|
stack,
|
|
"```",
|
|
].join("\n");
|
|
delete obj.stack;
|
|
}
|
|
|
|
return [
|
|
header,
|
|
friendlyMessage,
|
|
serializedObj,
|
|
prettyTrace,
|
|
"<!-- oai-proxy-error -->",
|
|
].join("\n\n");
|
|
}
|
|
|
|
/** Options accepted by the error-response generators in this module. */
type ErrorGeneratorOptions = {
  /** Request format to mimic; "unknown" forces a generic JSON error body. */
  format: APIFormat | "unknown";
  /** Short error title; also used as the spoofed finish/stop reason. */
  title: string;
  /** Human-readable description shown to the user. */
  message: string;
  /** Optional structured error details, serialized into the message body. */
  obj?: Record<string, any>;
  /** Request identifier, stringified into spoofed completion IDs. */
  reqId: string | number | object;
  /** Model name echoed back in the spoofed completion. */
  model?: string;
  /** HTTP status used when responding with a plain JSON error. */
  statusCode?: number;
};
|
|
|
|
/**
|
|
* Very crude inference of the request format based on the request body. Don't
|
|
* rely on this to be very accurate.
|
|
*/
|
|
function tryInferFormat(body: any): APIFormat | "unknown" {
|
|
if (typeof body !== "object" || !body.model) {
|
|
return "unknown";
|
|
}
|
|
|
|
if (body.model.includes("gpt")) {
|
|
return "openai";
|
|
}
|
|
|
|
if (body.model.includes("mistral")) {
|
|
return "mistral-ai";
|
|
}
|
|
|
|
if (body.model.includes("claude")) {
|
|
return body.messages?.length ? "anthropic-chat" : "anthropic-text";
|
|
}
|
|
|
|
if (body.model.includes("gemini")) {
|
|
return "google-ai";
|
|
}
|
|
|
|
return "unknown";
|
|
}
|
|
|
|
/**
|
|
* Redacts the hostname from the error message if it contains a DNS resolution
|
|
* error. This is to avoid leaking upstream hostnames on DNS resolution errors,
|
|
* as those may contain sensitive information about the proxy's configuration.
|
|
*/
|
|
function redactHostname(options: ErrorGeneratorOptions): ErrorGeneratorOptions {
|
|
if (!options.message.includes("getaddrinfo")) return options;
|
|
|
|
const redacted = { ...options };
|
|
redacted.message = "Could not resolve hostname";
|
|
|
|
if (typeof redacted.obj?.error === "object") {
|
|
redacted.obj = {
|
|
...redacted.obj,
|
|
error: { message: "Could not resolve hostname" },
|
|
};
|
|
}
|
|
|
|
return redacted;
|
|
}
|
|
|
|
/**
|
|
* Generates an appropriately-formatted error response and sends it to the
|
|
* client over their requested transport (blocking or SSE stream).
|
|
*/
|
|
export function sendErrorToClient(params: {
|
|
options: ErrorGeneratorOptions;
|
|
req: express.Request;
|
|
res: express.Response;
|
|
}) {
|
|
const { req, res } = params;
|
|
const options = redactHostname(params.options);
|
|
const { statusCode, message, title, obj: details } = options;
|
|
|
|
// Since we want to send the error in a format the client understands, we
|
|
// need to know the request format. `setApiFormat` might not have been called
|
|
// yet, so we'll try to infer it from the request body.
|
|
const format =
|
|
options.format === "unknown" ? tryInferFormat(req.body) : options.format;
|
|
if (format === "unknown") {
|
|
// Early middleware error (auth, rate limit) so we can only send something
|
|
// generic.
|
|
const code = statusCode || 400;
|
|
const hasDetails = details && Object.keys(details).length > 0;
|
|
return res.status(code).json({
|
|
error: {
|
|
message,
|
|
type: http.STATUS_CODES[code]!.replace(/\s+/g, "_").toLowerCase(),
|
|
},
|
|
...(hasDetails ? { details } : {}),
|
|
});
|
|
}
|
|
|
|
// Cannot modify headers if client opted into streaming and made it into the
|
|
// proxy request queue, because that immediately starts an SSE stream.
|
|
if (!res.headersSent) {
|
|
res.setHeader("x-oai-proxy-error", title);
|
|
res.setHeader("x-oai-proxy-error-status", statusCode || 500);
|
|
}
|
|
|
|
// By this point, we know the request format. To get the error to display in
|
|
// chat clients' UIs, we'll send it as a 200 response as a spoofed completion
|
|
// from the language model. Depending on whether the client is streaming, we
|
|
// will either send an SSE event or a JSON response.
|
|
const isStreaming = req.isStreaming || String(req.body.stream) === "true";
|
|
if (isStreaming) {
|
|
// User can have opted into streaming but not made it into the queue yet,
|
|
// in which case the stream must be started first.
|
|
if (!res.headersSent) {
|
|
initializeSseStream(res);
|
|
}
|
|
res.write(buildSpoofedSSE({ ...options, format }));
|
|
res.write(`data: [DONE]\n\n`);
|
|
res.end();
|
|
} else {
|
|
res.status(200).json(buildSpoofedCompletion({ ...options, format }));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns a non-streaming completion object that looks like it came from the
|
|
* service that the request is being proxied to. Used to send error messages to
|
|
* the client and have them look like normal responses, for clients with poor
|
|
* error handling.
|
|
*/
|
|
export function buildSpoofedCompletion({
|
|
format,
|
|
title,
|
|
message,
|
|
obj,
|
|
reqId,
|
|
model = "unknown",
|
|
}: ErrorGeneratorOptions & { format: Exclude<APIFormat, "unknown"> }) {
|
|
const id = String(reqId);
|
|
const content = getMessageContent({ title, message, obj });
|
|
|
|
switch (format) {
|
|
case "openai":
|
|
case "mistral-ai":
|
|
return {
|
|
id: "error-" + id,
|
|
object: "chat.completion",
|
|
created: Date.now(),
|
|
model,
|
|
usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
|
|
choices: [
|
|
{
|
|
message: { role: "assistant", content },
|
|
finish_reason: title,
|
|
index: 0,
|
|
},
|
|
],
|
|
};
|
|
case "mistral-text":
|
|
return {
|
|
outputs: [{ text: content, stop_reason: title }],
|
|
model,
|
|
};
|
|
case "openai-text":
|
|
return {
|
|
id: "error-" + id,
|
|
object: "text_completion",
|
|
created: Date.now(),
|
|
model,
|
|
usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
|
|
choices: [
|
|
{ text: content, index: 0, logprobs: null, finish_reason: title },
|
|
],
|
|
};
|
|
case "anthropic-text":
|
|
return {
|
|
id: "error-" + id,
|
|
type: "completion",
|
|
completion: content,
|
|
stop_reason: title,
|
|
stop: null,
|
|
model,
|
|
};
|
|
case "anthropic-chat":
|
|
return {
|
|
id: "error-" + id,
|
|
type: "message",
|
|
role: "assistant",
|
|
content: [{ type: "text", text: content }],
|
|
model,
|
|
stop_reason: title,
|
|
stop_sequence: null,
|
|
};
|
|
case "google-ai":
|
|
return {
|
|
candidates: [
|
|
{
|
|
content: { parts: [{ text: content }], role: "model" },
|
|
finishReason: title,
|
|
index: 0,
|
|
tokenCount: null,
|
|
safetyRatings: [],
|
|
},
|
|
],
|
|
};
|
|
case "openai-image":
|
|
return obj;
|
|
default:
|
|
assertNever(format);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns an SSE message that looks like a completion event for the service
|
|
* that the request is being proxied to. Used to send error messages to the
|
|
* client in the middle of a streaming request.
|
|
*/
|
|
export function buildSpoofedSSE({
|
|
format,
|
|
title,
|
|
message,
|
|
obj,
|
|
reqId,
|
|
model = "unknown",
|
|
}: ErrorGeneratorOptions & { format: Exclude<APIFormat, "unknown"> }) {
|
|
const id = String(reqId);
|
|
const content = getMessageContent({ title, message, obj });
|
|
|
|
let event;
|
|
|
|
switch (format) {
|
|
case "openai":
|
|
case "mistral-ai":
|
|
event = {
|
|
id: "chatcmpl-" + id,
|
|
object: "chat.completion.chunk",
|
|
created: Date.now(),
|
|
model,
|
|
choices: [{ delta: { content }, index: 0, finish_reason: title }],
|
|
};
|
|
break;
|
|
case "mistral-text":
|
|
event = {
|
|
outputs: [{ text: content, stop_reason: title }],
|
|
};
|
|
break;
|
|
case "openai-text":
|
|
event = {
|
|
id: "cmpl-" + id,
|
|
object: "text_completion",
|
|
created: Date.now(),
|
|
choices: [
|
|
{ text: content, index: 0, logprobs: null, finish_reason: title },
|
|
],
|
|
model,
|
|
};
|
|
break;
|
|
case "anthropic-text":
|
|
event = {
|
|
completion: content,
|
|
stop_reason: title,
|
|
truncated: false,
|
|
stop: null,
|
|
model,
|
|
log_id: "proxy-req-" + id,
|
|
};
|
|
break;
|
|
case "anthropic-chat":
|
|
event = {
|
|
type: "content_block_delta",
|
|
index: 0,
|
|
delta: { type: "text_delta", text: content },
|
|
};
|
|
break;
|
|
case "google-ai":
|
|
// TODO: google ai supports two streaming transports, SSE and JSON.
|
|
// we currently only support SSE.
|
|
// return JSON.stringify({
|
|
event = {
|
|
candidates: [
|
|
{
|
|
content: { parts: [{ text: content }], role: "model" },
|
|
finishReason: title,
|
|
index: 0,
|
|
tokenCount: null,
|
|
safetyRatings: [],
|
|
},
|
|
],
|
|
};
|
|
break;
|
|
case "openai-image":
|
|
return JSON.stringify(obj);
|
|
default:
|
|
assertNever(format);
|
|
}
|
|
|
|
if (format === "anthropic-text") {
|
|
return (
|
|
["event: completion", `data: ${JSON.stringify(event)}`].join("\n") +
|
|
"\n\n"
|
|
);
|
|
}
|
|
|
|
// ugh.
|
|
if (format === "anthropic-chat") {
|
|
return (
|
|
[
|
|
[
|
|
"event: message_start",
|
|
`data: ${JSON.stringify({
|
|
type: "message_start",
|
|
message: {
|
|
id: "error-" + id,
|
|
type: "message",
|
|
role: "assistant",
|
|
content: [],
|
|
model,
|
|
},
|
|
})}`,
|
|
].join("\n"),
|
|
[
|
|
"event: content_block_start",
|
|
`data: ${JSON.stringify({
|
|
type: "content_block_start",
|
|
index: 0,
|
|
content_block: { type: "text", text: "" },
|
|
})}`,
|
|
].join("\n"),
|
|
["event: content_block_delta", `data: ${JSON.stringify(event)}`].join(
|
|
"\n"
|
|
),
|
|
[
|
|
"event: content_block_stop",
|
|
`data: ${JSON.stringify({ type: "content_block_stop", index: 0 })}`,
|
|
].join("\n"),
|
|
[
|
|
"event: message_delta",
|
|
`data: ${JSON.stringify({
|
|
type: "message_delta",
|
|
delta: { stop_reason: title, stop_sequence: null, usage: null },
|
|
})}`,
|
|
],
|
|
[
|
|
"event: message_stop",
|
|
`data: ${JSON.stringify({ type: "message_stop" })}`,
|
|
].join("\n"),
|
|
].join("\n\n") + "\n\n"
|
|
);
|
|
}
|
|
|
|
return `data: ${JSON.stringify(event)}\n\n`;
|
|
}
|