10 Commits

Author SHA1 Message Date
nai-degen 59141813d9 adds quick scale keyprovider 2023-07-05 22:11:25 -05:00
nai-degen 327e860967 fixes wrong/misleading error msg when no Turbo keys available 2023-07-04 11:49:12 -05:00
nai-degen 6598b4df0d requests old version of Anthropic API due to breaking SSE changes 2023-06-24 14:50:48 -05:00
nai-degen 6a7f64b037 adds missed change from origin header adjustment 2023-06-24 14:25:30 -05:00
nai-degen c8b3238398 reorganizes origin header middleware 2023-06-24 14:25:01 -05:00
nai-degen 602931bf7f removes origin/referer headers from proxied request 2023-06-23 00:08:09 -05:00
nai-degen db034a51b3 prevents crash on startup when git is not installed 2023-06-21 01:24:41 -05:00
khanon 43359779e7 Implements more robust anti-zoomer functionality (khanon/oai-reverse-proxy!24) 2023-06-14 04:05:51 +00:00
nai-degen c0ac69df27 adjusts default origin block 2023-06-13 21:18:31 -05:00
nai-degen 3a2a6e96fd adds new OpenAI June 2023 models 2023-06-13 16:24:34 -05:00
12 changed files with 261 additions and 14 deletions
+7 -2
View File
@@ -18,6 +18,8 @@ type Config = {
openaiKey?: string;
/** Comma-delimited list of Anthropic API keys. */
anthropicKey?: string;
scaleKey?: string;
scaleMinDeployments: number;
/**
* The proxy key to require for requests. Only applicable if the user
* management mode is set to 'proxy_key', and required if so.
@@ -26,7 +28,7 @@ type Config = {
/**
* The admin key used to access the /admin API. Required if the user
* management mode is set to 'user_token'.
**/
*/
adminKey?: string;
/**
* Which user management mode to use.
@@ -49,7 +51,7 @@ type Config = {
*
* `firebase_rtdb`: Users are stored in a Firebase Realtime Database; requires
* `firebaseKey` and `firebaseRtdbUrl` to be set.
**/
*/
gatekeeperStore: "memory" | "firebase_rtdb";
/** URL of the Firebase Realtime Database if using the Firebase RTDB store. */
firebaseRtdbUrl?: string;
@@ -127,6 +129,8 @@ export const config: Config = {
port: getEnvWithDefault("PORT", 7860),
openaiKey: getEnvWithDefault("OPENAI_KEY", ""),
anthropicKey: getEnvWithDefault("ANTHROPIC_KEY", ""),
scaleKey: getEnvWithDefault("SCALE_KEY", ""),
scaleMinDeployments: getEnvWithDefault("SCALE_MIN_DEPLOYMENTS", 0),
proxyKey: getEnvWithDefault("PROXY_KEY", ""),
adminKey: getEnvWithDefault("ADMIN_KEY", ""),
gatekeeper: getEnvWithDefault("GATEKEEPER", "none"),
@@ -262,6 +266,7 @@ export const OMITTED_KEYS: (keyof Config)[] = [
"logLevel",
"openaiKey",
"anthropicKey",
"scaleKey",
"proxyKey",
"adminKey",
"checkKeys",
+1 -1
View File
@@ -5,7 +5,7 @@ import {
} from "./anthropic/provider";
import { KeyPool } from "./key-pool";
export type AIService = "openai" | "anthropic";
export type AIService = "openai" | "anthropic" | "scale";
export type Model = OpenAIModel | AnthropicModel;
export interface Key {
+2 -2
View File
@@ -128,8 +128,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
);
if (availableKeys.length === 0) {
let message = needGpt4
? "No active OpenAI keys available."
: "No GPT-4 keys available. Try selecting a non-GPT-4 model.";
? "No GPT-4 keys available. Try selecting a non-GPT-4 model."
: "No active OpenAI keys available.";
throw new Error(message);
}
+155
View File
@@ -0,0 +1,155 @@
import crypto from "crypto";
import { Key, KeyProvider } from "..";
import { config } from "../../config";
import { logger } from "../../logger";
/** A Scale "deployment": an account's API key paired with its deployment URL. */
export interface ScaleDeployment extends Key {
  readonly service: "scale";
  /** URL of the deployment this key belongs to (parsed from `key$url` pairs). */
  deploymentUrl: string;
  /** Epoch ms when this deployment was loaded; oldest deployments are used first. */
  createdAt: number;
}
/*
Scale is a bit different from the other providers. It doesn't have set API keys;
instead there are "deployments", which are created in the Scale dashboard and
are accessible via a URL and API key together.
The operator can provide these accounts via the SCALE_KEY environment variable,
but more likely they will want the proxy to just automatically create new
accounts and deployments as older ones reach their usage limits.
*/
export class ScaleKeyProvider implements KeyProvider<ScaleDeployment> {
readonly service = "scale";
private deployments: ScaleDeployment[] = [];
private log = logger.child({ module: "key-provider", service: this.service });
private churnerEnabled = false;
constructor() {
const keyConfig = config.scaleKey?.trim();
if (!keyConfig) return;
let initialKeys: string[];
initialKeys = [...new Set(keyConfig.split(",").map((k) => k.trim()))];
for (const keyStr of initialKeys) {
const [key, deploymentUrl] = keyStr.split("$");
const newDeployment: ScaleDeployment = {
key,
deploymentUrl,
service: this.service,
isGpt4: false,
isTrial: false,
isDisabled: false,
promptCount: 0,
lastUsed: 0,
createdAt: Date.now(),
hash: `sca-${crypto
.createHash("sha256")
.update(keyStr)
.digest("hex")
.slice(0, 8)}`,
lastChecked: 0,
};
this.deployments.push(newDeployment);
}
this.log.info(
{ keyCount: this.deployments.length },
"Loaded initial Scale deployments"
);
}
public init() {
// TODO: Start account churner
this.churnerEnabled = true;
}
public list() {
return this.deployments.map((k) => Object.freeze({ ...k, key: undefined }));
}
public get(_model: unknown) {
// Scale doesn't support changing models on the fly
const availableDeployments = this.deployments.filter((a) => !a.isDisabled);
const canCreateNewAccounts = config.scaleMinDeployments > 0;
if (availableDeployments.length === 0) {
if (canCreateNewAccounts) {
this.log.warn(
"Ran out of Scale deployments and the churner is not creating new ones fast enough."
);
throw new Error(
"No Scale deployments available. Try again in a few minutes when the churner has created new deployments."
);
} else {
throw new Error(
"No Scale deployments available and account churner is disabled (possible IP ban or signup rate limit)."
);
}
}
// Unlike other providers, Scale doesn't want to rotate keys. Instead, we
// want to use the same key for as long as possible while building up a
// reserve of new accounts. Once an account dies there should be a fresh
// one ready to go.
const now = Date.now();
const deploymentsByPriority = availableDeployments.sort((a, b) => {
return a.createdAt - b.createdAt;
});
const selectedKey = deploymentsByPriority[0];
selectedKey.lastUsed = now;
return { ...selectedKey };
}
public disable(deployment: ScaleDeployment) {
const deploymentFromPool = this.deployments.find(
(d) => d.hash === deployment.hash
);
if (!deploymentFromPool || deploymentFromPool.isDisabled) return;
deploymentFromPool.isDisabled = true;
this.log.warn({ key: deployment.hash }, "Scale deployment disabled");
}
public update(hash: string, update: Partial<ScaleDeployment>) {
const deploymentFromPool = this.deployments.find((d) => d.hash === hash)!;
Object.assign(deploymentFromPool, update);
}
public available() {
return this.deployments.filter((k) => !k.isDisabled).length;
}
// Normally this would return the number of unchecked keys but we will
// repurpose it to return the number of pending accounts the churner is
// creating.
public anyUnchecked() {
return config.scaleMinDeployments - this.available() > 0;
}
public incrementPrompt(hash?: string) {
const deployment = this.deployments.find((d) => d.hash === hash);
if (!deployment) return;
deployment.promptCount++;
}
public getLockoutPeriod(_model: unknown) {
// TODO: Scale doesn't have rate limits but this may need to be repurposed
// to lock out the request queue if the account churner enabled but falling
// behind.
return 0;
}
public markRateLimited(keyHash: string) {
// Do nothing
}
/** Doesn't really mean anything for Scale */
public remainingQuota() {
return 1;
}
public usageInUsd() {
return "$0.00 / ∞";
}
}
+4
View File
@@ -9,10 +9,12 @@ import { handleProxyError } from "./middleware/common";
import {
addKey,
addAnthropicPreamble,
blockZoomerOrigins,
createPreprocessorMiddleware,
finalizeBody,
languageFilter,
limitOutputTokens,
removeOriginHeaders,
} from "./middleware/request";
import {
ProxyResHandlerWithBody,
@@ -73,6 +75,8 @@ const rewriteAnthropicRequest = (
addAnthropicPreamble,
languageFilter,
limitOutputTokens,
blockZoomerOrigins,
removeOriginHeaders,
finalizeBody,
];
+24 -4
View File
@@ -2,7 +2,6 @@ import { Request, Response } from "express";
import httpProxy from "http-proxy";
import { ZodError } from "zod";
const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
@@ -32,9 +31,14 @@ export function writeErrorResponse(
res.headersSent ||
res.getHeader("content-type") === "text/event-stream"
) {
const errorContent =
statusCode === 403
? JSON.stringify(errorPayload)
: JSON.stringify(errorPayload, null, 2);
const msg = buildFakeSseMessage(
`${errorSource} error (${statusCode})`,
JSON.stringify(errorPayload, null, 2),
errorContent,
req
);
res.write(msg);
@@ -57,6 +61,7 @@ export const handleInternalError = (
) => {
try {
const isZod = err instanceof ZodError;
const isForbidden = err.name === "ForbiddenError";
if (isZod) {
writeErrorResponse(req, res, 400, {
error: {
@@ -67,6 +72,17 @@ export const handleInternalError = (
message: err.message,
},
});
} else if (isForbidden) {
// Spoofs a vaguely threatening OpenAI error message. Only invoked by the
// block-zoomers rewriter to scare off tiktokers.
writeErrorResponse(req, res, 403, {
error: {
type: "organization_account_disabled",
code: "policy_violation",
param: null,
message: err.message,
},
});
} else {
writeErrorResponse(req, res, 500, {
error: {
@@ -91,10 +107,14 @@ export function buildFakeSseMessage(
req: Request
) {
let fakeEvent;
const useBackticks = !type.includes("403");
const msgContent = useBackticks
? `\`\`\`\n[${type}: ${string}]\n\`\`\`\n`
: `[${type}: ${string}]`;
if (req.inboundApi === "anthropic") {
fakeEvent = {
completion: `\`\`\`\n[${type}: ${string}]\n\`\`\`\n`,
completion: msgContent,
stop_reason: type,
truncated: false, // I've never seen this be true
stop: null,
@@ -109,7 +129,7 @@ export function buildFakeSseMessage(
model: req.body?.model,
choices: [
{
delta: { content: `\`\`\`\n[${type}: ${string}]\n\`\`\`\n` },
delta: { content: msgContent },
index: 0,
finish_reason: type,
},
@@ -0,0 +1,34 @@
import { isCompletionRequest } from "../common";
import { ProxyRequestMiddleware } from ".";
const DISALLOWED_ORIGIN_SUBSTRINGS = "janitorai.com,janitor.ai".split(",");
/**
 * Error used to reject blocked origins. Identified by `name` (rather than
 * `instanceof`) so the proxy's error handler can recognize it and respond
 * with a fake 403.
 */
class ForbiddenError extends Error {
  name = "ForbiddenError";
}
/**
* Blocks requests from Janitor AI users with a fake, scary error message so I
* stop getting emails asking for tech support.
*/
/**
 * Blocks requests from Janitor AI users with a fake, scary error message so I
 * stop getting emails asking for tech support.
 * @throws {ForbiddenError} When the request's origin/referer matches a
 * disallowed substring (handled upstream as a fake 403).
 */
export const blockZoomerOrigins: ProxyRequestMiddleware = (_proxyReq, req) => {
  if (!isCompletionRequest(req)) {
    return;
  }

  const origin = req.headers.origin || req.headers.referer;
  if (origin && DISALLOWED_ORIGIN_SUBSTRINGS.some((s) => origin.includes(s))) {
    // Venus-derivatives send a test prompt to check if the proxy is working.
    // We don't want to block that just yet.
    // Optional-chain the whole path: a completion request may not carry a
    // `messages` array (non-OpenAI payload shapes), and an unguarded index
    // would throw a TypeError and surface as a 500 instead of the fake 403.
    if (req.body?.messages?.[0]?.content === "Just say TEST") {
      return;
    }

    throw new ForbiddenError(
      `Your access was terminated due to violation of our policies, please check your email for more information. If you believe this is in error and would like to appeal, please contact us through our help center at help.openai.com.`
    );
  }
};
+2
View File
@@ -10,10 +10,12 @@ export { transformOutboundPayload } from "./transform-outbound-payload";
// HPM middleware (runs on onProxyReq, cannot be async)
export { addKey } from "./add-key";
export { addAnthropicPreamble } from "./add-anthropic-preamble";
export { blockZoomerOrigins } from "./block-zoomer-origins";
export { finalizeBody } from "./finalize-body";
export { languageFilter } from "./language-filter";
export { limitCompletions } from "./limit-completions";
export { limitOutputTokens } from "./limit-output-tokens";
export { removeOriginHeaders } from "./remove-origin-headers";
export { transformKoboldPayload } from "./transform-kobold-payload";
/**
@@ -0,0 +1,10 @@
import { ProxyRequestMiddleware } from ".";
/**
 * Strips the origin and referer headers from the outbound request so the
 * upstream API never sees where the request came from.
 */
export const removeOriginHeaders: ProxyRequestMiddleware = (proxyReq) => {
  for (const header of ["origin", "referer"]) {
    proxyReq.setHeader(header, "");
  }
};
@@ -99,6 +99,13 @@ function openaiToAnthropic(body: any, req: Request) {
throw result.error;
}
// Anthropic has started versioning their API, indicated by an HTTP header
// `anthropic-version`. The new June 2023 version is not backwards compatible
// with our OpenAI-to-Anthropic transformations so we need to explicitly
// request the older version for now. 2023-01-01 will be removed in September.
// https://docs.anthropic.com/claude/reference/versioning
req.headers["anthropic-version"] = "2023-01-01";
const { messages, ...rest } = result.data;
const prompt =
result.data.messages
+13 -3
View File
@@ -9,11 +9,13 @@ import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common";
import {
addKey,
blockZoomerOrigins,
createPreprocessorMiddleware,
finalizeBody,
languageFilter,
limitCompletions,
limitOutputTokens,
removeOriginHeaders,
} from "./middleware/request";
import {
createOnProxyResHandler,
@@ -28,13 +30,19 @@ function getModelsResponse() {
return modelsCache;
}
// https://platform.openai.com/docs/models/overview
const gptVariants = [
"gpt-4",
"gpt-4-0314",
"gpt-4-0613",
"gpt-4-0314", // EOL 2023-09-13
"gpt-4-32k",
"gpt-4-32k-0314",
"gpt-4-32k-0613",
"gpt-4-32k-0314", // EOL 2023-09-13
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301",
"gpt-3.5-turbo-0301", // EOL 2023-09-13
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
];
const gpt4Available = keyPool.list().filter((key) => {
@@ -87,6 +95,8 @@ const rewriteRequest = (
languageFilter,
limitOutputTokens,
limitCompletions,
blockZoomerOrigins,
removeOriginHeaders,
finalizeBody,
];
+2 -2
View File
@@ -197,8 +197,8 @@ async function setBuildInfo() {
logger.error(
{
error,
stdout: error.stdout.toString(),
stderr: error.stderr.toString(),
stdout: error.stdout?.toString(),
stderr: error.stderr?.toString(),
},
"Failed to get commit SHA.",
error