adds quick scale keyprovider

fixes wrong/misleading error msg when no Turbo keys available
requests old version of Anthropic API due to breaking SSE changes
2023-07-05 22:11:25 -05:00 · 2023-07-04 11:49:12 -05:00 · 2023-06-24 14:50:48 -05:00 · 2023-06-24 14:25:30 -05:00 · 2023-06-24 14:25:01 -05:00 · 2023-06-23 00:08:09 -05:00
12 changed files with 261 additions and 14 deletions
@@ -18,6 +18,8 @@ type Config = {
  openaiKey?: string;
  /** Comma-delimited list of Anthropic API keys. */
  anthropicKey?: string;
+  scaleKey?: string;
+  scaleMinDeployments: number;
  /**
   * The proxy key to require for requests. Only applicable if the user
   * management mode is set to 'proxy_key', and required if so.
@@ -26,7 +28,7 @@ type Config = {
  /**
   * The admin key used to access the /admin API. Required if the user
   * management mode is set to 'user_token'.
-   **/
+   */
  adminKey?: string;
  /**
   * Which user management mode to use.
@@ -49,7 +51,7 @@ type Config = {
   *
   * `firebase_rtdb`: Users are stored in a Firebase Realtime Database; requires
   *  `firebaseKey` and `firebaseRtdbUrl` to be set.
-   **/
+   */
  gatekeeperStore: "memory" | "firebase_rtdb";
  /** URL of the Firebase Realtime Database if using the Firebase RTDB store. */
  firebaseRtdbUrl?: string;
@@ -127,6 +129,8 @@ export const config: Config = {
  port: getEnvWithDefault("PORT", 7860),
  openaiKey: getEnvWithDefault("OPENAI_KEY", ""),
  anthropicKey: getEnvWithDefault("ANTHROPIC_KEY", ""),
+  scaleKey: getEnvWithDefault("SCALE_KEY", ""),
+  scaleMinDeployments: getEnvWithDefault("SCALE_MIN_DEPLOYMENTS", 0),
  proxyKey: getEnvWithDefault("PROXY_KEY", ""),
  adminKey: getEnvWithDefault("ADMIN_KEY", ""),
  gatekeeper: getEnvWithDefault("GATEKEEPER", "none"),
@@ -262,6 +266,7 @@ export const OMITTED_KEYS: (keyof Config)[] = [
  "logLevel",
  "openaiKey",
  "anthropicKey",
+  "scaleKey",
  "proxyKey",
  "adminKey",
  "checkKeys",
@@ -5,7 +5,7 @@ import {
 } from "./anthropic/provider";
 import { KeyPool } from "./key-pool";

-export type AIService = "openai" | "anthropic";
+export type AIService = "openai" | "anthropic" | "scale";
 export type Model = OpenAIModel | AnthropicModel;

 export interface Key {
@@ -128,8 +128,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
    );
    if (availableKeys.length === 0) {
      let message = needGpt4
-        ? "No active OpenAI keys available."
-        : "No GPT-4 keys available.  Try selecting a non-GPT-4 model.";
+        ? "No GPT-4 keys available.  Try selecting a non-GPT-4 model."
+        : "No active OpenAI keys available.";
      throw new Error(message);
    }

@@ -0,0 +1,155 @@
+import crypto from "crypto";
+import { Key, KeyProvider } from "..";
+import { config } from "../../config";
+import { logger } from "../../logger";
+
+/**
+ * A Scale "deployment": an API key paired with the URL it is served from.
+ * Extends the shared `Key` shape with the Scale-specific fields below.
+ */
+export interface ScaleDeployment extends Key {
+  /** Literal service tag marking this key as belonging to Scale. */
+  readonly service: "scale";
+  /** Deployment URL; the provider parses it from the text after `$` in each configured entry. */
+  deploymentUrl: string;
+  /** `Date.now()` timestamp taken when the deployment was loaded; the provider hands out the smallest value first. */
+  createdAt: number;
+}
+
+/*
+Scale is a bit different from the other providers. It doesn't have set API keys;
+instead there are "deployments", which are created in the Scale dashboard and
+are accessible via a URL and API key together.
+
+The operator can provide these accounts via the SCALE_KEY environment variable,
+but more likely they will want the proxy to just automatically create new
+accounts and deployments as older ones reach their usage limits.
+*/
+
+/**
+ * Key provider for Scale deployments.
+ *
+ * The initial pool is read once from `config.scaleKey`, a comma-delimited list
+ * of `<apiKey>$<deploymentUrl>` entries. Unlike a rotating pool, `get` always
+ * hands out the oldest enabled deployment so newer ones are kept in reserve.
+ */
+export class ScaleKeyProvider implements KeyProvider<ScaleDeployment> {
+  readonly service = "scale";
+
+  private deployments: ScaleDeployment[] = [];
+  private log = logger.child({ module: "key-provider", service: this.service });
+  private churnerEnabled = false;
+
+  /**
+   * Loads the initial deployments from `config.scaleKey`. Duplicate entries
+   * are collapsed, and entries missing either the key or the deployment URL
+   * are skipped with a warning instead of being added to the pool.
+   */
+  constructor() {
+    const keyConfig = config.scaleKey?.trim();
+    if (!keyConfig) return;
+    const initialKeys = [...new Set(keyConfig.split(",").map((k) => k.trim()))];
+    for (const keyStr of initialKeys) {
+      const [key, deploymentUrl] = keyStr.split("$");
+      if (!key || !deploymentUrl) {
+        // A trailing comma or a missing `$` would otherwise yield a deployment
+        // with an empty key or an undefined URL. The entry itself is not
+        // logged because it may contain a secret.
+        this.log.warn(
+          "Skipping malformed SCALE_KEY entry; expected `<apiKey>$<deploymentUrl>`."
+        );
+        continue;
+      }
+      const newDeployment: ScaleDeployment = {
+        key,
+        deploymentUrl,
+        service: this.service,
+        isGpt4: false,
+        isTrial: false,
+        isDisabled: false,
+        promptCount: 0,
+        lastUsed: 0,
+        createdAt: Date.now(),
+        hash: `sca-${crypto
+          .createHash("sha256")
+          .update(keyStr)
+          .digest("hex")
+          .slice(0, 8)}`,
+        lastChecked: 0,
+      };
+      this.deployments.push(newDeployment);
+    }
+    this.log.info(
+      { keyCount: this.deployments.length },
+      "Loaded initial Scale deployments"
+    );
+  }
+
+  /** Flags the churner as enabled; the churner itself is not implemented yet. */
+  public init() {
+    // TODO: Start account churner
+    this.churnerEnabled = true;
+  }
+
+  /** Returns frozen copies of every deployment with the `key` field blanked. */
+  public list() {
+    return this.deployments.map((k) => Object.freeze({ ...k, key: undefined }));
+  }
+
+  /**
+   * Selects the deployment to use for a request.
+   *
+   * @param _model Ignored; Scale doesn't support changing models on the fly.
+   * @returns A shallow copy of the oldest enabled deployment.
+   * @throws Error when every deployment is disabled (or none were loaded).
+   */
+  public get(_model: unknown) {
+    const availableDeployments = this.deployments.filter((a) => !a.isDisabled);
+    const canCreateNewAccounts = config.scaleMinDeployments > 0;
+    if (availableDeployments.length === 0) {
+      if (canCreateNewAccounts) {
+        this.log.warn(
+          "Ran out of Scale deployments and the churner is not creating new ones fast enough."
+        );
+        throw new Error(
+          "No Scale deployments available. Try again in a few minutes when the churner has created new deployments."
+        );
+      } else {
+        throw new Error(
+          "No Scale deployments available and account churner is disabled (possible IP ban or signup rate limit)."
+        );
+      }
+    }
+
+    // Unlike other providers, Scale doesn't want to rotate keys. Instead, we
+    // want to use the same key for as long as possible while building up a
+    // reserve of new accounts. Once an account dies there should be a fresh
+    // one ready to go.
+
+    const now = Date.now();
+
+    // `filter` above produced a fresh array, so sorting it in place does not
+    // reorder the pool itself.
+    const deploymentsByPriority = availableDeployments.sort((a, b) => {
+      return a.createdAt - b.createdAt;
+    });
+
+    const selectedKey = deploymentsByPriority[0];
+    selectedKey.lastUsed = now;
+    return { ...selectedKey };
+  }
+
+  /** Marks the pooled deployment with the same hash as disabled, if present. */
+  public disable(deployment: ScaleDeployment) {
+    const deploymentFromPool = this.deployments.find(
+      (d) => d.hash === deployment.hash
+    );
+    if (!deploymentFromPool || deploymentFromPool.isDisabled) return;
+    deploymentFromPool.isDisabled = true;
+    this.log.warn({ key: deployment.hash }, "Scale deployment disabled");
+  }
+
+  /**
+   * Merges `update` into the pooled deployment identified by `hash`.
+   * Unknown hashes are logged and ignored rather than throwing.
+   */
+  public update(hash: string, update: Partial<ScaleDeployment>) {
+    const deploymentFromPool = this.deployments.find((d) => d.hash === hash);
+    if (!deploymentFromPool) {
+      // `Object.assign` throws a TypeError when its target is undefined.
+      this.log.warn({ key: hash }, "Cannot update unknown Scale deployment");
+      return;
+    }
+    Object.assign(deploymentFromPool, update);
+  }
+
+  /** Number of deployments that are not disabled. */
+  public available() {
+    return this.deployments.filter((k) => !k.isDisabled).length;
+  }
+
+  // Normally this would report whether any keys are still unchecked, but it
+  // is repurposed here: it returns true while the number of enabled
+  // deployments is below `config.scaleMinDeployments`.
+  public anyUnchecked() {
+    return config.scaleMinDeployments - this.available() > 0;
+  }
+
+  /** Increments the prompt counter of the matching deployment, if any. */
+  public incrementPrompt(hash?: string) {
+    const deployment = this.deployments.find((d) => d.hash === hash);
+    if (!deployment) return;
+    deployment.promptCount++;
+  }
+
+  public getLockoutPeriod(_model: unknown) {
+    // TODO: Scale doesn't have rate limits but this may need to be repurposed
+    // to lock out the request queue if the account churner enabled but falling
+    // behind.
+    return 0;
+  }
+
+  public markRateLimited(keyHash: string) {
+    // Do nothing
+  }
+
+  /** Doesn't really mean anything for Scale */
+  public remainingQuota() {
+    return 1;
+  }
+
+  /** Fixed placeholder string; no usage is tracked for Scale here. */
+  public usageInUsd() {
+    return "$0.00 / ∞";
+  }
+}
@@ -9,10 +9,12 @@ import { handleProxyError } from "./middleware/common";
 import {
  addKey,
  addAnthropicPreamble,
+  blockZoomerOrigins,
  createPreprocessorMiddleware,
  finalizeBody,
  languageFilter,
  limitOutputTokens,
+  removeOriginHeaders,
 } from "./middleware/request";
 import {
  ProxyResHandlerWithBody,
@@ -73,6 +75,8 @@ const rewriteAnthropicRequest = (
    addAnthropicPreamble,
    languageFilter,
    limitOutputTokens,
+    blockZoomerOrigins,
+    removeOriginHeaders,
    finalizeBody,
  ];

@@ -2,7 +2,6 @@ import { Request, Response } from "express";
 import httpProxy from "http-proxy";
 import { ZodError } from "zod";

-
 const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
 const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";

@@ -32,9 +31,14 @@ export function writeErrorResponse(
    res.headersSent ||
    res.getHeader("content-type") === "text/event-stream"
  ) {
+    const errorContent =
+      statusCode === 403
+        ? JSON.stringify(errorPayload)
+        : JSON.stringify(errorPayload, null, 2);
+
    const msg = buildFakeSseMessage(
      `${errorSource} error (${statusCode})`,
-      JSON.stringify(errorPayload, null, 2),
+      errorContent,
      req
    );
    res.write(msg);
@@ -57,6 +61,7 @@ export const handleInternalError = (
 ) => {
  try {
    const isZod = err instanceof ZodError;
+    const isForbidden = err.name === "ForbiddenError";
    if (isZod) {
      writeErrorResponse(req, res, 400, {
        error: {
@@ -67,6 +72,17 @@ export const handleInternalError = (
          message: err.message,
        },
      });
+    } else if (isForbidden) {
+      // Spoofs a vaguely threatening OpenAI error message. Only invoked by the
+      // block-zoomers rewriter to scare off tiktokers.
+      writeErrorResponse(req, res, 403, {
+        error: {
+          type: "organization_account_disabled",
+          code: "policy_violation",
+          param: null,
+          message: err.message,
+        },
+      });
    } else {
      writeErrorResponse(req, res, 500, {
        error: {
@@ -91,10 +107,14 @@ export function buildFakeSseMessage(
  req: Request
 ) {
  let fakeEvent;
+  const useBackticks = !type.includes("403");
+  const msgContent = useBackticks
+    ? `\`\`\`\n[${type}: ${string}]\n\`\`\`\n`
+    : `[${type}: ${string}]`;

  if (req.inboundApi === "anthropic") {
    fakeEvent = {
-      completion: `\`\`\`\n[${type}: ${string}]\n\`\`\`\n`,
+      completion: msgContent,
      stop_reason: type,
      truncated: false, // I've never seen this be true
      stop: null,
@@ -109,7 +129,7 @@ export function buildFakeSseMessage(
      model: req.body?.model,
      choices: [
        {
-          delta: { content: `\`\`\`\n[${type}: ${string}]\n\`\`\`\n` },
+          delta: { content: msgContent },
          index: 0,
          finish_reason: type,
        },
@@ -0,0 +1,34 @@
+import { isCompletionRequest } from "../common";
+import { ProxyRequestMiddleware } from ".";
+
+// Substrings looked for in the request's Origin (or, failing that, Referer) header.
+const DISALLOWED_ORIGIN_SUBSTRINGS = "janitorai.com,janitor.ai".split(",");
+
+/**
+ * Error thrown for blocked origins. The `name` is set explicitly because the
+ * internal error handler identifies this error by name to answer with a 403.
+ */
+class ForbiddenError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "ForbiddenError";
+  }
+}
+
+/**
+ * Blocks requests from Janitor AI users with a fake, scary error message so I
+ * stop getting emails asking for tech support.
+ *
+ * Non-completion requests and requests from other origins pass through
+ * untouched.
+ *
+ * @throws ForbiddenError when the Origin/Referer header contains a disallowed
+ *   substring and the request is not the "Just say TEST" probe.
+ */
+export const blockZoomerOrigins: ProxyRequestMiddleware = (_proxyReq, req) => {
+  if (!isCompletionRequest(req)) {
+    return;
+  }
+
+  const origin = req.headers.origin || req.headers.referer;
+  if (origin && DISALLOWED_ORIGIN_SUBSTRINGS.some((s) => origin.includes(s))) {
+    // Venus-derivatives send a test prompt to check if the proxy is working.
+    // We don't want to block that just yet.
+    // The lookup is optional-chained so that a body without a `messages`
+    // array falls through to the block below instead of raising a TypeError.
+    if (req.body?.messages?.[0]?.content === "Just say TEST") {
+      return;
+    }
+
+    throw new ForbiddenError(
+      `Your access was terminated due to violation of our policies, please check your email for more information. If you believe this is in error and would like to appeal, please contact us through our help center at help.openai.com.`
+    );
+  }
+};
@@ -10,10 +10,12 @@ export { transformOutboundPayload } from "./transform-outbound-payload";
 // HPM middleware (runs on onProxyReq, cannot be async)
 export { addKey } from "./add-key";
 export { addAnthropicPreamble } from "./add-anthropic-preamble";
+export { blockZoomerOrigins } from "./block-zoomer-origins";
 export { finalizeBody } from "./finalize-body";
 export { languageFilter } from "./language-filter";
 export { limitCompletions } from "./limit-completions";
 export { limitOutputTokens } from "./limit-output-tokens";
+export { removeOriginHeaders } from "./remove-origin-headers";
 export { transformKoboldPayload } from "./transform-kobold-payload";

 /**
@@ -0,0 +1,10 @@
+import { ProxyRequestMiddleware } from ".";
+
+/**
+ * Removes origin and referer headers before sending the request to the API for
+ * privacy reasons. Uses `removeHeader` so the headers are omitted entirely.
+ */
+export const removeOriginHeaders: ProxyRequestMiddleware = (proxyReq) => {
+  proxyReq.removeHeader("origin");
+  proxyReq.removeHeader("referer");
+};
@@ -99,6 +99,13 @@ function openaiToAnthropic(body: any, req: Request) {
    throw result.error;
  }

+  // Anthropic has started versioning their API, indicated by an HTTP header
+  // `anthropic-version`. The new June 2023 version is not backwards compatible
+  // with our OpenAI-to-Anthropic transformations so we need to explicitly
+  // request the older version for now. 2023-01-01 will be removed in September.
+  // https://docs.anthropic.com/claude/reference/versioning
+  req.headers["anthropic-version"] = "2023-01-01";
+
  const { messages, ...rest } = result.data;
  const prompt =
    result.data.messages
@@ -9,11 +9,13 @@ import { ipLimiter } from "./rate-limit";
 import { handleProxyError } from "./middleware/common";
 import {
  addKey,
+  blockZoomerOrigins,
  createPreprocessorMiddleware,
  finalizeBody,
  languageFilter,
  limitCompletions,
  limitOutputTokens,
+  removeOriginHeaders,
 } from "./middleware/request";
 import {
  createOnProxyResHandler,
@@ -28,13 +30,19 @@ function getModelsResponse() {
    return modelsCache;
  }

+  // https://platform.openai.com/docs/models/overview
  const gptVariants = [
    "gpt-4",
-    "gpt-4-0314",
+    "gpt-4-0613",
+    "gpt-4-0314", // EOL 2023-09-13
    "gpt-4-32k",
-    "gpt-4-32k-0314",
+    "gpt-4-32k-0613",
+    "gpt-4-32k-0314", // EOL 2023-09-13
    "gpt-3.5-turbo",
-    "gpt-3.5-turbo-0301",
+    "gpt-3.5-turbo-0301", // EOL 2023-09-13
+    "gpt-3.5-turbo-0613",
+    "gpt-3.5-turbo-16k",
+    "gpt-3.5-turbo-16k-0613",
  ];

  const gpt4Available = keyPool.list().filter((key) => {
@@ -87,6 +95,8 @@ const rewriteRequest = (
    languageFilter,
    limitOutputTokens,
    limitCompletions,
+    blockZoomerOrigins,
+    removeOriginHeaders,
    finalizeBody,
  ];

@@ -197,8 +197,8 @@ async function setBuildInfo() {
    logger.error(
      {
        error,
-        stdout: error.stdout.toString(),
-        stderr: error.stderr.toString(),
+        stdout: error.stdout?.toString(),
+        stderr: error.stderr?.toString(),
      },
      "Failed to get commit SHA.",
      error
Author	SHA1	Message	Date
nai-degen	59141813d9	adds quick scale keyprovider	2023-07-05 22:11:25 -05:00
nai-degen	327e860967	fixes wrong/misleading error msg when no Turbo keys available	2023-07-04 11:49:12 -05:00
nai-degen	6598b4df0d	requests old version of Anthropic API due to breaking SSE changes	2023-06-24 14:50:48 -05:00
nai-degen	6a7f64b037	adds missed change from origin header adjustment	2023-06-24 14:25:30 -05:00
nai-degen	c8b3238398	reorganizes origin header middleware	2023-06-24 14:25:01 -05:00
nai-degen	602931bf7f	removes origin/referer headers from proxied request	2023-06-23 00:08:09 -05:00
nai-degen	db034a51b3	prevents crash on startup when git is not installed	2023-06-21 01:24:41 -05:00
khanon	43359779e7	Implements more robust anti-zoomer functionality (khanon/oai-reverse-proxy!24 )	2023-06-14 04:05:51 +00:00
nai-degen	c0ac69df27	adjusts default origin block	2023-06-13 21:18:31 -05:00
nai-degen	3a2a6e96fd	adds new OpenAI June 2023 models	2023-06-13 16:24:34 -05:00