37 Commits

Author SHA1 Message Date
nai-degen 858a619ae2 fixes typecheck issue after rebasing 2023-07-19 11:21:53 -05:00
nai-degen bda3d8e8a7 fixes stupid operator precedence mistake 2023-07-19 11:21:05 -05:00
nai-degen e2c491f2e2 cleanup 2023-07-19 11:21:05 -05:00
nai-degen e88e564124 adds working alpine Dockerfile for CI 2023-07-19 11:21:05 -05:00
nai-degen 5eafb6a0b0 tries newer version of zmq again 2023-07-19 11:21:05 -05:00
nai-degen d979edbc0a trying to figure out why it's selecting incorrect model 2023-07-19 11:21:05 -05:00
nai-degen e0fd28bf18 lengthens initial tokenizer timeout 2023-07-19 11:21:05 -05:00
nai-degen 5a2eab4771 fixes python invokation on *nix 2023-07-19 11:21:05 -05:00
nai-degen 367a541c9c downgrades zmq implementation for v5.x 2023-07-19 11:21:05 -05:00
nai-degen 780defab2f adds missing python warning to infopage 2023-07-19 11:21:02 -05:00
nai-degen 33cf8f0077 adds python deps install script 2023-07-19 11:20:17 -05:00
nai-degen e8bf5be77f updates docs 2023-07-19 11:20:17 -05:00
nai-degen 2f21075d19 downgrades zeromq to stable due to native dep issue 2023-07-19 11:20:15 -05:00
nai-degen 9f93a7a0f6 fixes fucked lockfile 2023-07-19 11:19:04 -05:00
nai-degen 3e56456331 adds forgotten lockfile change 2023-07-19 11:19:04 -05:00
nai-degen 5bf5a7cfa6 downgrades zeromq to avoid broken statically linked native dep 2023-07-19 11:19:04 -05:00
nai-degen 83f16c7ec8 tries to fix huggingface docker build issues 2023-07-19 11:19:04 -05:00
nai-degen f76e0d5519 tokenizes and validates incoming prompts 2023-07-19 11:19:04 -05:00
nai-degen c8d74fe8fd includes tokenizer debug info on responses 2023-07-19 11:19:01 -05:00
nai-degen 4341dc5961 improves OpenAI token counting accuracy 2023-07-19 11:17:56 -05:00
nai-degen 0064fd4f3a updates docs and README for Claude tokenizer 2023-07-19 11:17:56 -05:00
nai-degen 857760a2df adds claude tokenizer via janky python ipc 2023-07-19 11:17:56 -05:00
nai-degen 697362381e adds openai tokenizer 2023-07-19 11:17:56 -05:00
nai-degen ac8e18a326 adds python dependencies 2023-07-19 11:17:56 -05:00
nai-degen 6422a526a8 uses esbuild for production bundle 2023-07-19 11:17:53 -05:00
nai-degen e8e1c226d7 adds tiktoken package 2023-07-19 11:14:21 -05:00
Xrystallized 120b7da340 Include non /v1 url in check (khanon/oai-reverse-proxy!27) 2023-07-19 16:00:29 +00:00
nai-degen d7a4829d13 handles keys which have been banned (but not revoked) by openai 2023-07-19 10:28:38 -05:00
nai-degen c749e2d57d adjusts claude rate limit handling to retry more aggressively 2023-07-19 01:58:44 -05:00
nai-degen efa1b03570 uses claude-v1 by default as anthropic seems to be turning off v1.2 2023-07-19 01:48:57 -05:00
goanon016 f6f13f7955 Fix cell size error in sheets (khanon/oai-reverse-proxy!26) 2023-07-16 08:35:42 +00:00
khanon 7478112077 fixes embarrassing auth oversight 2023-07-16 07:31:44 +00:00
nai-degen aee382c84e adds claude-2 to supported models 2023-07-11 09:14:46 -05:00
nai-degen 32605fff53 fixes infopage regression when CHECK_KEYS=false 2023-07-08 15:29:43 -05:00
nai-degen 71882b18ae adds feature to prevent GPT-4 model selection (default off) 2023-07-06 16:09:30 -05:00
nai-degen 561c063d90 assumes keys are GPT-4 by default since it's now GA 2023-07-06 15:07:01 -05:00
nai-degen 2a7efc8d42 cleans up minor npm audit items 2023-07-06 07:26:52 -05:00
35 changed files with 1525 additions and 160 deletions
+1
View File
@@ -1,6 +1,7 @@
.env
.venv
.vscode
.venv
build
greeting.md
node_modules
+2
View File
@@ -40,3 +40,5 @@ To run the proxy locally for development or testing, install Node.js >= 18.0.0 a
4. Start the server in development mode with `npm run start:dev`.
You can also use `npm run start:dev:tsc` to enable project-wide type checking at the cost of slower startup times. `npm run type-check` can be used to run type checking without starting the server.
See the [Optional Dependencies](./docs/optional-dependencies.md) page for information on how to install the optional Claude tokenizer locally.
View File
+45
View File
@@ -0,0 +1,45 @@
# Switched to alpine both for smaller image size and because zeromq.js provides
# a working prebuilt binary for alpine. On Debian, the prebuild was not working
# and a bug in libzmq's makefile was causing the build from source to fail.
# https://github.com/zeromq/zeromq.js/issues/529#issuecomment-1370721089
FROM node:18-alpine as builder
# Install general build dependencies
RUN apk add --no-cache autoconf automake g++ libtool zeromq-dev python3 \
py3-pip git curl cmake gcc musl-dev pkgconfig openssl-dev
# Install Rust (required to build huggingface/tokenizers)
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN git clone -b tokenize https://gitgud.io/khanon/oai-reverse-proxy.git /app
WORKDIR /app
RUN npm ci
RUN npm run build && \
npm prune --production
FROM node:18-alpine as runner
RUN apk add --no-cache \
zeromq-dev \
python3
COPY --from=builder /app/build /app/build
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/.venv /app/.venv
COPY --from=builder /app/package.json /app/package.json
WORKDIR /app
RUN . .venv/bin/activate
EXPOSE 7860
ENV NODE_ENV=production
# TODO: stamp with tag and git commit
ENV RENDER=true
ENV RENDER_GIT_COMMIT=ci-test
CMD [ "npm", "start" ]
+4 -3
View File
@@ -1,9 +1,10 @@
FROM node:18-bullseye-slim
FROM node:18-bullseye
RUN apt-get update && \
apt-get install -y git
apt-get install -y git python3 python3-pip libzmq3-dev curl cmake g++ libsodium-dev pkg-config
RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
WORKDIR /app
RUN npm install
RUN pip3 install --no-cache-dir -r requirements.txt
RUN npm ci --loglevel=verbose
COPY Dockerfile greeting.md* .env* ./
RUN npm run build
EXPOSE 7860
+35
View File
@@ -0,0 +1,35 @@
# Optional Dependencies
## Claude tokenizer
As Anthropic does not ship a NodeJS tokenizer, the server includes a small Python script that runs alongside the proxy to tokenize Claude requests. It is automatically started when the server is launched, but requires additional dependencies to be installed. If these dependencies are not installed, the server will not be able to accurately count the number of tokens in Claude requests but will still function normally otherwise.
Note: On Windows, a Windows Firewall prompt may appear when the Claude tokenizer is started. This is normal and is caused by the Python process attempting to open a socket to communicate with the NodeJS server. You can safely allow the connection.
### Automatic installation (local development)
This will create a venv and install the required dependencies. You still need to activate the venv when running the server, and you must have Python >= 3.8.0 installed.
1. Install Python >= 3.8.0
2. Run `npm install`, which should automatically create a venv and install the required dependencies.
3. Activate the virtual environment with `source .venv/bin/activate` (Linux/Mac) or `.\.venv\Scripts\activate` (PowerShell/Windows)
- **This step is required every time you start the server from a new terminal.**
### Manual installation (local development)
1. Install Python >= 3.8.0
2. Create a virtual environment using `python -m venv .venv`
3. Activate the virtual environment with `source .venv/bin/activate` (Linux/Mac) or `.\.venv\Scripts\activate` (PowerShell/Windows)
- **This step is required every time you start the server from a new terminal.**
4. Install dependencies with `pip install -r requirements.txt`
5. Provided you have the virtual environment activated, the server will automatically start the tokenizer when it is launched.
### Docker (production deployment)
Refer to the reference Dockerfiles for examples of how to install the tokenizer. The Huggingface and Render Dockerfiles both include the tokenizer.
Generally, you will need libzmq3-dev, cmake, g++, and Python >= 3.8.0 installed. The postinstall script will automatically install the required Python dependencies.
### Troubleshooting
Ensure that:
- Python >= 3.8 is installed and in your PATH
- Python dependencies are installed (re-run `npm install`)
- Python venv is activated (see above)
- zeromq optional dependency installed successfully
- This should generally be installed automatically.
- On Windows, you may need to install MS C++ Build Tools or set msvs_version (e.g. `npm config set msvs_version 2019`), then re-run `npm install`.
- On Linux, ensure you have the appropriate build tools and headers installed for your distribution; refer to the reference Dockerfiles for examples.
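If you want to verify the tokenizer helper outside of the proxy, the sketch below speaks the same ZeroMQ protocol the server uses (an `init` handshake followed by `tokenize` requests keyed by a request id). It is only an illustration: it assumes you have started `claude-tokenizer.py` yourself, that it is listening on its default `tcp://localhost:5555` address, and that the optional `zeromq` dependency installed successfully.
// Minimal smoke test for the Claude tokenizer IPC (illustrative sketch).
// Assumes claude-tokenizer.py is already running on tcp://localhost:5555.
import { Dealer } from "zeromq";
async function main() {
  const socket = new Dealer({ sendTimeout: 500 });
  socket.connect("tcp://localhost:5555");
  // Handshake: the helper replies "ok" once it is ready to tokenize.
  await socket.send(["init"]);
  const [ack] = await socket.receive();
  if (ack.toString() !== "ok") throw new Error("Unexpected init response");
  // Token count request; the reply echoes the request id followed by the count.
  await socket.send(["tokenize", "smoke-test", "Hello, Claude!"]);
  const [requestId, tokens] = await socket.receive();
  console.log(`${requestId.toString()}: ${Number(tokens.toString())} tokens`);
  socket.close();
}
main().catch((err) => {
  console.error(err);
  process.exit(1);
});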
+47
View File
@@ -0,0 +1,47 @@
const esbuild = require("esbuild");
const fs = require("fs");
const { copy } = require("esbuild-plugin-copy");
const buildDir = "build";
const config = {
entryPoints: ["src/server.ts"],
bundle: true,
outfile: `${buildDir}/server.js`,
platform: "node",
target: "es2020",
format: "cjs",
sourcemap: true,
external: ["fs", "path", "zeromq", "tiktoken"],
plugins: [
copy({
resolveFrom: "cwd",
assets: {
from: ["src/tokenization/*.py"],
to: [`${buildDir}/tokenization`],
},
}),
],
};
function createBundler() {
return {
build: async () => esbuild.build(config),
watch: async () => {
const watchConfig = { ...config, logLevel: "info" };
const ctx = await esbuild.context(watchConfig);
ctx.watch();
},
};
}
(async () => {
fs.rmSync(buildDir, { recursive: true, force: true });
const isDev = process.argv.includes("--dev");
const bundler = createBundler();
if (isDev) {
await bundler.watch();
} else {
await bundler.build();
}
})();
+678 -82
View File
File diff suppressed because it is too large
+18 -6
View File
@@ -3,12 +3,13 @@
"version": "1.0.0",
"description": "Reverse proxy for the OpenAI API",
"scripts": {
"build:watch": "esbuild src/server.ts --outfile=build/server.js --platform=node --target=es2020 --format=cjs --bundle --sourcemap --watch",
"build": "tsc",
"start:dev": "concurrently \"npm run build:watch\" \"npm run start:watch\"",
"start:dev:tsc": "nodemon --watch src --exec ts-node --transpile-only src/server.ts",
"start:watch": "nodemon --require source-map-support/register build/server.js",
"build:dev": "node esbuild.js --dev",
"build": "node esbuild.js",
"postinstall": "node scripts/install-python-deps.js",
"start:dev:tsc": "nodemon --watch src --exec ts-node src/server.ts",
"start:dev": "concurrently \"npm run build:dev\" \"npm run start:watch\"",
"start:replit": "tsc && node build/server.js",
"start:watch": "nodemon --require source-map-support/register build/server.js",
"start": "node build/server.js",
"type-check": "tsc --noEmit"
},
@@ -22,13 +23,14 @@
"cors": "^2.8.5",
"dotenv": "^16.0.3",
"express": "^4.18.2",
"firebase-admin": "^11.8.0",
"firebase-admin": "^11.9.0",
"googleapis": "^117.0.0",
"http-proxy-middleware": "^3.0.0-beta.1",
"openai": "^3.2.1",
"pino": "^8.11.0",
"pino-http": "^8.3.3",
"showdown": "^2.1.0",
"tiktoken": "^1.0.7",
"uuid": "^9.0.0",
"zlib": "^1.0.5",
"zod": "^3.21.4"
@@ -38,12 +40,22 @@
"@types/express": "^4.17.17",
"@types/showdown": "^2.0.0",
"@types/uuid": "^9.0.1",
"@types/zeromq": "^5.2.2",
"concurrently": "^8.0.1",
"esbuild": "^0.17.16",
"esbuild-node-externals": "^1.7.0",
"esbuild-plugin-copy": "^2.1.1",
"esbuild-register": "^3.4.2",
"nodemon": "^2.0.22",
"source-map-support": "^0.5.21",
"ts-node": "^10.9.1",
"typescript": "^5.0.4"
},
"overrides": {
"optionator": "^0.9.3",
"semver": "^7.5.3"
},
"optionalDependencies": {
"zeromq": "^6.0.0-beta.16"
}
}
+2
View File
@@ -0,0 +1,2 @@
pyzmq==25.1.0
anthropic==0.2.9
+68
View File
@@ -0,0 +1,68 @@
const fs = require("fs");
const spawn = require("child_process").spawn;
const IS_WINDOWS = process.platform === "win32";
const IS_DEV = process.env.NODE_ENV !== "production";
const installDeps = async () => {
try {
console.log("Installing additional optional dependencies...");
console.log("Creating venv...");
await maybeCreateVenv();
console.log("Installing python dependencies...");
await installPythonDependencies();
} catch (error) {
console.error("Error installing additional optional dependencies", error);
process.exit(0); // don't fail the build
}
};
installDeps();
async function maybeCreateVenv() {
if (!IS_DEV) {
console.log("Skipping venv creation in production");
return true;
}
if (fs.existsSync(".venv")) {
console.log("Skipping venv creation, already exists");
return true;
}
const python = IS_WINDOWS ? "python" : "python3";
await runCommand(`${python} -m venv .venv`);
return true;
}
async function installPythonDependencies() {
const commands = [];
if (IS_DEV) {
commands.push(
IS_WINDOWS ? ".venv\\Scripts\\activate.bat" : "source .venv/bin/activate"
);
}
const pip = IS_WINDOWS ? "pip" : "pip3";
commands.push(`${pip} install -r requirements.txt`);
const command = commands.join(" && ");
await runCommand(command);
return true;
}
async function runCommand(command) {
return new Promise((resolve, reject) => {
const child = spawn(command, [], { shell: true });
child.stdout.on("data", (data) => {
console.log(data.toString());
});
child.stderr.on("data", (data) => {
console.error(data.toString());
});
child.on("close", (code) => {
if (code === 0) {
resolve();
} else {
reject();
}
});
});
}
+6
View File
@@ -119,6 +119,11 @@ type Config = {
* Destination URL to redirect blocked requests to, for non-JSON requests.
*/
blockRedirect?: string;
/**
* Whether the proxy should disallow requests for GPT-4 models in order to
* prevent excessive spend. Applies only to OpenAI.
*/
turboOnly?: boolean;
};
// To change configs, create a file called .env in the root directory.
@@ -162,6 +167,7 @@ export const config: Config = {
"You must be over the age of majority in your country to use this service."
),
blockRedirect: getEnvWithDefault("BLOCK_REDIRECT", "https://www.9gag.com"),
turboOnly: getEnvWithDefault("TURBO_ONLY", false),
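// Example: set TURBO_ONLY=true in your .env to block GPT-4 model selection;
// GPT-4 requests will then be rejected with a note to select a Turbo model.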
} as const;
function migrateConfigs() {
+17 -4
View File
@@ -52,7 +52,17 @@ function cacheInfoPageHtml(baseUrl: string) {
};
const title = getServerTitle();
const headerHtml = buildInfoPageHeader(new showdown.Converter(), title);
let headerHtml = buildInfoPageHeader(new showdown.Converter(), title);
if (process.env.MISSING_PYTHON_WARNING) {
headerHtml +=
`<p style="color: red;">Python is not installed; the Claude tokenizer ` +
`cannot start. Your Dockerfile may be out of date; see <a ` +
`href="https://gitgud.io/khanon/oai-reverse-proxy">the docs</a> for an ` +
`updated Huggingface Dockerfile.</p><p>You can disable this warning by ` +
`setting <code>DISABLE_MISSING_PYTHON_WARNING=true</code> in your ` +
`environment.</p>`;
}
const pageBody = `<!DOCTYPE html>
<html lang="en">
@@ -89,7 +99,7 @@ type ServiceInfo = {
function getOpenAIInfo() {
const info: { [model: string]: Partial<ServiceInfo> } = {};
const keys = keyPool.list().filter((k) => k.service === "openai");
const hasGpt4 = keys.some((k) => k.isGpt4);
const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;
if (keyPool.anyUnchecked()) {
const uncheckedKeys = keys.filter((k) => !k.lastChecked);
@@ -137,6 +147,9 @@ function getOpenAIInfo() {
} else {
info.status = "Key checking is disabled." as any;
info.turbo = { activeKeys: keys.filter((k) => !k.isDisabled).length };
info.gpt4 = {
activeKeys: keys.filter((k) => !k.isDisabled && k.isGpt4).length,
};
}
if (config.queueMode !== "none") {
@@ -190,14 +203,14 @@ Logs are anonymous and do not contain IP addresses or timestamps. [You can see t
}
if (config.queueMode !== "none") {
const waits = [];
const waits: string[] = [];
infoBody += `\n## Estimated Wait Times\nIf the AI is busy, your prompt will be processed when a slot frees up.`;
if (config.openaiKey) {
const turboWait = getQueueInformation("turbo").estimatedQueueTime;
const gpt4Wait = getQueueInformation("gpt-4").estimatedQueueTime;
waits.push(`**Turbo:** ${turboWait}`);
if (keyPool.list().some((k) => k.isGpt4)) {
if (keyPool.list().some((k) => k.isGpt4) && !config.turboOnly) {
waits.push(`**GPT-4:** ${gpt4Wait}`);
}
}
+15 -13
View File
@@ -3,11 +3,13 @@ import { Key, KeyProvider } from "..";
import { config } from "../../config";
import { logger } from "../../logger";
// https://docs.anthropic.com/claude/reference/selecting-a-model
export const ANTHROPIC_SUPPORTED_MODELS = [
"claude-instant-v1",
"claude-instant-v1-100k",
"claude-v1",
"claude-v1-100k",
"claude-2",
] as const;
export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number];
@@ -38,10 +40,16 @@ export interface AnthropicKey extends Key {
}
/**
* We don't get rate limit headers from Anthropic so if we get a 429, we just
* lock out the key for a few seconds
* Upon being rate limited, a key will be locked out for this many milliseconds
* while we wait for other concurrent requests to finish.
*/
const RATE_LIMIT_LOCKOUT = 5000;
const RATE_LIMIT_LOCKOUT = 2000;
/**
* Upon assigning a key, we will wait this many milliseconds before allowing it
* to be used again. This is to prevent the queue from flooding a key with too
* many requests while we wait to learn whether previous ones succeeded.
*/
const KEY_REUSE_DELAY = 500;
export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
readonly service = "anthropic";
@@ -127,7 +135,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
// Intended to throttle the queue processor as otherwise it will just
// flood the API with requests and we want to wait a sec to see if we're
// going to get a rate limit error on this key.
selectedKey.rateLimitedUntil = now + 1000;
selectedKey.rateLimitedUntil = now + KEY_REUSE_DELAY;
return { ...selectedKey };
}
@@ -181,15 +189,9 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
/**
* This is called when we receive a 429, which means there are already five
* concurrent requests running on this key. We don't have any information on
* when these requests will resolve so all we can do is wait a bit and try
* again.
* We will lock the key for 10 seconds, which should let a few of the other
* generations finish. This is an arbitrary number but the goal is to balance
* between not hammering the API with requests and not locking out a key that
* is actually available.
* TODO; Try to assign requests to slots on each key so we have an idea of how
* long each slot has been running and can make a more informed decision on
* how long to lock the key.
* when these requests will resolve, so all we can do is wait a bit and try
* again. We will lock the key for 2 seconds after getting a 429 before
* retrying in order to give the other requests a chance to finish.
*/
public markRateLimited(keyHash: string) {
this.log.warn({ key: keyHash }, "Key rate limited");
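The interplay between these two constants amounts to a simple throttle: every assignment briefly reserves the key, and a 429 extends the reservation. A simplified sketch of that logic (the real provider also tracks disabled keys, usage, and logging) might look like this:
// Simplified sketch of the key throttling described by RATE_LIMIT_LOCKOUT and
// KEY_REUSE_DELAY above; not the full AnthropicKeyProvider implementation.
type ThrottledKey = { hash: string; rateLimitedAt: number; rateLimitedUntil: number };
const RATE_LIMIT_LOCKOUT = 2000; // ms to lock a key after a 429
const KEY_REUSE_DELAY = 500; // ms between assignments of the same key
function selectKey(keys: ThrottledKey[]): ThrottledKey {
  const now = Date.now();
  // Prefer the key whose lockout expires soonest so the queue keeps moving.
  const [selected] = [...keys].sort(
    (a, b) => a.rateLimitedUntil - b.rateLimitedUntil
  );
  // Throttle reuse so the queue doesn't flood one key before we learn whether
  // its in-flight requests succeeded.
  selected.rateLimitedAt = now;
  selected.rateLimitedUntil = now + KEY_REUSE_DELAY;
  return selected;
}
function markRateLimited(key: ThrottledKey) {
  // A 429 means the key already has the maximum number of concurrent
  // requests; lock it briefly so other generations can finish.
  key.rateLimitedAt = Date.now();
  key.rateLimitedUntil = key.rateLimitedAt + RATE_LIMIT_LOCKOUT;
}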
+7
View File
@@ -221,6 +221,13 @@ export class OpenAIKeyChecker {
"Key is out of quota. Disabling key."
);
this.updateKey(key.hash, { isDisabled: true });
}
else if (status === 429 && data.error.type === "access_terminated") {
this.log.warn(
{ key: key.hash, isTrial: key.isTrial, error: data },
"Key has been terminated due to policy violations. Disabling key."
);
this.updateKey(key.hash, { isDisabled: true });
} else {
this.log.error(
{ key: key.hash, status, error: data },
+8 -2
View File
@@ -77,7 +77,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
const newKey = {
key: k,
service: "openai" as const,
isGpt4: false,
isGpt4: true,
isTrial: false,
isDisabled: false,
softLimit: 0,
@@ -128,11 +128,17 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
);
if (availableKeys.length === 0) {
let message = needGpt4
? "No GPT-4 keys available. Try selecting a non-GPT-4 model."
? "No GPT-4 keys available. Try selecting a Turbo model."
: "No active OpenAI keys available.";
throw new Error(message);
}
if (needGpt4 && config.turboOnly) {
throw new Error(
"Proxy operator has disabled GPT-4 to reduce quota usage. Try selecting a Turbo model."
);
}
// Select a key, from highest priority to lowest priority:
// 1. Keys which are not rate limited
// a. We ignore rate limits from over a minute ago
+3 -3
View File
@@ -256,9 +256,9 @@ export const appendBatch = async (batch: PromptLogEntry[]) => {
return [
entry.model,
entry.endpoint,
entry.promptRaw,
entry.promptFlattened,
entry.response,
entry.promptRaw.slice(0, 50000),
entry.promptFlattened.slice(0, 50000),
entry.response.slice(0, 50000),
];
});
log.info({ sheetName, rowCount: newRows.length }, "Appending log batch.");
+9 -1
View File
@@ -43,6 +43,8 @@ const getModelsResponse = () => {
"claude-instant-v1.1",
"claude-instant-v1.1-100k",
"claude-instant-v1.0",
"claude-2", // claude-2 is 100k by default it seems
"claude-2.0",
];
const models = claudeVariants.map((id) => ({
@@ -106,10 +108,16 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
body.proxy_note = `Prompts are logged on this proxy instance. See ${host} for more information.`;
}
if (!req.originalUrl.includes("/v1/complete")) {
if (req.inboundApi === "openai") {
req.log.info("Transforming Anthropic response to OpenAI format");
body = transformAnthropicResponse(body);
}
// TODO: Remove once tokenization is stable
if (req.debug) {
body.proxy_tokenizer_debug_info = req.debug;
}
res.status(200).json(body);
};
+1 -1
View File
@@ -33,7 +33,7 @@ export const gatekeeper: RequestHandler = (req, res, next) => {
// TODO: Generate anonymous users based on IP address for public or proxy_key
// modes so that all middleware can assume a user of some sort is present.
if (token === ADMIN_KEY) {
if (ADMIN_KEY && token === ADMIN_KEY) {
return next();
}
+6 -2
View File
@@ -45,6 +45,9 @@ export function writeErrorResponse(
res.write(`data: [DONE]\n\n`);
res.end();
} else {
if (req.debug) {
errorPayload.error.proxy_tokenizer_debug_info = req.debug;
}
res.status(statusCode).json(errorPayload);
}
}
@@ -57,7 +60,8 @@ export const handleProxyError: httpProxy.ErrorCallback = (err, req, res) => {
export const handleInternalError = (
err: Error,
req: Request,
res: Response
res: Response,
errorType: string = "proxy_internal_error"
) => {
try {
const isZod = err instanceof ZodError;
@@ -86,7 +90,7 @@ export const handleInternalError = (
} else {
writeErrorResponse(req, res, 500, {
error: {
type: "proxy_rewriter_error",
type: errorType,
proxy_note: `Reverse proxy encountered an error before it could reach the upstream API.`,
message: err.message,
stack: err.stack,
-2
View File
@@ -41,8 +41,6 @@ export const addKey: ProxyRequestMiddleware = (proxyReq, req) => {
// For such cases, ignore the requested model entirely.
if (req.inboundApi === "openai" && req.outboundApi === "anthropic") {
req.log.debug("Using an Anthropic key for an OpenAI-compatible request");
// We don't assign the model here, that will happen when transforming the
// request body.
assignedKey = keyPool.get("claude-v1");
} else {
assignedKey = keyPool.get(req.body.model);
+36
View File
@@ -0,0 +1,36 @@
import { countTokens } from "../../../tokenization";
import { RequestPreprocessor } from ".";
import { openAIMessagesToClaudePrompt } from "./transform-outbound-payload";
export const checkPromptSize: RequestPreprocessor = async (req) => {
const prompt =
req.inboundApi === "openai" ? req.body.messages : req.body.prompt;
let result;
if (req.outboundApi === "openai") {
result = await countTokens({ req, prompt, service: "openai" });
} else {
// If we're doing OpenAI-to-Anthropic, we need to convert the messages to a
// prompt first before counting tokens, as that process affects the token
// count.
let promptStr =
req.inboundApi === "anthropic"
? prompt
: openAIMessagesToClaudePrompt(prompt);
result = await countTokens({
req,
prompt: promptStr,
service: "anthropic",
});
}
req.promptTokens = result.token_count;
// TODO: Remove once token counting is stable
req.log.debug({ result }, "Counted prompt tokens");
req.debug = req.debug ?? {};
req.debug = {
...req.debug,
...result,
};
};
+1
View File
@@ -4,6 +4,7 @@ import type { ProxyReqCallback } from "http-proxy";
// Express middleware (runs before http-proxy-middleware, can be async)
export { createPreprocessorMiddleware } from "./preprocess";
export { checkPromptSize } from "./check-prompt-size";
export { setApiFormat } from "./set-api-format";
export { transformOutboundPayload } from "./transform-outbound-payload";
+8 -2
View File
@@ -1,6 +1,11 @@
import { RequestHandler } from "express";
import { handleInternalError } from "../common";
import { RequestPreprocessor, setApiFormat, transformOutboundPayload } from ".";
import {
RequestPreprocessor,
checkPromptSize,
setApiFormat,
transformOutboundPayload,
} from ".";
/**
* Returns a middleware function that processes the request body into the given
@@ -12,6 +17,7 @@ export const createPreprocessorMiddleware = (
): RequestHandler => {
const preprocessors: RequestPreprocessor[] = [
setApiFormat(apiFormat),
checkPromptSize,
transformOutboundPayload,
...(additionalPreprocessors ?? []),
];
@@ -24,7 +30,7 @@ export const createPreprocessorMiddleware = (
next();
} catch (error) {
req.log.error(error, "Error while executing request preprocessor");
handleInternalError(error as Error, req, res);
handleInternalError(error as Error, req, res, "proxy_preprocessor_error");
}
};
};
@@ -2,7 +2,13 @@ import { Request } from "express";
import { z } from "zod";
import { isCompletionRequest } from "../common";
import { RequestPreprocessor } from ".";
// import { countTokens } from "../../../tokenization";
import { OpenAIPromptMessage } from "../../../tokenization/openai";
/**
* The maximum number of tokens an Anthropic prompt can have before we switch to
* the larger claude-100k context model.
*/
const CLAUDE_100K_TOKEN_THRESHOLD = 8200;
// https://console.anthropic.com/docs/api/reference#-v1-complete
const AnthropicV1CompleteSchema = z.object({
@@ -55,10 +61,9 @@ const OpenAIV1ChatCompletionSchema = z.object({
/** Transforms an incoming request body to one that matches the target API. */
export const transformOutboundPayload: RequestPreprocessor = async (req) => {
const sameService = req.inboundApi === req.outboundApi;
const alreadyTransformed = req.retryCount > 0;
const notTransformable = !isCompletionRequest(req);
if (alreadyTransformed || notTransformable) {
if (notTransformable) {
return;
}
@@ -69,6 +74,7 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
? OpenAIV1ChatCompletionSchema
: AnthropicV1CompleteSchema;
const result = validator.safeParse(req.body);
if (!result.success) {
req.log.error(
{ issues: result.error.issues, body: req.body },
@@ -76,11 +82,14 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
);
throw result.error;
}
validatePromptSize(req);
return;
}
if (req.inboundApi === "openai" && req.outboundApi === "anthropic") {
req.body = openaiToAnthropic(req.body, req);
validatePromptSize(req);
return;
}
@@ -107,45 +116,27 @@ function openaiToAnthropic(body: any, req: Request) {
req.headers["anthropic-version"] = "2023-01-01";
const { messages, ...rest } = result.data;
const prompt =
result.data.messages
.map((m) => {
let role: string = m.role;
if (role === "assistant") {
role = "Assistant";
} else if (role === "system") {
role = "System";
} else if (role === "user") {
role = "Human";
}
// https://console.anthropic.com/docs/prompt-design
// `name` isn't supported by Anthropic but we can still try to use it.
return `\n\n${role}: ${m.name?.trim() ? `(as ${m.name}) ` : ""}${
m.content
}`;
})
.join("") + "\n\nAssistant: ";
const prompt = openAIMessagesToClaudePrompt(messages);
// Claude 1.2 has been selected as the default for smaller prompts because it
// is said to be less pozzed than the newer 1.3 model. But this is not based
// on any empirical testing, just speculation based on Anthropic stating that
// 1.3 is "safer and less susceptible to adversarial attacks" than 1.2.
// From my own interactions, both are pretty easy to jailbreak so I don't
// think there's much of a difference, honestly.
// If you want to override the model selection, you can set the
// CLAUDE_BIG_MODEL and CLAUDE_SMALL_MODEL environment variables in your
// .env file.
// Using "v1" of a model will automatically select the latest version of that
// model on the Anthropic side.
// No longer defaulting to `claude-v1.2` because it seems to be in the process
// of being deprecated. `claude-v1` is the new default.
// If you have keys that can still use `claude-v1.2`, you can set the
// CLAUDE_BIG_MODEL and CLAUDE_SMALL_MODEL environment variables in your .env
// file.
const CLAUDE_BIG = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
const CLAUDE_SMALL = process.env.CLAUDE_SMALL_MODEL || "claude-v1.2";
const CLAUDE_SMALL = process.env.CLAUDE_SMALL_MODEL || "claude-v1";
// TODO: Finish implementing tokenizer for more accurate model selection.
// This currently uses _character count_, not token count.
const model = prompt.length > 25000 ? CLAUDE_BIG : CLAUDE_SMALL;
const contextTokens = Number(req.promptTokens ?? 0) + Number(rest.max_tokens);
const model =
(contextTokens ?? 0) > CLAUDE_100K_TOKEN_THRESHOLD
? CLAUDE_BIG
: CLAUDE_SMALL;
req.log.debug(
{ contextTokens, model, CLAUDE_100K_TOKEN_THRESHOLD },
"Selected Claude model"
);
let stops = rest.stop
? Array.isArray(rest.stop)
@@ -168,3 +159,63 @@ function openaiToAnthropic(body: any, req: Request) {
stop_sequences: stops,
};
}
export function openAIMessagesToClaudePrompt(messages: OpenAIPromptMessage[]) {
return (
messages
.map((m) => {
let role: string = m.role;
if (role === "assistant") {
role = "Assistant";
} else if (role === "system") {
role = "System";
} else if (role === "user") {
role = "Human";
}
// https://console.anthropic.com/docs/prompt-design
// `name` isn't supported by Anthropic but we can still try to use it.
return `\n\n${role}: ${m.name?.trim() ? `(as ${m.name}) ` : ""}${
m.content
}`;
})
.join("") + "\n\nAssistant:"
);
}
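// For reference, an illustrative example of the prompt this conversion
// produces. Given OpenAI-format messages:
//   [ { role: "system", content: "You are a helpful assistant." },
//     { role: "user", name: "Anon", content: "Hi there." } ]
// the resulting Claude prompt string is:
//   "\n\nSystem: You are a helpful assistant.\n\nHuman: (as Anon) Hi there.\n\nAssistant:"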
function validatePromptSize(req: Request) {
const promptTokens = req.promptTokens || 0;
const model = req.body.model;
let maxTokensForModel = 0;
if (model.match(/gpt-3.5/)) {
maxTokensForModel = 4096;
} else if (model.match(/gpt-4/)) {
maxTokensForModel = 8192;
} else if (model.match(/gpt-4-32k/)) {
maxTokensForModel = 32768;
} else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?(?:-100k)/)) {
// Claude models don't throw an error if you exceed the token limit and
// instead just become extremely slow and give schizo results, so we will be
// more conservative with the token limit for them.
maxTokensForModel = 100000 * 0.98;
} else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?$/)) {
maxTokensForModel = 9000 * 0.98;
} else {
// I don't trust my regular expressions enough to throw an error here so
// we just log a warning and allow 100k tokens.
req.log.warn({ model }, "Unknown model, using 100k token limit.");
maxTokensForModel = 100000;
}
if (req.debug) {
req.debug.calculated_max_tokens = maxTokensForModel;
}
z.number()
.max(
maxTokensForModel,
`Prompt is too long for model ${model} (${promptTokens} tokens, max ${maxTokensForModel})`
)
.parse(promptTokens);
req.log.debug({ promptTokens, maxTokensForModel }, "Prompt size validated");
}
+4
View File
@@ -377,6 +377,10 @@ function handleOpenAIRateLimitError(
// Billing quota exceeded (key is dead, disable it)
keyPool.disable(req.key!);
errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
} else if (type === "access_terminated") {
// Account banned (key is dead, disable it)
keyPool.disable(req.key!);
errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
} else if (type === "billing_not_active") {
// Billing is not active (key is dead, disable it)
keyPool.disable(req.key!);
+5
View File
@@ -125,6 +125,11 @@ const openaiResponseHandler: ProxyResHandlerWithBody = async (
body.proxy_note = `Prompts are logged on this proxy instance. See ${host} for more information.`;
}
// TODO: Remove once tokenization is stable
if (req.debug) {
body.proxy_tokenizer_debug_info = req.debug;
}
res.status(200).json(body);
};
+3
View File
@@ -12,6 +12,7 @@ import { handleInfoPage } from "./info-page";
import { logQueue } from "./prompt-logging";
import { start as startRequestQueue } from "./proxy/queue";
import { init as initUserStore } from "./proxy/auth/user-store";
import { init as initTokenizers } from "./tokenization";
import { checkOrigin } from "./proxy/check-origin";
const PORT = config.port;
@@ -99,6 +100,8 @@ async function start() {
keyPool.init();
await initTokenizers();
if (config.gatekeeper === "user_token") {
await initUserStore();
}
+160
View File
@@ -0,0 +1,160 @@
import { spawn, ChildProcess } from "child_process";
import { join } from "path";
import { logger } from "../logger";
const TOKENIZER_SOCKET = "tcp://localhost:5555";
const log = logger.child({ module: "claude-ipc" });
const pythonLog = logger.child({ module: "claude-python" });
let tokenizer: ChildProcess;
let initialized = false;
let socket: any; // zeromq.Dealer, not sure how to import it safely as it is optional
export async function init() {
log.info("Initializing Claude tokenizer IPC");
try {
tokenizer = await launchTokenizer();
const zmq = await import("zeromq");
socket = new zmq.Dealer({ sendTimeout: 500 });
socket.connect(TOKENIZER_SOCKET);
await socket.send(["init"]);
const response = await socket.receive();
if (response.toString() !== "ok") {
throw new Error("Unexpected init response");
}
// Start message pump
processMessages();
// Test tokenizer
const result = await requestTokenCount({
requestId: "init-test",
prompt: "test prompt",
});
if (result !== 2) {
log.error({ result }, "Unexpected test token count");
throw new Error("Unexpected test token count");
}
initialized = true;
} catch (err) {
log.error({ err: err.message }, "Failed to initialize Claude tokenizer");
if (process.env.NODE_ENV !== "production") {
console.error(
`\nClaude tokenizer failed to initialize.\nIf you want to use the tokenizer, see the Optional Dependencies documentation.\n`
);
}
return false;
}
log.info("Claude tokenizer IPC ready");
return true;
}
const pendingRequests = new Map<
string,
{ resolve: (tokens: number) => void }
>();
export async function requestTokenCount({
requestId,
prompt,
}: {
requestId: string;
prompt: string;
}) {
if (!socket) {
throw new Error("Claude tokenizer is not initialized");
}
log.debug({ requestId, chars: prompt.length }, "Requesting token count");
await socket.send(["tokenize", requestId, prompt]);
log.debug({ requestId }, "Waiting for socket response");
return new Promise<number>(async (resolve, reject) => {
const resolveFn = (tokens: number) => {
log.debug({ requestId, tokens }, "Received token count");
pendingRequests.delete(requestId);
resolve(tokens);
};
pendingRequests.set(requestId, { resolve: resolveFn });
const timeout = initialized ? 500 : 10000;
setTimeout(() => {
if (pendingRequests.has(requestId)) {
pendingRequests.delete(requestId);
const err = "Tokenizer deadline exceeded";
log.warn({ requestId }, err);
reject(new Error(err));
}
}, timeout);
});
}
async function processMessages() {
if (!socket) {
throw new Error("Claude tokenizer is not initialized");
}
log.debug("Starting message loop");
for await (const [requestId, tokens] of socket) {
const request = pendingRequests.get(requestId.toString());
if (!request) {
log.error({ requestId }, "No pending request found for incoming message");
continue;
}
request.resolve(Number(tokens.toString()));
}
}
async function launchTokenizer() {
return new Promise<ChildProcess>((resolve, reject) => {
let resolved = false;
const python = process.platform === "win32" ? "python" : "python3";
const proc = spawn(python, [
"-u",
join(__dirname, "tokenization", "claude-tokenizer.py"),
]);
if (!proc) {
reject(new Error("Failed to spawn Claude tokenizer"));
}
function cleanup() {
socket?.close();
socket = undefined!;
tokenizer = undefined!;
}
proc.stdout!.on("data", (data) => {
pythonLog.info(data.toString().trim());
});
proc.stderr!.on("data", (data) => {
pythonLog.error(data.toString().trim());
});
proc.on("error", (err) => {
pythonLog.error({ err }, "Claude tokenizer error");
cleanup();
if (!resolved) {
resolved = true;
reject(err);
}
});
proc.on("close", (code) => {
pythonLog.info(`Claude tokenizer exited with code ${code}`);
cleanup();
if (code !== 0 && !resolved) {
resolved = true;
reject(new Error("Claude tokenizer exited immediately"));
}
});
// Wait a moment to catch any immediate errors (missing imports, etc)
setTimeout(() => {
if (!resolved) {
resolved = true;
resolve(proc);
}
}, 200);
});
}
+54
View File
@@ -0,0 +1,54 @@
"""
This is a small process running alongside the main NodeJS server intended to
tokenize prompts for Claude, as currently Anthropic only ships a Python
implementation for their tokenizer.
ZeroMQ is used for IPC between the NodeJS server and this process.
"""
import zmq
import anthropic
def create_socket():
context = zmq.Context()
socket = context.socket(zmq.ROUTER)
socket.bind("tcp://*:5555")
return context, socket
def init(socket):
print("claude-tokenizer.py: starting")
try:
while True:
message = socket.recv_multipart()
routing_id, command = message
if command == b"init":
print("claude-tokenizer.py: initialized")
socket.send_multipart([routing_id, b"ok"])
break
except Exception as e:
print("claude-tokenizer.py: failed to initialize")
return
message_processor(socket)
def message_processor(socket):
while True:
try:
message = socket.recv_multipart()
routing_id, command, request_id, payload = message
payload = payload.decode("utf-8")
if command == b"exit":
print("claude-tokenizer.py: exiting")
break
elif command == b"tokenize":
token_count = anthropic.count_tokens(payload)
socket.send_multipart([routing_id, request_id, str(token_count).encode("utf-8")])
else:
print("claude-tokenizer.py: unknown message type")
except Exception as e:
print(f"claude-tokenizer.py: failed to process message ({e})")
break
if __name__ == "__main__":
context, socket = create_socket()
init(socket)
socket.close()
context.term()
+1
View File
@@ -0,0 +1 @@
export { init, countTokens } from "./tokenizer";
+57
View File
@@ -0,0 +1,57 @@
import { Tiktoken } from "tiktoken/lite";
import cl100k_base from "tiktoken/encoders/cl100k_base.json";
let encoder: Tiktoken;
export function init() {
encoder = new Tiktoken(
cl100k_base.bpe_ranks,
cl100k_base.special_tokens,
cl100k_base.pat_str
);
return true;
}
// Implementation based on and tested against:
// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
export function getTokenCount(messages: any[], model: string) {
const gpt4 = model.startsWith("gpt-4");
const tokensPerMessage = gpt4 ? 3 : 4;
const tokensPerName = gpt4 ? 1 : -1; // turbo omits role if name is present
let numTokens = 0;
for (const message of messages) {
numTokens += tokensPerMessage;
for (const key of Object.keys(message)) {
{
const value = message[key];
// Break if we get a huge message or exceed the token limit to prevent DoS
// 100k tokens allows for future 100k GPT-4 models and 250k characters is
// just a sanity check
if (value.length > 250000 || numTokens > 100000) {
numTokens = 100000;
return {
tokenizer: "tiktoken (prompt length limit exceeded)",
token_count: numTokens,
};
}
numTokens += encoder.encode(message[key]).length;
if (key === "name") {
numTokens += tokensPerName;
}
}
}
}
numTokens += 3; // every reply is primed with <|start|>assistant<|message|>
return { tokenizer: "tiktoken", token_count: numTokens };
}
export type OpenAIPromptMessage = {
name?: string;
content: string;
role: string;
};
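A minimal usage sketch of the counter above (illustrative only; in the proxy it is called through `countTokens` in `src/tokenization`, and the relative import path here is assumed):
// Illustrative usage of the tiktoken-based counter defined above.
import { init, getTokenCount, OpenAIPromptMessage } from "./openai";
init(); // load the cl100k_base encoder once at startup
const messages: OpenAIPromptMessage[] = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "How many tokens is this?" },
];
const { tokenizer, token_count } = getTokenCount(messages, "gpt-4");
console.log(tokenizer, token_count); // exact count depends on the encoder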
+129
View File
@@ -0,0 +1,129 @@
import { Request } from "express";
import childProcess from "child_process";
import { config } from "../config";
import { logger } from "../logger";
import {
init as initIpc,
requestTokenCount as requestClaudeTokenCount,
} from "./claude-ipc";
import {
init as initEncoder,
getTokenCount as getOpenAITokenCount,
OpenAIPromptMessage,
} from "./openai";
let canTokenizeClaude = false;
export async function init() {
if (config.anthropicKey) {
if (!isPythonInstalled()) {
const skipWarning = !!process.env.DISABLE_MISSING_PYTHON_WARNING;
process.env.MISSING_PYTHON_WARNING = skipWarning ? "" : "true";
} else {
canTokenizeClaude = await initIpc();
if (!canTokenizeClaude) {
logger.warn(
"Anthropic key is set, but tokenizer is not available. Claude prompts will use a naive estimate for token count."
);
}
}
}
if (config.openaiKey) {
initEncoder();
}
}
type TokenCountResult = {
token_count: number;
tokenizer: string;
tokenization_duration_ms: number;
};
type TokenCountRequest = {
req: Request;
} & (
| { prompt: string; service: "anthropic" }
| { prompt: OpenAIPromptMessage[]; service: "openai" }
);
export async function countTokens({
req,
service,
prompt,
}: TokenCountRequest): Promise<TokenCountResult> {
const time = process.hrtime();
switch (service) {
case "anthropic":
if (!canTokenizeClaude) {
const result = guesstimateTokens(prompt);
return {
token_count: result,
tokenizer: "guesstimate (claude-ipc disabled)",
tokenization_duration_ms: getElapsedMs(time),
};
}
// If the prompt is absolutely massive (possibly malicious) don't even try
if (prompt.length > 500000) {
return {
token_count: guesstimateTokens(JSON.stringify(prompt)),
tokenizer: "guesstimate (prompt too long)",
tokenization_duration_ms: getElapsedMs(time),
};
}
try {
const result = await requestClaudeTokenCount({
requestId: String(req.id),
prompt,
});
return {
token_count: result,
tokenizer: "claude-ipc",
tokenization_duration_ms: getElapsedMs(time),
};
} catch (e: any) {
req.log.error("Failed to tokenize with claude_tokenizer", e);
const result = guesstimateTokens(prompt);
return {
token_count: result,
tokenizer: `guesstimate (claude-ipc failed: ${e.message})`,
tokenization_duration_ms: getElapsedMs(time),
};
}
case "openai":
const result = getOpenAITokenCount(prompt, req.body.model);
return {
...result,
tokenization_duration_ms: getElapsedMs(time),
};
default:
throw new Error(`Unknown service: ${service}`);
}
}
function getElapsedMs(time: [number, number]) {
const diff = process.hrtime(time);
return diff[0] * 1000 + diff[1] / 1e6;
}
function guesstimateTokens(prompt: string) {
// From Anthropic's docs:
// The maximum length of prompt that Claude can see is its context window.
// Claude's context window is currently ~6500 words / ~8000 tokens /
// ~28000 Unicode characters.
// This suggests 0.28 tokens per character but in practice this seems to be
// a substantial underestimate in some cases.
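// (8000 tokens / 28000 characters ≈ 0.286 tokens per character; using 0.325
// adds roughly 14% of headroom on top of that figure.)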
return Math.ceil(prompt.length * 0.325);
}
function isPythonInstalled() {
try {
const python = process.platform === "win32" ? "python" : "python3";
childProcess.execSync(`${python} --version`, { stdio: "ignore" });
return true;
} catch (err) {
logger.debug({ err: err.message }, "Python not installed.");
return false;
}
}
+3
View File
@@ -18,6 +18,9 @@ declare global {
onAborted?: () => void;
proceed: () => void;
heartbeatInterval?: NodeJS.Timeout;
promptTokens?: number;
// TODO: remove later
debug: Record<string, any>;
}
}
}
+3 -1
View File
@@ -9,7 +9,9 @@
"skipLibCheck": true,
"skipDefaultLibCheck": true,
"outDir": "build",
"sourceMap": true
"sourceMap": true,
"resolveJsonModule": true,
"useUnknownInCatchVariables": false
},
"include": ["src"],
"exclude": ["node_modules"],