46 Commits

Author SHA1 Message Date
nai-degen 858a619ae2 fixes typecheck issue after rebasing 2023-07-19 11:21:53 -05:00
nai-degen bda3d8e8a7 fixes stupid operator precedence mistake 2023-07-19 11:21:05 -05:00
nai-degen e2c491f2e2 cleanup 2023-07-19 11:21:05 -05:00
nai-degen e88e564124 adds working alpine Dockerfile for CI 2023-07-19 11:21:05 -05:00
nai-degen 5eafb6a0b0 tries newer version of zmq again 2023-07-19 11:21:05 -05:00
nai-degen d979edbc0a trying to figure out why it's selecting incorrect model 2023-07-19 11:21:05 -05:00
nai-degen e0fd28bf18 lengthens initial tokenizer timeout 2023-07-19 11:21:05 -05:00
nai-degen 5a2eab4771 fixes python invokation on *nix 2023-07-19 11:21:05 -05:00
nai-degen 367a541c9c downgrades zmq implementation for v5.x 2023-07-19 11:21:05 -05:00
nai-degen 780defab2f adds missing python warning to infopage 2023-07-19 11:21:02 -05:00
nai-degen 33cf8f0077 adds python deps install script 2023-07-19 11:20:17 -05:00
nai-degen e8bf5be77f updates docs 2023-07-19 11:20:17 -05:00
nai-degen 2f21075d19 downgrades zeromq to stable due to native dep issue 2023-07-19 11:20:15 -05:00
nai-degen 9f93a7a0f6 fixes fucked lockfile 2023-07-19 11:19:04 -05:00
nai-degen 3e56456331 adds forgotten lockfile change 2023-07-19 11:19:04 -05:00
nai-degen 5bf5a7cfa6 downgrades zeromq to avoid broken statically linked native dep 2023-07-19 11:19:04 -05:00
nai-degen 83f16c7ec8 tries to fix huggingface docker build issues 2023-07-19 11:19:04 -05:00
nai-degen f76e0d5519 tokenizes and validates incoming prompts 2023-07-19 11:19:04 -05:00
nai-degen c8d74fe8fd includes tokenizer debug info on responses 2023-07-19 11:19:01 -05:00
nai-degen 4341dc5961 improves OpenAI token counting accuracy 2023-07-19 11:17:56 -05:00
nai-degen 0064fd4f3a updates docs and README for Claude tokenizer 2023-07-19 11:17:56 -05:00
nai-degen 857760a2df adds claude tokenizer via janky python ipc 2023-07-19 11:17:56 -05:00
nai-degen 697362381e adds openai tokenizer 2023-07-19 11:17:56 -05:00
nai-degen ac8e18a326 adds python dependencies 2023-07-19 11:17:56 -05:00
nai-degen 6422a526a8 uses esbuild for production bundle 2023-07-19 11:17:53 -05:00
nai-degen e8e1c226d7 adds tiktoken package 2023-07-19 11:14:21 -05:00
Xrystallized 120b7da340 Include non /v1 url in check (khanon/oai-reverse-proxy!27) 2023-07-19 16:00:29 +00:00
nai-degen d7a4829d13 handles keys which have been banned (but not revoked) by openai 2023-07-19 10:28:38 -05:00
nai-degen c749e2d57d adjusts claude rate limit handling to retry more aggressively 2023-07-19 01:58:44 -05:00
nai-degen efa1b03570 uses claude-v1 by default as anthropic seems to be turning off v1.2 2023-07-19 01:48:57 -05:00
goanon016 f6f13f7955 Fix cell size error in sheets (khanon/oai-reverse-proxy!26) 2023-07-16 08:35:42 +00:00
khanon 7478112077 fixes embarrassing auth oversight 2023-07-16 07:31:44 +00:00
nai-degen aee382c84e adds claude-2 to supported models 2023-07-11 09:14:46 -05:00
nai-degen 32605fff53 fixes infopage regression when CHECK_KEYS=false 2023-07-08 15:29:43 -05:00
nai-degen 71882b18ae adds feature to prevent GPT-4 model selection (default off) 2023-07-06 16:09:30 -05:00
nai-degen 561c063d90 assumes keys are GPT-4 by default since it's now GA 2023-07-06 15:07:01 -05:00
nai-degen 2a7efc8d42 cleans up minor npm audit items 2023-07-06 07:26:52 -05:00
nai-degen 327e860967 fixes wrong/misleading error msg when no Turbo keys available 2023-07-04 11:49:12 -05:00
nai-degen 6598b4df0d requests old version of Anthropic API due to breaking SSE changes 2023-06-24 14:50:48 -05:00
nai-degen 6a7f64b037 adds missed change from origin header adjustment 2023-06-24 14:25:30 -05:00
nai-degen c8b3238398 reorganizes origin header middleware 2023-06-24 14:25:01 -05:00
nai-degen 602931bf7f removes origin/referer headers from proxied request 2023-06-23 00:08:09 -05:00
nai-degen db034a51b3 prevents crash on startup when git is not installed 2023-06-21 01:24:41 -05:00
khanon 43359779e7 Implements more robust anti-zoomer functionality (khanon/oai-reverse-proxy!24) 2023-06-14 04:05:51 +00:00
nai-degen c0ac69df27 adjusts default origin block 2023-06-13 21:18:31 -05:00
nai-degen 3a2a6e96fd adds new OpenAI June 2023 models 2023-06-13 16:24:34 -05:00
43 changed files with 1674 additions and 539 deletions
+1
View File
@@ -1,6 +1,7 @@
.env .env
.venv .venv
.vscode .vscode
.venv
build build
greeting.md greeting.md
node_modules node_modules
+2
View File
@@ -40,3 +40,5 @@ To run the proxy locally for development or testing, install Node.js >= 18.0.0 a
4. Start the server in development mode with `npm run start:dev`. 4. Start the server in development mode with `npm run start:dev`.
You can also use `npm run start:dev:tsc` to enable project-wide type checking at the cost of slower startup times. `npm run type-check` can be used to run type checking without starting the server. You can also use `npm run start:dev:tsc` to enable project-wide type checking at the cost of slower startup times. `npm run type-check` can be used to run type checking without starting the server.
See the [Optional Dependencies](./docs/optional-dependencies.md) page for information on how to install the optional Claude tokenizer locally.
+45
View File
@@ -0,0 +1,45 @@
# Switched to alpine both for smaller image size and because zeromq.js provides
# a working prebuilt binary for alpine. On Debian, the prebuild was not working
# and a bug in libzmq's makefile was causing the build from source to fail.
# https://github.com/zeromq/zeromq.js/issues/529#issuecomment-1370721089
FROM node:18-alpine as builder
# Install general build dependencies
RUN apk add --no-cache autoconf automake g++ libtool zeromq-dev python3 \
py3-pip git curl cmake gcc musl-dev pkgconfig openssl-dev
# Install Rust (required to build huggingface/tokenizers)
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN git clone -b tokenize https://gitgud.io/khanon/oai-reverse-proxy.git /app
WORKDIR /app
RUN npm ci
RUN npm run build && \
npm prune --production
FROM node:18-alpine as runner
RUN apk add --no-cache \
zeromq-dev \
python3
COPY --from=builder /app/build /app/build
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/.venv /app/.venv
COPY --from=builder /app/package.json /app/package.json
WORKDIR /app
RUN . .venv/bin/activate
EXPOSE 7860
ENV NODE_ENV=production
# TODO: stamp with tag and git commit
ENV RENDER=true
ENV RENDER_GIT_COMMIT=ci-test
CMD [ "npm", "start" ]
+4 -3
View File
@@ -1,9 +1,10 @@
FROM node:18-bullseye-slim FROM node:18-bullseye
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y git apt-get install -y git python3 python3-pip libzmq3-dev curl cmake g++ libsodium-dev pkg-config
RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
WORKDIR /app WORKDIR /app
RUN npm install RUN pip3 install --no-cache-dir -r requirements.txt
RUN npm ci --loglevel=verbose
COPY Dockerfile greeting.md* .env* ./ COPY Dockerfile greeting.md* .env* ./
RUN npm run build RUN npm run build
EXPOSE 7860 EXPOSE 7860
+4 -6
View File
@@ -1,9 +1,7 @@
# ⚠️ Warning ⚠️ # Warning
**I strongly suggest against using this feature with a Google account that you care about.** Depending on the content of the prompts people submit (which you obviously have no control over), Google may flag the spreadsheet as containing inappropriate content. If this happens, Google may suspend your ability to share the spreadsheet, block access to Google Sheets, or even suspend your entire Google account (this happened to my throwaway, though it may have been because it was very clearly a throwaway and used a burner SMS number). **I strongly suggest against using this feature with a Google account that you care about.** Depending on the content of the prompts people submit, Google may flag the spreadsheet as containing inappropriate content. This seems to prevent you from sharing that spreadsheet _or any others on the account. This happened with my throwaway account during testing; the existing shared spreadsheet continues to work but even completely new spreadsheets are flagged and cannot be shared.
**Be aware that Google has been known to link accounts through device/browser fingerprinting, so even a VPN may not be sufficient; if you must use this feature, do so entirely from an isolated VM and VPN with no other Google accounts logged in.** I'll be looking into alternative storage backends but you should not use this implementation with a Google account you care about, or even one remotely connected to your main accounts (as Google has a history of linking accounts together via IPs/browser fingerprinting). Use a VPN and completely isolated VM to be safe.
There are now other logging options available, so you should use those instead. I'm leaving this here for posterity, but I will not be providing any support for it.
# Configuring Google Sheets Prompt Logging # Configuring Google Sheets Prompt Logging
This proxy can log incoming prompts and model responses to Google Sheets. Some configuration on the Google side is required to enable this feature. The APIs used are free, but you will need a Google account and a Google Cloud Platform project. This proxy can log incoming prompts and model responses to Google Sheets. Some configuration on the Google side is required to enable this feature. The APIs used are free, but you will need a Google account and a Google Cloud Platform project.
@@ -12,7 +10,7 @@ NOTE: Concurrency is not supported. Don't connect two instances of the server to
## Prerequisites ## Prerequisites
- A Google account - A Google account
- **⚠️ USE A THROWAWAY ACCOUNT!** - **USE A THROWAWAY ACCOUNT!**
- A Google Cloud Platform project - A Google Cloud Platform project
### 0. Create a Google Cloud Platform Project ### 0. Create a Google Cloud Platform Project
+35
View File
@@ -0,0 +1,35 @@
# Optional Dependencies
## Claude tokenizer
As Anthropic does not ship a NodeJS tokenizer, the server includes a small Python script that runs alongside the proxy to tokenize Claude requests. It is automatically started when the server is launched, but requires additional dependencies to be installed. If these dependencies are not installed, the server will not be able to accurately count the number of tokens in Claude requests but will still function normally otherwise.
Note: On Windows, a Windows Firewall prompt may appear when the Claude tokenizer is started. This is normal and is caused by the Python process attempting to open a socket to communicate with the NodeJS server. You can safely allow the connection.
### Automatic installation (local development)
This will create a venv and install the required dependencies. You still need to activate the venv when running the server, and you must have Python >= 3.8.0 installed.
1. Install Python >= 3.8.0
2. Run `npm install`, which should automatically create a venv and install the required dependencies.
3. Activate the virtual environment with `source .venv/bin/activate` (Linux/Mac) or `.\.venv\Scripts\activate` (PowerShell/Windows)
- **This step is required every time you start the server from a new terminal.**
### Manual installation (local development)
1. Install Python >= 3.8.0
2. Create a virtual environment using `python -m venv .venv`
3. Activate the virtual environment with `source .venv/bin/activate` (Linux/Mac) or `.\.venv\Scripts\activate` (PowerShell/Windows)
- **This step is required every time you start the server from a new terminal.**
4. Install dependencies with `pip install -r requirements.txt`
5. Provided you have the virtual environment activated, the server will automatically start the tokenizer when it is launched.
### Docker (production deployment)
Refer to the reference Dockerfiles for examples on how to install the tokenizer. The Huggingface and Render Dockerfiles both include the tokenizer.
Generally, you will need libzmq3-dev, cmake, g++, and Python >= 3.8.0 installed. The postinstall script will automatically install the required Python dependencies.
### Troubleshooting
Ensure that:
- Python >= 3.8 is installed and in your PATH
- Python dependencies are installed (re-run `npm install`)
- Python venv is activated (see above)
- zeromq optional dependency installed successfully
- This should generally be installed automatically.
- On Windows, you may need to install MS C++ Build Tools or set msvs_version (eg `npm config set msvs_version 2019`), then re-run npm install.
- On Linux, ensure you have the appropriate build tools and headers installed for your distribution; refer to the reference Dockerfiles for examples.
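The optional-dependencies page above describes the Claude tokenizer as a Python script that the Node server launches alongside the proxy and talks to over a socket. Below is a minimal TypeScript sketch of what that Node-side IPC could look like, using the `zeromq` v6 request/reply API listed in package.json. The script path `tokenization/claude_tokenizer.py`, the `tcp://127.0.0.1:5555` endpoint, and the `{ prompt } → { token_count }` message shape are illustrative assumptions, not the proxy's actual protocol.

```ts
import { spawn } from "child_process";
import { Request } from "zeromq";

// Assumed endpoint and script name -- illustrative only.
const TOKENIZER_ENDPOINT = "tcp://127.0.0.1:5555";
const TOKENIZER_SCRIPT = "tokenization/claude_tokenizer.py";

// Launch the Python tokenizer alongside the Node process.
// The interpreter is `python3` on *nix and `python` on Windows.
const python = process.platform === "win32" ? "python" : "python3";
spawn(python, [TOKENIZER_SCRIPT], { stdio: "inherit" });

// REQ socket: one request in, one reply out, in order.
const sock = new Request();
sock.receiveTimeout = 5000; // fail fast if the Python side never came up
sock.connect(TOKENIZER_ENDPOINT);

/** Asks the Python process to count Claude tokens for a prompt string. */
async function countClaudeTokens(prompt: string): Promise<number> {
  await sock.send(JSON.stringify({ prompt }));
  const [reply] = await sock.receive();
  return JSON.parse(reply.toString()).token_count;
}

// Usage: if the optional deps are missing, fall back gracefully instead of
// failing the request, mirroring the behavior described in the doc.
countClaudeTokens("\n\nHuman: Hello\n\nAssistant:")
  .then((n) => console.log(`prompt is ${n} tokens`))
  .catch(() => console.warn("Claude tokenizer unavailable; using estimate"));
```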
-41
View File
@@ -1,41 +0,0 @@
# Prompt Logging
This proxy supports logging incoming prompts and model responses to different destinations. Currently, Airtable and Google Sheets (not recommended) are supported. You can enable prompt logging by setting the `PROMPT_LOGGING` environment variable to `true` and configuring the `PROMPT_LOGGING_BACKEND` environment variable to the desired logging backend.
The included backends are generally designed with the goal of working within the limitations of a service's free tier, such as strict API rate limits or maximum record limits. As a result, they may be a little clunky to use and may not be as performant as a dedicated logging solution, but they should be sufficient for low-volume use cases. You can implement your own backend by exporting a module that implements the `PromptLoggingBackend` interface and wiring it up to `src/prompt-logging/log-queue.ts`.
Refer to the list below for the required configuration for each backend.
## Airtable
1. Create an Airtable.com account
2. Create a Personal Access Token
1. Go to https://airtable.com/create/tokens/new and enter a name for your token
2. Under **Scopes**, click **Add a scope** and assign the following scopes:
- `data.records:read`
- `data.records:write`
- `schema.bases:read`
- `schema.bases:write`
3. Under **Access**, click **Add a base** and assign "All current and future bases in this workspace"
- Create a new workspace for prompt logging if you don't want to give the script access to all your bases
4. Click **Create token**
5. A modal will appear with your token; copy it and set it as the `AIRTABLE_KEY` environment variable
3. Find your workspace ID
- You can find your workspace ID by going to https://airtable.com/workspaces and selecting **View Workspace** on the workspace you want to use
- The ID is the text beginning with `wsp` in the URL, after `airtable.com/workspaces/`
- Set this value as the `AIRTABLE_WORKSPACE_ID` environment variable
4. Set the `PROMPT_LOGGING_BACKEND` environment variable to `airtable`
The proxy will handle creating and migrating bases for you. The following bases will be created in the workspace you select:
- `oai-proxy-index`
- Stores metadata about the proxy and the bases it creates
- `oai-proxy-logs-*`
- Stores prompt logs
- As free bases are limited in size, the proxy will create additional bases as needed
## Google Sheets (deprecated)
**⚠️ This implementation is strongly discouraged** due to the nature of content users may submit, which may be in violation of Google's policies. They seem to analyze the content of API requests and may suspend your account. Don't use this unless you know what you're doing.
Refer to the dedicated [Google Sheets docs](logging-sheets.md) for detailed instructions on how to set up Google Sheets logging.
+47
View File
@@ -0,0 +1,47 @@
const esbuild = require("esbuild");
const fs = require("fs");
const { copy } = require("esbuild-plugin-copy");
const buildDir = "build";
const config = {
entryPoints: ["src/server.ts"],
bundle: true,
outfile: `${buildDir}/server.js`,
platform: "node",
target: "es2020",
format: "cjs",
sourcemap: true,
external: ["fs", "path", "zeromq", "tiktoken"],
plugins: [
copy({
resolveFrom: "cwd",
assets: {
from: ["src/tokenization/*.py"],
to: [`${buildDir}/tokenization`],
},
}),
],
};
function createBundler() {
return {
build: async () => esbuild.build(config),
watch: async () => {
const watchConfig = { ...config, logLevel: "info" };
const ctx = await esbuild.context(watchConfig);
ctx.watch();
},
};
}
(async () => {
fs.rmSync(buildDir, { recursive: true, force: true });
const isDev = process.argv.includes("--dev");
const bundler = createBundler();
if (isDev) {
await bundler.watch();
} else {
await bundler.build();
}
})();
+678 -108
View File
File diff suppressed because it is too large
+18 -7
View File
@@ -3,12 +3,13 @@
"version": "1.0.0", "version": "1.0.0",
"description": "Reverse proxy for the OpenAI API", "description": "Reverse proxy for the OpenAI API",
"scripts": { "scripts": {
"build:watch": "esbuild src/server.ts --outfile=build/server.js --platform=node --target=es2020 --format=cjs --bundle --sourcemap --watch", "build:dev": "node esbuild.js --dev",
"build": "tsc", "build": "node esbuild.js",
"start:dev": "concurrently \"npm run build:watch\" \"npm run start:watch\"", "postinstall": "node scripts/install-python-deps.js",
"start:dev:tsc": "nodemon --watch src --exec ts-node --transpile-only src/server.ts", "start:dev:tsc": "nodemon --watch src --exec ts-node src/server.ts",
"start:watch": "nodemon --require source-map-support/register build/server.js", "start:dev": "concurrently \"npm run build:dev\" \"npm run start:watch\"",
"start:replit": "tsc && node build/server.js", "start:replit": "tsc && node build/server.js",
"start:watch": "nodemon --require source-map-support/register build/server.js",
"start": "node build/server.js", "start": "node build/server.js",
"type-check": "tsc --noEmit" "type-check": "tsc --noEmit"
}, },
@@ -18,18 +19,18 @@
"author": "", "author": "",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"airtable": "^0.12.1",
"axios": "^1.3.5", "axios": "^1.3.5",
"cors": "^2.8.5", "cors": "^2.8.5",
"dotenv": "^16.0.3", "dotenv": "^16.0.3",
"express": "^4.18.2", "express": "^4.18.2",
"firebase-admin": "^11.8.0", "firebase-admin": "^11.9.0",
"googleapis": "^117.0.0", "googleapis": "^117.0.0",
"http-proxy-middleware": "^3.0.0-beta.1", "http-proxy-middleware": "^3.0.0-beta.1",
"openai": "^3.2.1", "openai": "^3.2.1",
"pino": "^8.11.0", "pino": "^8.11.0",
"pino-http": "^8.3.3", "pino-http": "^8.3.3",
"showdown": "^2.1.0", "showdown": "^2.1.0",
"tiktoken": "^1.0.7",
"uuid": "^9.0.0", "uuid": "^9.0.0",
"zlib": "^1.0.5", "zlib": "^1.0.5",
"zod": "^3.21.4" "zod": "^3.21.4"
@@ -39,12 +40,22 @@
"@types/express": "^4.17.17", "@types/express": "^4.17.17",
"@types/showdown": "^2.0.0", "@types/showdown": "^2.0.0",
"@types/uuid": "^9.0.1", "@types/uuid": "^9.0.1",
"@types/zeromq": "^5.2.2",
"concurrently": "^8.0.1", "concurrently": "^8.0.1",
"esbuild": "^0.17.16", "esbuild": "^0.17.16",
"esbuild-node-externals": "^1.7.0",
"esbuild-plugin-copy": "^2.1.1",
"esbuild-register": "^3.4.2", "esbuild-register": "^3.4.2",
"nodemon": "^2.0.22", "nodemon": "^2.0.22",
"source-map-support": "^0.5.21", "source-map-support": "^0.5.21",
"ts-node": "^10.9.1", "ts-node": "^10.9.1",
"typescript": "^5.0.4" "typescript": "^5.0.4"
},
"overrides": {
"optionator": "^0.9.3",
"semver": "^7.5.3"
},
"optionalDependencies": {
"zeromq": "^6.0.0-beta.16"
} }
} }
+2
View File
@@ -0,0 +1,2 @@
pyzmq==25.1.0
anthropic==0.2.9
+68
View File
@@ -0,0 +1,68 @@
const fs = require("fs");
const spawn = require("child_process").spawn;
const IS_WINDOWS = process.platform === "win32";
const IS_DEV = process.env.NODE_ENV !== "production";
const installDeps = async () => {
try {
console.log("Installing additional optional dependencies...");
console.log("Creating venv...");
await maybeCreateVenv();
console.log("Installing python dependencies...");
await installPythonDependencies();
} catch (error) {
console.error("Error installing additional optional dependencies", error);
process.exit(0); // don't fail the build
}
};
installDeps();
async function maybeCreateVenv() {
if (!IS_DEV) {
console.log("Skipping venv creation in production");
return true;
}
if (fs.existsSync(".venv")) {
console.log("Skipping venv creation, already exists");
return true;
}
const python = IS_WINDOWS ? "python" : "python3";
await runCommand(`${python} -m venv .venv`);
return true;
}
async function installPythonDependencies() {
const commands = [];
if (IS_DEV) {
commands.push(
IS_WINDOWS ? ".venv\\Scripts\\activate.bat" : "source .venv/bin/activate"
);
}
const pip = IS_WINDOWS ? "pip" : "pip3";
commands.push(`${pip} install -r requirements.txt`);
const command = commands.join(" && ");
await runCommand(command);
return true;
}
async function runCommand(command) {
return new Promise((resolve, reject) => {
const child = spawn(command, [], { shell: true });
child.stdout.on("data", (data) => {
console.log(data.toString());
});
child.stderr.on("data", (data) => {
console.error(data.toString());
});
child.on("close", (code) => {
if (code === 0) {
resolve();
} else {
reject();
}
});
});
}
+11 -20
View File
@@ -8,6 +8,7 @@ const startupLogger = pino({ level: "debug" }).child({ module: "startup" });
const isDev = process.env.NODE_ENV !== "production"; const isDev = process.env.NODE_ENV !== "production";
type PromptLoggingBackend = "google_sheets";
export type DequeueMode = "fair" | "random" | "none"; export type DequeueMode = "fair" | "random" | "none";
type Config = { type Config = {
@@ -74,22 +75,12 @@ type Config = {
logLevel?: "debug" | "info" | "warn" | "error"; logLevel?: "debug" | "info" | "warn" | "error";
/** Whether prompts and responses should be logged to persistent storage. */ /** Whether prompts and responses should be logged to persistent storage. */
promptLogging?: boolean; promptLogging?: boolean;
/** Which prompt logging backend to use. /** Which prompt logging backend to use. */
* promptLoggingBackend?: PromptLoggingBackend;
* `google_sheets`: Logs prompts and responses to a Google Sheets spreadsheet.
* This method is no longer recommended; see docs for more info.
*
* `airtable`: Logs prompts and responses to an Airtable table.
*/
promptLoggingBackend?: "google_sheets" | "airtable";
/** Base64-encoded Google Sheets API key. */ /** Base64-encoded Google Sheets API key. */
googleSheetsKey?: string; googleSheetsKey?: string;
/** Google Sheets spreadsheet ID. */ /** Google Sheets spreadsheet ID. */
googleSheetsSpreadsheetId?: string; googleSheetsSpreadsheetId?: string;
/** Airtable personal access token. */
airtableKey?: string;
/** Airtable workspace ID, under which bases will be automatically created. */
airtableWorkspaceId?: string;
/** Whether to periodically check keys for usage and validity. */ /** Whether to periodically check keys for usage and validity. */
checkKeys?: boolean; checkKeys?: boolean;
/** /**
@@ -128,6 +119,11 @@ type Config = {
* Desination URL to redirect blocked requests to, for non-JSON requests. * Desination URL to redirect blocked requests to, for non-JSON requests.
*/ */
blockRedirect?: string; blockRedirect?: string;
/**
* Whether the proxy should disallow requests for GPT-4 models in order to
* prevent excessive spend. Applies only to OpenAI.
*/
turboOnly?: boolean;
}; };
// To change configs, create a file called .env in the root directory. // To change configs, create a file called .env in the root directory.
@@ -159,8 +155,6 @@ export const config: Config = {
quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"), quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"),
promptLogging: getEnvWithDefault("PROMPT_LOGGING", false), promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined), promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
airtableKey: getEnvWithDefault("AIRTABLE_KEY", undefined),
airtableWorkspaceId: getEnvWithDefault("AIRTABLE_WORKSPACE_ID", undefined),
googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined), googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
googleSheetsSpreadsheetId: getEnvWithDefault( googleSheetsSpreadsheetId: getEnvWithDefault(
"GOOGLE_SHEETS_SPREADSHEET_ID", "GOOGLE_SHEETS_SPREADSHEET_ID",
@@ -173,6 +167,7 @@ export const config: Config = {
"You must be over the age of majority in your country to use this service." "You must be over the age of majority in your country to use this service."
), ),
blockRedirect: getEnvWithDefault("BLOCK_REDIRECT", "https://www.9gag.com"), blockRedirect: getEnvWithDefault("BLOCK_REDIRECT", "https://www.9gag.com"),
turboOnly: getEnvWithDefault("TURBO_ONLY", false),
} as const; } as const;
function migrateConfigs() { function migrateConfigs() {
@@ -245,7 +240,7 @@ export async function assertConfigIsValid() {
// Ensure forks which add new secret-like config keys don't unwittingly expose // Ensure forks which add new secret-like config keys don't unwittingly expose
// them to users. // them to users.
for (const key of getKeys(config)) { for (const key of getKeys(config)) {
const maybeSensitive = ["key", "credential", "secret", "password"].some( const maybeSensitive = ["key", "credentials", "secret", "password"].some(
(sensitive) => key.toLowerCase().includes(sensitive) (sensitive) => key.toLowerCase().includes(sensitive)
); );
const secured = new Set([...SENSITIVE_KEYS, ...OMITTED_KEYS]); const secured = new Set([...SENSITIVE_KEYS, ...OMITTED_KEYS]);
@@ -262,10 +257,7 @@ export async function assertConfigIsValid() {
* Config keys that are masked on the info page, but not hidden as their * Config keys that are masked on the info page, but not hidden as their
* presence may be relevant to the user due to privacy implications. * presence may be relevant to the user due to privacy implications.
*/ */
export const SENSITIVE_KEYS: (keyof Config)[] = [ export const SENSITIVE_KEYS: (keyof Config)[] = ["googleSheetsSpreadsheetId"];
"googleSheetsSpreadsheetId",
"airtableWorkspaceId",
];
/** /**
* Config keys that are not displayed on the info page at all, generally because * Config keys that are not displayed on the info page at all, generally because
@@ -281,7 +273,6 @@ export const OMITTED_KEYS: (keyof Config)[] = [
"checkKeys", "checkKeys",
"quotaDisplayMode", "quotaDisplayMode",
"googleSheetsKey", "googleSheetsKey",
"airtableKey",
"firebaseKey", "firebaseKey",
"firebaseRtdbUrl", "firebaseRtdbUrl",
"gatekeeperStore", "gatekeeperStore",
+17 -4
View File
@@ -52,7 +52,17 @@ function cacheInfoPageHtml(baseUrl: string) {
}; };
const title = getServerTitle(); const title = getServerTitle();
const headerHtml = buildInfoPageHeader(new showdown.Converter(), title); let headerHtml = buildInfoPageHeader(new showdown.Converter(), title);
if (process.env.MISSING_PYTHON_WARNING) {
headerHtml +=
`<p style="color: red;">Python is not installed; the Claude tokenizer ` +
`cannot start. Your Dockerfile may be out of date; see <a ` +
`href="https://gitgud.io/khanon/oai-reverse-proxy">the docs</a> for an ` +
`updated Huggingface Dockerfile.</p><p>You can disable this warning by ` +
`setting <code>DISABLE_MISSING_PYTHON_WARNING=true</code> in your ` +
`environment.</p>`;
}
const pageBody = `<!DOCTYPE html> const pageBody = `<!DOCTYPE html>
<html lang="en"> <html lang="en">
@@ -89,7 +99,7 @@ type ServiceInfo = {
function getOpenAIInfo() { function getOpenAIInfo() {
const info: { [model: string]: Partial<ServiceInfo> } = {}; const info: { [model: string]: Partial<ServiceInfo> } = {};
const keys = keyPool.list().filter((k) => k.service === "openai"); const keys = keyPool.list().filter((k) => k.service === "openai");
const hasGpt4 = keys.some((k) => k.isGpt4); const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;
if (keyPool.anyUnchecked()) { if (keyPool.anyUnchecked()) {
const uncheckedKeys = keys.filter((k) => !k.lastChecked); const uncheckedKeys = keys.filter((k) => !k.lastChecked);
@@ -137,6 +147,9 @@ function getOpenAIInfo() {
} else { } else {
info.status = "Key checking is disabled." as any; info.status = "Key checking is disabled." as any;
info.turbo = { activeKeys: keys.filter((k) => !k.isDisabled).length }; info.turbo = { activeKeys: keys.filter((k) => !k.isDisabled).length };
info.gpt4 = {
activeKeys: keys.filter((k) => !k.isDisabled && k.isGpt4).length,
};
} }
if (config.queueMode !== "none") { if (config.queueMode !== "none") {
@@ -190,14 +203,14 @@ Logs are anonymous and do not contain IP addresses or timestamps. [You can see t
} }
if (config.queueMode !== "none") { if (config.queueMode !== "none") {
const waits = []; const waits: string[] = [];
infoBody += `\n## Estimated Wait Times\nIf the AI is busy, your prompt will processed when a slot frees up.`; infoBody += `\n## Estimated Wait Times\nIf the AI is busy, your prompt will processed when a slot frees up.`;
if (config.openaiKey) { if (config.openaiKey) {
const turboWait = getQueueInformation("turbo").estimatedQueueTime; const turboWait = getQueueInformation("turbo").estimatedQueueTime;
const gpt4Wait = getQueueInformation("gpt-4").estimatedQueueTime; const gpt4Wait = getQueueInformation("gpt-4").estimatedQueueTime;
waits.push(`**Turbo:** ${turboWait}`); waits.push(`**Turbo:** ${turboWait}`);
if (keyPool.list().some((k) => k.isGpt4)) { if (keyPool.list().some((k) => k.isGpt4) && !config.turboOnly) {
waits.push(`**GPT-4:** ${gpt4Wait}`); waits.push(`**GPT-4:** ${gpt4Wait}`);
} }
} }
+15 -13
View File
@@ -3,11 +3,13 @@ import { Key, KeyProvider } from "..";
import { config } from "../../config"; import { config } from "../../config";
import { logger } from "../../logger"; import { logger } from "../../logger";
// https://docs.anthropic.com/claude/reference/selecting-a-model
export const ANTHROPIC_SUPPORTED_MODELS = [ export const ANTHROPIC_SUPPORTED_MODELS = [
"claude-instant-v1", "claude-instant-v1",
"claude-instant-v1-100k", "claude-instant-v1-100k",
"claude-v1", "claude-v1",
"claude-v1-100k", "claude-v1-100k",
"claude-2",
] as const; ] as const;
export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number]; export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number];
@@ -38,10 +40,16 @@ export interface AnthropicKey extends Key {
} }
/** /**
* We don't get rate limit headers from Anthropic so if we get a 429, we just * Upon being rate limited, a key will be locked out for this many milliseconds
* lock out the key for a few seconds * while we wait for other concurrent requests to finish.
*/ */
const RATE_LIMIT_LOCKOUT = 5000; const RATE_LIMIT_LOCKOUT = 2000;
/**
* Upon assigning a key, we will wait this many milliseconds before allowing it
* to be used again. This is to prevent the queue from flooding a key with too
* many requests while we wait to learn whether previous ones succeeded.
*/
const KEY_REUSE_DELAY = 500;
export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> { export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
readonly service = "anthropic"; readonly service = "anthropic";
@@ -127,7 +135,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
// Intended to throttle the queue processor as otherwise it will just // Intended to throttle the queue processor as otherwise it will just
// flood the API with requests and we want to wait a sec to see if we're // flood the API with requests and we want to wait a sec to see if we're
// going to get a rate limit error on this key. // going to get a rate limit error on this key.
selectedKey.rateLimitedUntil = now + 1000; selectedKey.rateLimitedUntil = now + KEY_REUSE_DELAY;
return { ...selectedKey }; return { ...selectedKey };
} }
@@ -181,15 +189,9 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
/** /**
* This is called when we receive a 429, which means there are already five * This is called when we receive a 429, which means there are already five
* concurrent requests running on this key. We don't have any information on * concurrent requests running on this key. We don't have any information on
* when these requests will resolve so all we can do is wait a bit and try * when these requests will resolve, so all we can do is wait a bit and try
* again. * again. We will lock the key for 2 seconds after getting a 429 before
* We will lock the key for 10 seconds, which should let a few of the other * retrying in order to give the other requests a chance to finish.
* generations finish. This is an arbitrary number but the goal is to balance
* between not hammering the API with requests and not locking out a key that
* is actually available.
* TODO; Try to assign requests to slots on each key so we have an idea of how
* long each slot has been running and can make a more informed decision on
* how long to lock the key.
*/ */
public markRateLimited(keyHash: string) { public markRateLimited(keyHash: string) {
this.log.warn({ key: keyHash }, "Key rate limited"); this.log.warn({ key: keyHash }, "Key rate limited");
+7
View File
@@ -221,6 +221,13 @@ export class OpenAIKeyChecker {
"Key is out of quota. Disabling key." "Key is out of quota. Disabling key."
); );
this.updateKey(key.hash, { isDisabled: true }); this.updateKey(key.hash, { isDisabled: true });
}
else if (status === 429 && data.error.type === "access_terminated") {
this.log.warn(
{ key: key.hash, isTrial: key.isTrial, error: data },
"Key has been terminated due to policy violations. Disabling key."
);
this.updateKey(key.hash, { isDisabled: true });
} else { } else {
this.log.error( this.log.error(
{ key: key.hash, status, error: data }, { key: key.hash, status, error: data },
+9 -3
View File
@@ -77,7 +77,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
const newKey = { const newKey = {
key: k, key: k,
service: "openai" as const, service: "openai" as const,
isGpt4: false, isGpt4: true,
isTrial: false, isTrial: false,
isDisabled: false, isDisabled: false,
softLimit: 0, softLimit: 0,
@@ -128,11 +128,17 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
); );
if (availableKeys.length === 0) { if (availableKeys.length === 0) {
let message = needGpt4 let message = needGpt4
? "No active OpenAI keys available." ? "No GPT-4 keys available. Try selecting a Turbo model."
: "No GPT-4 keys available. Try selecting a non-GPT-4 model."; : "No active OpenAI keys available.";
throw new Error(message); throw new Error(message);
} }
if (needGpt4 && config.turboOnly) {
throw new Error(
"Proxy operator has disabled GPT-4 to reduce quota usage. Try selecting a Turbo model."
);
}
// Select a key, from highest priority to lowest priority: // Select a key, from highest priority to lowest priority:
// 1. Keys which are not rate limited // 1. Keys which are not rate limited
// a. We ignore rate limits from over a minute ago // a. We ignore rate limits from over a minute ago
-226
View File
@@ -1,226 +0,0 @@
import Airtable from "airtable";
import axios, { AxiosError } from "axios";
import { config } from "../../config";
import { logger } from "../../logger";
import { PromptLogBackend, PromptLogEntry } from "..";
type AirbaseFieldType =
| "singleLineText"
| "multilineText"
| "number"
| "dateTime";
type IndexRecord = {
/** Name of the base */
id: string;
/** Schema version of the base */
schema: 1;
/** Last row number used */
lastRow: number;
/** When the base was created. ISO 8601 format. */
created: string;
};
const INDEX_BASE_NAME = "oai-proxy-index";
export class AirtableBackend implements PromptLogBackend {
private log = logger.child({ module: "airtable" });
private airtable: Airtable;
private indexBase: Airtable.Base | null = null;
private indexTable: Airtable.Table<IndexRecord> | null = null;
private activeLogBase: Airtable.Base | null = null;
private activeLogTable: Airtable.Table<PromptLogEntry> | null = null;
constructor() {
this.airtable = new Airtable({
apiKey: config.airtableKey,
requestTimeout: 1000 * 60 * 1,
});
}
async init() {
this.log.info("Initializing Airtable backend...");
await this.ensureIndexBase();
await this.ensureLogBase();
}
private async ensureIndexBase() {
const bases = await this.listBases();
const indexBaseId = bases.find((b) => b.name === INDEX_BASE_NAME)?.id;
if (!indexBaseId) {
this.log.info("Creating index base.");
const result = await this.createBase(INDEX_BASE_NAME, [
{ name: "id", type: "singleLineText" },
{ name: "schema", type: "number" },
{ name: "lastRow", type: "number" },
{ name: "created", type: "dateTime" },
]);
this.log.info("Index base created.");
this.indexBase = this.airtable.base(result);
this.indexTable = this.indexBase.table<IndexRecord>(INDEX_BASE_NAME);
} else {
this.log.info("Index base already exists.");
this.indexBase = this.airtable.base(indexBaseId);
this.indexTable = this.indexBase.table<IndexRecord>(INDEX_BASE_NAME);
}
}
/**
* Sets the active log base to the newest one in the index, unless there are
* no bases or the newest one is already full. Creates a new base if needed.
*/
private async ensureLogBase() {
const indexRecords = await this.indexTable!.select().all();
if (indexRecords.length === 0) {
this.log.info("No log bases found, creating a new one.");
await this.createLogBase();
} else {
const newestBase = indexRecords.reduce((a, b) => {
const aDate = new Date(a.get("created"));
const bDate = new Date(b.get("created"));
return aDate > bDate ? a : b;
});
const lastRow = newestBase.get("lastRow");
if (lastRow >= 1000) {
this.log.info(
{ lastRow },
"Last log base is full, creating a new one."
);
await this.createLogBase();
} else if (this.activeLogBase === null) {
const newestBaseId = newestBase.get("id");
this.log.info(
{ activeLogBase: newestBaseId },
"Setting active log base."
);
this.activeLogBase = this.airtable.base(newestBaseId);
this.activeLogTable =
this.activeLogBase.table<PromptLogEntry>(newestBaseId);
} else {
this.log.debug("Active log base already set.");
}
}
}
private async createLogBase() {
const indexRecords = await this.indexTable!.select().all();
const baseCount = indexRecords.length;
const baseName = `oai-proxy-log-${baseCount.toString().padStart(3, "0")}`;
this.log.info({ baseName }, "Creating new log base.");
const newBaseId = await this.createBase(baseName, [
{ name: "model", type: "singleLineText" },
{ name: "endpoint", type: "singleLineText" },
{ name: "promptRaw", type: "multilineText" },
{ name: "prompt", type: "multilineText" },
{ name: "response", type: "multilineText" },
]);
this.activeLogBase = this.airtable.base(newBaseId);
this.activeLogTable = this.activeLogBase.table<PromptLogEntry>(baseName);
this.log.info({ baseName }, "New log base created and activated.");
await this.indexTable!.create([
{
fields: {
id: newBaseId,
schema: 1,
lastRow: 0,
created: new Date().toISOString(),
},
},
]);
this.log.info({ baseName }, "New log base added to index.");
}
/**
* Appends a batch of entries to the log and updates the index. If the log
* has reached its maximum size, a new log base will be created.
*/
async appendBatch(entries: PromptLogEntry[]) {
if (!this.activeLogBase || !this.activeLogTable) {
throw new Error("No active log base.");
}
// Airtable can only create 10 rows at a time, so we have to chunk it.
const chunkSize = 10;
const chunks = [];
for (let i = 0; i < entries.length; i += chunkSize) {
chunks.push(entries.slice(i, i + chunkSize));
}
this.log.info(
{ batchSize: entries.length, chunks: chunks.length },
"Appending batch of log entries."
);
for (const chunk of chunks) {
const records = chunk.map((entry) => ({
fields: {
model: entry.model,
endpoint: entry.endpoint,
promptRaw: entry.promptRaw,
prompt: entry.promptFlattened,
response: entry.response,
},
}));
await this.activeLogTable.create(records);
this.log.info(
{ count: records.length },
"Submitted chunk of log entries."
);
}
await this.syncIndex();
await this.ensureLogBase();
}
async syncIndex() {
if (!this.activeLogBase || !this.activeLogTable) {
throw new Error("No active log base.");
}
const logRecords = await this.activeLogTable.select().all();
const logCount = logRecords.length;
// Update the index with the new row count, by the active log base ID.
const indexRecords = await this.indexTable!.select({
filterByFormula: `{id} = "${this.activeLogBase.getId()}"`,
}).all();
if (indexRecords.length !== 1) {
throw new Error("Index record not found.");
}
const indexRecord = indexRecords[0];
await this.indexTable!.update([
{ id: indexRecord.id, fields: { lastRow: logCount } },
]);
}
// The airtable library doesn't support meta operations like listing or
// creating bases, so we have to do that ourselves.
/**
* Lists all bases in the workspace.
* @returns Array of base objects with `id` and `name` properties.
*/
private async listBases(): Promise<{ id: string; name: string }[]> {
// Maximum page size is 1000 but I'm not going to bother with that for now.
const url = `https://api.airtable.com/v0/meta/bases`;
const response = await axios.get(url, {
headers: { Authorization: `Bearer ${config.airtableKey}` },
});
return response.data.bases;
}
/**
* Creates a new base with the given name and table schema. Table will be
* created with the same name as the base.
* Schema is a list of fields, each of which has a name and type. Only a
* subset of field types are supported.
* Returns the id of the new base.
*/
private async createBase(
name: string,
fields: { name: string; type: AirbaseFieldType }[]
) {
const url = `https://api.airtable.com/v0/meta/bases`;
const response = await axios.post(
url,
{ name, tables: [{ name, fields }] },
{ headers: { Authorization: `Bearer ${config.airtableKey}` } }
);
return response.data.id;
}
}
+1 -19
View File
@@ -1,19 +1 @@
import { config } from "../../config"; export * as sheets from "./sheets";
import { PromptLogBackend } from "..";
import { AirtableBackend } from "./airtable";
import { sheets } from "./sheets";
export const createPromptLogBackend = (
backend: NonNullable<typeof config.promptLoggingBackend>
): PromptLogBackend => {
switch (backend) {
case "google_sheets":
// Sheets backend is just a module, though it has a bunch of state so it
// should probably be a class just like the Airtable backend.
return sheets;
case "airtable":
return new AirtableBackend();
default:
throw new Error(`Unknown log backend: ${backend}`);
}
};
+39 -8
View File
@@ -10,7 +10,7 @@ import type { CredentialBody } from "google-auth-library";
import type { GaxiosResponse } from "googleapis-common"; import type { GaxiosResponse } from "googleapis-common";
import { config } from "../../config"; import { config } from "../../config";
import { logger } from "../../logger"; import { logger } from "../../logger";
import { PromptLogBackend, PromptLogEntry } from ".."; import { PromptLogEntry } from "..";
// There is always a sheet called __index__ which contains a list of all the // There is always a sheet called __index__ which contains a list of all the
// other sheets. We use this rather than iterating over all the sheets in case // other sheets. We use this rather than iterating over all the sheets in case
@@ -240,7 +240,7 @@ const createLogSheet = async () => {
activeLogSheet = { sheetName, rows: [] }; activeLogSheet = { sheetName, rows: [] };
}; };
const appendBatch = async (batch: PromptLogEntry[]) => { export const appendBatch = async (batch: PromptLogEntry[]) => {
if (!activeLogSheet) { if (!activeLogSheet) {
// Create a new log sheet if we don't have one yet. // Create a new log sheet if we don't have one yet.
await createLogSheet(); await createLogSheet();
@@ -256,9 +256,9 @@ const appendBatch = async (batch: PromptLogEntry[]) => {
return [ return [
entry.model, entry.model,
entry.endpoint, entry.endpoint,
entry.promptRaw, entry.promptRaw.slice(0, 50000),
entry.promptFlattened, entry.promptFlattened.slice(0, 50000),
entry.response, entry.response.slice(0, 50000),
]; ];
}); });
log.info({ sheetName, rowCount: newRows.length }, "Appending log batch."); log.info({ sheetName, rowCount: newRows.length }, "Appending log batch.");
@@ -310,7 +310,40 @@ const finalizeBatch = async () => {
log.info({ sheetName, rowCount }, "Batch finalized."); log.info({ sheetName, rowCount }, "Batch finalized.");
}; };
const init = async (onStop: () => void) => { type LoadLogSheetArgs = {
sheetName: string;
/** The starting row to load. If omitted, loads all rows (expensive). */
fromRow?: number;
};
/** Not currently used. */
export const loadLogSheet = async ({
sheetName,
fromRow = 2, // omit header row
}: LoadLogSheetArgs) => {
const client = sheetsClient!;
const spreadsheetId = config.googleSheetsSpreadsheetId!;
const range = `${sheetName}!A${fromRow}:E`;
const res = await client.spreadsheets.values.get({
spreadsheetId: spreadsheetId,
range,
});
const data = assertData(res);
const values = data.values || [];
const rows = values.slice(1).map((row) => {
return {
model: row[0],
endpoint: row[1],
promptRaw: row[2],
promptFlattened: row[3],
response: row[4],
};
});
activeLogSheet = { sheetName, rows };
};
export const init = async (onStop: () => void) => {
if (sheetsClient) { if (sheetsClient) {
return; return;
} }
@@ -387,5 +420,3 @@ function assertData<T = sheets_v4.Schema$ValueRange>(res: GaxiosResponse<T>) {
} }
return res.data!; return res.data!;
} }
export const sheets = { init, appendBatch };
+1 -6
View File
@@ -6,7 +6,7 @@ database for now.
Due to the limitations of Google Sheets, we'll queue up log entries and flush Due to the limitations of Google Sheets, we'll queue up log entries and flush
them to the API periodically. */ them to the API periodically. */
export type PromptLogEntry = { export interface PromptLogEntry {
model: string; model: string;
endpoint: string; endpoint: string;
/** JSON prompt passed to the model */ /** JSON prompt passed to the model */
@@ -15,11 +15,6 @@ export type PromptLogEntry = {
promptFlattened: string; promptFlattened: string;
response: string; response: string;
// TODO: temperature, top_p, top_k, etc. // TODO: temperature, top_p, top_k, etc.
};
export interface PromptLogBackend {
init(onStop: () => void): Promise<void>;
appendBatch(entries: PromptLogEntry[]): Promise<void>;
} }
export * as logQueue from "./log-queue"; export * as logQueue from "./log-queue";
+5 -19
View File
@@ -1,9 +1,9 @@
/* Queues incoming prompts/responses and periodically flushes them to configured /* Queues incoming prompts/responses and periodically flushes them to configured
* logging backend. */ * logging backend. */
import { config } from "../config";
import { logger } from "../logger"; import { logger } from "../logger";
import { PromptLogBackend, PromptLogEntry } from "."; import { PromptLogEntry } from ".";
import { createPromptLogBackend } from "./backends"; import { sheets } from "./backends";
const FLUSH_INTERVAL = 1000 * 10; const FLUSH_INTERVAL = 1000 * 10;
const MAX_BATCH_SIZE = 25; const MAX_BATCH_SIZE = 25;
@@ -11,19 +11,11 @@ const MAX_BATCH_SIZE = 25;
const queue: PromptLogEntry[] = []; const queue: PromptLogEntry[] = [];
const log = logger.child({ module: "log-queue" }); const log = logger.child({ module: "log-queue" });
let activeBackend: PromptLogBackend | null = null;
let started = false; let started = false;
let timeoutId: NodeJS.Timeout | null = null; let timeoutId: NodeJS.Timeout | null = null;
let retrying = false; let retrying = false;
let consecutiveFailedBatches = 0; let consecutiveFailedBatches = 0;
const getBackend = () => {
if (!activeBackend) {
throw new Error("Log queue not initialized.");
}
return activeBackend;
};
export const enqueue = (payload: PromptLogEntry) => { export const enqueue = (payload: PromptLogEntry) => {
if (!started) { if (!started) {
log.warn("Log queue not started, discarding incoming log entry."); log.warn("Log queue not started, discarding incoming log entry.");
@@ -42,7 +34,7 @@ export const flush = async () => {
const nextBatch = queue.splice(0, batchSize); const nextBatch = queue.splice(0, batchSize);
log.info({ size: nextBatch.length }, "Submitting new batch."); log.info({ size: nextBatch.length }, "Submitting new batch.");
try { try {
await getBackend().appendBatch(nextBatch); await sheets.appendBatch(nextBatch);
retrying = false; retrying = false;
consecutiveFailedBatches = 0; consecutiveFailedBatches = 0;
} catch (e: any) { } catch (e: any) {
@@ -73,13 +65,7 @@ export const flush = async () => {
export const start = async () => { export const start = async () => {
try { try {
const selectedBackend = config.promptLoggingBackend; await sheets.init(() => stop());
if (!selectedBackend) {
throw new Error("No logging backend configured.");
}
activeBackend = createPromptLogBackend(selectedBackend);
await getBackend().init(() => stop());
log.info("Logging backend initialized."); log.info("Logging backend initialized.");
started = true; started = true;
} catch (e) { } catch (e) {
+13 -1
View File
@@ -9,10 +9,12 @@ import { handleProxyError } from "./middleware/common";
import { import {
addKey, addKey,
addAnthropicPreamble, addAnthropicPreamble,
blockZoomerOrigins,
createPreprocessorMiddleware, createPreprocessorMiddleware,
finalizeBody, finalizeBody,
languageFilter, languageFilter,
limitOutputTokens, limitOutputTokens,
removeOriginHeaders,
} from "./middleware/request"; } from "./middleware/request";
import { import {
ProxyResHandlerWithBody, ProxyResHandlerWithBody,
@@ -41,6 +43,8 @@ const getModelsResponse = () => {
"claude-instant-v1.1", "claude-instant-v1.1",
"claude-instant-v1.1-100k", "claude-instant-v1.1-100k",
"claude-instant-v1.0", "claude-instant-v1.0",
"claude-2", // claude-2 is 100k by default it seems
"claude-2.0",
]; ];
const models = claudeVariants.map((id) => ({ const models = claudeVariants.map((id) => ({
@@ -73,6 +77,8 @@ const rewriteAnthropicRequest = (
addAnthropicPreamble, addAnthropicPreamble,
languageFilter, languageFilter,
limitOutputTokens, limitOutputTokens,
blockZoomerOrigins,
removeOriginHeaders,
finalizeBody, finalizeBody,
]; ];
@@ -102,10 +108,16 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
body.proxy_note = `Prompts are logged on this proxy instance. See ${host} for more information.`; body.proxy_note = `Prompts are logged on this proxy instance. See ${host} for more information.`;
} }
if (!req.originalUrl.includes("/v1/complete")) { if (req.inboundApi === "openai") {
req.log.info("Transforming Anthropic response to OpenAI format"); req.log.info("Transforming Anthropic response to OpenAI format");
body = transformAnthropicResponse(body); body = transformAnthropicResponse(body);
} }
// TODO: Remove once tokenization is stable
if (req.debug) {
body.proxy_tokenizer_debug_info = req.debug;
}
res.status(200).json(body); res.status(200).json(body);
}; };
+1 -1
View File
@@ -33,7 +33,7 @@ export const gatekeeper: RequestHandler = (req, res, next) => {
// TODO: Generate anonymous users based on IP address for public or proxy_key // TODO: Generate anonymous users based on IP address for public or proxy_key
// modes so that all middleware can assume a user of some sort is present. // modes so that all middleware can assume a user of some sort is present.
if (token === ADMIN_KEY) { if (ADMIN_KEY && token === ADMIN_KEY) {
return next(); return next();
} }
+30 -6
View File
@@ -2,7 +2,6 @@ import { Request, Response } from "express";
import httpProxy from "http-proxy"; import httpProxy from "http-proxy";
import { ZodError } from "zod"; import { ZodError } from "zod";
const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions"; const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete"; const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
@@ -32,15 +31,23 @@ export function writeErrorResponse(
res.headersSent || res.headersSent ||
res.getHeader("content-type") === "text/event-stream" res.getHeader("content-type") === "text/event-stream"
) { ) {
const errorContent =
statusCode === 403
? JSON.stringify(errorPayload)
: JSON.stringify(errorPayload, null, 2);
const msg = buildFakeSseMessage( const msg = buildFakeSseMessage(
`${errorSource} error (${statusCode})`, `${errorSource} error (${statusCode})`,
JSON.stringify(errorPayload, null, 2), errorContent,
req req
); );
res.write(msg); res.write(msg);
res.write(`data: [DONE]\n\n`); res.write(`data: [DONE]\n\n`);
res.end(); res.end();
} else { } else {
if (req.debug) {
errorPayload.error.proxy_tokenizer_debug_info = req.debug;
}
res.status(statusCode).json(errorPayload); res.status(statusCode).json(errorPayload);
} }
} }
@@ -53,10 +60,12 @@ export const handleProxyError: httpProxy.ErrorCallback = (err, req, res) => {
export const handleInternalError = ( export const handleInternalError = (
err: Error, err: Error,
req: Request, req: Request,
res: Response res: Response,
errorType: string = "proxy_internal_error"
) => { ) => {
try { try {
const isZod = err instanceof ZodError; const isZod = err instanceof ZodError;
const isForbidden = err.name === "ForbiddenError";
if (isZod) { if (isZod) {
writeErrorResponse(req, res, 400, { writeErrorResponse(req, res, 400, {
error: { error: {
@@ -67,10 +76,21 @@ export const handleInternalError = (
message: err.message, message: err.message,
}, },
}); });
} else if (isForbidden) {
// Spoofs a vaguely threatening OpenAI error message. Only invoked by the
// block-zoomers rewriter to scare off tiktokers.
writeErrorResponse(req, res, 403, {
error: {
type: "organization_account_disabled",
code: "policy_violation",
param: null,
message: err.message,
},
});
} else { } else {
writeErrorResponse(req, res, 500, { writeErrorResponse(req, res, 500, {
error: { error: {
type: "proxy_rewriter_error", type: errorType,
proxy_note: `Reverse proxy encountered an error before it could reach the upstream API.`, proxy_note: `Reverse proxy encountered an error before it could reach the upstream API.`,
message: err.message, message: err.message,
stack: err.stack, stack: err.stack,
@@ -91,10 +111,14 @@ export function buildFakeSseMessage(
req: Request req: Request
) { ) {
let fakeEvent; let fakeEvent;
const useBackticks = !type.includes("403");
const msgContent = useBackticks
? `\`\`\`\n[${type}: ${string}]\n\`\`\`\n`
: `[${type}: ${string}]`;
if (req.inboundApi === "anthropic") { if (req.inboundApi === "anthropic") {
fakeEvent = { fakeEvent = {
completion: `\`\`\`\n[${type}: ${string}]\n\`\`\`\n`, completion: msgContent,
stop_reason: type, stop_reason: type,
truncated: false, // I've never seen this be true truncated: false, // I've never seen this be true
stop: null, stop: null,
@@ -109,7 +133,7 @@ export function buildFakeSseMessage(
model: req.body?.model, model: req.body?.model,
choices: [ choices: [
{ {
delta: { content: `\`\`\`\n[${type}: ${string}]\n\`\`\`\n` }, delta: { content: msgContent },
index: 0, index: 0,
finish_reason: type, finish_reason: type,
}, },
-2
View File
@@ -41,8 +41,6 @@ export const addKey: ProxyRequestMiddleware = (proxyReq, req) => {
// For such cases, ignore the requested model entirely. // For such cases, ignore the requested model entirely.
if (req.inboundApi === "openai" && req.outboundApi === "anthropic") { if (req.inboundApi === "openai" && req.outboundApi === "anthropic") {
req.log.debug("Using an Anthropic key for an OpenAI-compatible request"); req.log.debug("Using an Anthropic key for an OpenAI-compatible request");
// We don't assign the model here, that will happen when transforming the
// request body.
assignedKey = keyPool.get("claude-v1"); assignedKey = keyPool.get("claude-v1");
} else { } else {
assignedKey = keyPool.get(req.body.model); assignedKey = keyPool.get(req.body.model);
+34
View File
@@ -0,0 +1,34 @@
import { isCompletionRequest } from "../common";
import { ProxyRequestMiddleware } from ".";
const DISALLOWED_ORIGIN_SUBSTRINGS = "janitorai.com,janitor.ai".split(",");
class ForbiddenError extends Error {
constructor(message: string) {
super(message);
this.name = "ForbiddenError";
}
}
/**
* Blocks requests from Janitor AI users with a fake, scary error message so I
* stop getting emails asking for tech support.
*/
export const blockZoomerOrigins: ProxyRequestMiddleware = (_proxyReq, req) => {
if (!isCompletionRequest(req)) {
return;
}
const origin = req.headers.origin || req.headers.referer;
if (origin && DISALLOWED_ORIGIN_SUBSTRINGS.some((s) => origin.includes(s))) {
// Venus-derivatives send a test prompt to check if the proxy is working.
// We don't want to block that just yet.
if (req.body.messages[0]?.content === "Just say TEST") {
return;
}
throw new ForbiddenError(
`Your access was terminated due to violation of our policies, please check your email for more information. If you believe this is in error and would like to appeal, please contact us through our help center at help.openai.com.`
);
}
};
+36
View File
@@ -0,0 +1,36 @@
import { countTokens } from "../../../tokenization";
import { RequestPreprocessor } from ".";
import { openAIMessagesToClaudePrompt } from "./transform-outbound-payload";
export const checkPromptSize: RequestPreprocessor = async (req) => {
const prompt =
req.inboundApi === "openai" ? req.body.messages : req.body.prompt;
let result;
if (req.outboundApi === "openai") {
result = await countTokens({ req, prompt, service: "openai" });
} else {
// If we're doing OpenAI-to-Anthropic, we need to convert the messages to a
// prompt first before counting tokens, as that process affects the token
// count.
let promptStr =
req.inboundApi === "anthropic"
? prompt
: openAIMessagesToClaudePrompt(prompt);
result = await countTokens({
req,
prompt: promptStr,
service: "anthropic",
});
}
req.promptTokens = result.token_count;
// TODO: Remove once token counting is stable
req.log.debug({ result }, "Counted prompt tokens");
req.debug = req.debug ?? {};
req.debug = {
...req.debug,
...result,
};
};
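For illustration only (not part of the diff): after this preprocessor runs, the request carries roughly the following bookkeeping. The values are hypothetical; the field names match the TokenCountResult shape defined in the tokenization module further down.
// req.promptTokens === 1234
// req.debug === {
//   token_count: 1234,
//   tokenizer: "tiktoken",
//   tokenization_duration_ms: 2.1,
// }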
+3
@@ -4,16 +4,19 @@ import type { ProxyReqCallback } from "http-proxy";
// Express middleware (runs before http-proxy-middleware, can be async)
export { createPreprocessorMiddleware } from "./preprocess";
+ export { checkPromptSize } from "./check-prompt-size";
export { setApiFormat } from "./set-api-format";
export { transformOutboundPayload } from "./transform-outbound-payload";
// HPM middleware (runs on onProxyReq, cannot be async)
export { addKey } from "./add-key";
export { addAnthropicPreamble } from "./add-anthropic-preamble";
+ export { blockZoomerOrigins } from "./block-zoomer-origins";
export { finalizeBody } from "./finalize-body";
export { languageFilter } from "./language-filter";
export { limitCompletions } from "./limit-completions";
export { limitOutputTokens } from "./limit-output-tokens";
+ export { removeOriginHeaders } from "./remove-origin-headers";
export { transformKoboldPayload } from "./transform-kobold-payload";
/**
+8 -2
@@ -1,6 +1,11 @@
import { RequestHandler } from "express";
import { handleInternalError } from "../common";
- import { RequestPreprocessor, setApiFormat, transformOutboundPayload } from ".";
+ import {
+   RequestPreprocessor,
+   checkPromptSize,
+   setApiFormat,
+   transformOutboundPayload,
+ } from ".";
/**
* Returns a middleware function that processes the request body into the given
@@ -12,6 +17,7 @@ export const createPreprocessorMiddleware = (
): RequestHandler => {
const preprocessors: RequestPreprocessor[] = [
setApiFormat(apiFormat),
+ checkPromptSize,
transformOutboundPayload,
...(additionalPreprocessors ?? []),
];
@@ -24,7 +30,7 @@ export const createPreprocessorMiddleware = (
next();
} catch (error) {
req.log.error(error, "Error while executing request preprocessor");
- handleInternalError(error as Error, req, res);
+ handleInternalError(error as Error, req, res, "proxy_preprocessor_error");
}
};
};
@@ -0,0 +1,10 @@
import { ProxyRequestMiddleware } from ".";
/**
* Removes origin and referer headers before sending the request to the API for
* privacy reasons.
**/
export const removeOriginHeaders: ProxyRequestMiddleware = (proxyReq) => {
proxyReq.setHeader("origin", "");
proxyReq.setHeader("referer", "");
};
@@ -2,7 +2,13 @@ import { Request } from "express";
import { z } from "zod";
import { isCompletionRequest } from "../common";
import { RequestPreprocessor } from ".";
- // import { countTokens } from "../../../tokenization";
+ import { OpenAIPromptMessage } from "../../../tokenization/openai";
+ /**
+  * The maximum number of tokens an Anthropic prompt can have before we switch to
+  * the larger claude-100k context model.
+  */
+ const CLAUDE_100K_TOKEN_THRESHOLD = 8200;
// https://console.anthropic.com/docs/api/reference#-v1-complete
const AnthropicV1CompleteSchema = z.object({
@@ -55,10 +61,9 @@ const OpenAIV1ChatCompletionSchema = z.object({
/** Transforms an incoming request body to one that matches the target API. */
export const transformOutboundPayload: RequestPreprocessor = async (req) => {
const sameService = req.inboundApi === req.outboundApi;
- const alreadyTransformed = req.retryCount > 0;
const notTransformable = !isCompletionRequest(req);
- if (alreadyTransformed || notTransformable) {
+ if (notTransformable) {
return;
}
@@ -69,6 +74,7 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
? OpenAIV1ChatCompletionSchema
: AnthropicV1CompleteSchema;
const result = validator.safeParse(req.body);
if (!result.success) {
req.log.error(
{ issues: result.error.issues, body: req.body },
@@ -76,11 +82,14 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
);
throw result.error;
}
+ validatePromptSize(req);
return;
}
if (req.inboundApi === "openai" && req.outboundApi === "anthropic") {
req.body = openaiToAnthropic(req.body, req);
+ validatePromptSize(req);
return;
}
@@ -99,46 +108,35 @@ function openaiToAnthropic(body: any, req: Request) {
throw result.error;
}
+ // Anthropic has started versioning their API, indicated by an HTTP header
+ // `anthropic-version`. The new June 2023 version is not backwards compatible
+ // with our OpenAI-to-Anthropic transformations so we need to explicitly
+ // request the older version for now. 2023-01-01 will be removed in September.
+ // https://docs.anthropic.com/claude/reference/versioning
+ req.headers["anthropic-version"] = "2023-01-01";
const { messages, ...rest } = result.data;
- const prompt =
-   result.data.messages
-     .map((m) => {
-       let role: string = m.role;
-       if (role === "assistant") {
-         role = "Assistant";
-       } else if (role === "system") {
-         role = "System";
-       } else if (role === "user") {
-         role = "Human";
-       }
-       // https://console.anthropic.com/docs/prompt-design
-       // `name` isn't supported by Anthropic but we can still try to use it.
-       return `\n\n${role}: ${m.name?.trim() ? `(as ${m.name}) ` : ""}${
-         m.content
-       }`;
-     })
-     .join("") + "\n\nAssistant: ";
+ const prompt = openAIMessagesToClaudePrompt(messages);
- // Claude 1.2 has been selected as the default for smaller prompts because it
- // is said to be less pozzed than the newer 1.3 model. But this is not based
- // on any empirical testing, just speculation based on Anthropic stating that
- // 1.3 is "safer and less susceptible to adversarial attacks" than 1.2.
- // From my own interactions, both are pretty easy to jailbreak so I don't
- // think there's much of a difference, honestly.
- // If you want to override the model selection, you can set the
- // CLAUDE_BIG_MODEL and CLAUDE_SMALL_MODEL environment variables in your
- // .env file.
- // Using "v1" of a model will automatically select the latest version of that
- // model on the Anthropic side.
+ // No longer defaulting to `claude-v1.2` because it seems to be in the process
+ // of being deprecated. `claude-v1` is the new default.
+ // If you have keys that can still use `claude-v1.2`, you can set the
+ // CLAUDE_BIG_MODEL and CLAUDE_SMALL_MODEL environment variables in your .env
+ // file.
const CLAUDE_BIG = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
- const CLAUDE_SMALL = process.env.CLAUDE_SMALL_MODEL || "claude-v1.2";
+ const CLAUDE_SMALL = process.env.CLAUDE_SMALL_MODEL || "claude-v1";
- // TODO: Finish implementing tokenizer for more accurate model selection.
- // This currently uses _character count_, not token count.
- const model = prompt.length > 25000 ? CLAUDE_BIG : CLAUDE_SMALL;
+ const contextTokens = Number(req.promptTokens ?? 0) + Number(rest.max_tokens);
+ const model =
+   (contextTokens ?? 0) > CLAUDE_100K_TOKEN_THRESHOLD
+     ? CLAUDE_BIG
+     : CLAUDE_SMALL;
+ req.log.debug(
+   { contextTokens, model, CLAUDE_100K_TOKEN_THRESHOLD },
+   "Selected Claude model"
+ );
let stops = rest.stop
? Array.isArray(rest.stop)
@@ -161,3 +159,63 @@ function openaiToAnthropic(body: any, req: Request) {
stop_sequences: stops,
};
}
export function openAIMessagesToClaudePrompt(messages: OpenAIPromptMessage[]) {
return (
messages
.map((m) => {
let role: string = m.role;
if (role === "assistant") {
role = "Assistant";
} else if (role === "system") {
role = "System";
} else if (role === "user") {
role = "Human";
}
// https://console.anthropic.com/docs/prompt-design
// `name` isn't supported by Anthropic but we can still try to use it.
return `\n\n${role}: ${m.name?.trim() ? `(as ${m.name}) ` : ""}${
m.content
}`;
})
.join("") + "\n\nAssistant:"
);
}
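For illustration (not part of the diff), this is roughly what the conversion above produces for a small chat; the role mapping and the trailing "\n\nAssistant:" follow directly from the function:
const example = openAIMessagesToClaudePrompt([
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", name: "Anon", content: "Hello!" },
]);
// example ===
//   "\n\nSystem: You are a helpful assistant." +
//   "\n\nHuman: (as Anon) Hello!" +
//   "\n\nAssistant:"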
function validatePromptSize(req: Request) {
const promptTokens = req.promptTokens || 0;
const model = req.body.model;
let maxTokensForModel = 0;
// Check the -32k variant before the plain gpt-4 prefix so 32k models get the larger limit.
if (model.match(/gpt-4-32k/)) {
maxTokensForModel = 32768;
} else if (model.match(/gpt-4/)) {
maxTokensForModel = 8192;
} else if (model.match(/gpt-3\.5/)) {
maxTokensForModel = 4096;
} else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?(?:-100k)/)) {
// Claude models don't throw an error if you exceed the token limit and
// instead just become extremely slow and give schizo results, so we will be
// more conservative with the token limit for them.
maxTokensForModel = 100000 * 0.98;
} else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?$/)) {
maxTokensForModel = 9000 * 0.98;
} else {
// I don't trust my regular expressions enough to throw an error here so
// we just log a warning and allow 100k tokens.
req.log.warn({ model }, "Unknown model, using 100k token limit.");
maxTokensForModel = 100000;
}
if (req.debug) {
req.debug.calculated_max_tokens = maxTokensForModel;
}
z.number()
.max(
maxTokensForModel,
`Prompt is too long for model ${model} (${promptTokens} tokens, max ${maxTokensForModel})`
)
.parse(promptTokens);
req.log.debug({ promptTokens, maxTokensForModel }, "Prompt size validated");
}
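Two quick walkthroughs of the logic above, with made-up numbers:
// Claude model selection: contextTokens = promptTokens + max_tokens, compared to 8200.
//   promptTokens = 7900, max_tokens = 500 -> 8400 > 8200 -> CLAUDE_BIG (claude-v1-100k)
//   promptTokens = 2000, max_tokens = 300 -> 2300         -> CLAUDE_SMALL (claude-v1)
// Prompt validation: z.number().max(...) throws when the counted prompt exceeds the limit.
//   model = "gpt-4", promptTokens = 9000 -> limit 8192 ->
//   "Prompt is too long for model gpt-4 (9000 tokens, max 8192)"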
+4
@@ -377,6 +377,10 @@ function handleOpenAIRateLimitError(
// Billing quota exceeded (key is dead, disable it)
keyPool.disable(req.key!);
errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
+ } else if (type === "access_terminated") {
+   // Account banned (key is dead, disable it)
+   keyPool.disable(req.key!);
+   errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
} else if (type === "billing_not_active") {
// Billing is not active (key is dead, disable it)
keyPool.disable(req.key!);
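For reference, the upstream error this new branch matches has approximately the following shape; the message text is illustrative, only the type field is checked here:
// {
//   "error": {
//     "message": "Your account has been terminated due to a violation of our policies.",
//     "type": "access_terminated",
//     "param": null,
//     "code": null
//   }
// }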
+18 -3
@@ -9,11 +9,13 @@ import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common";
import {
addKey,
+ blockZoomerOrigins,
createPreprocessorMiddleware,
finalizeBody,
languageFilter,
limitCompletions,
limitOutputTokens,
+ removeOriginHeaders,
} from "./middleware/request";
import {
createOnProxyResHandler,
@@ -28,13 +30,19 @@ function getModelsResponse() {
return modelsCache;
}
+ // https://platform.openai.com/docs/models/overview
const gptVariants = [
"gpt-4",
- "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-0314", // EOL 2023-09-13
"gpt-4-32k",
- "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-4-32k-0314", // EOL 2023-09-13
"gpt-3.5-turbo",
- "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0301", // EOL 2023-09-13
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-16k-0613",
];
const gpt4Available = keyPool.list().filter((key) => {
@@ -87,6 +95,8 @@ const rewriteRequest = (
languageFilter,
limitOutputTokens,
limitCompletions,
+ blockZoomerOrigins,
+ removeOriginHeaders,
finalizeBody,
];
@@ -115,6 +125,11 @@ const openaiResponseHandler: ProxyResHandlerWithBody = async (
body.proxy_note = `Prompts are logged on this proxy instance. See ${host} for more information.`;
}
+ // TODO: Remove once tokenization is stable
+ if (req.debug) {
+   body.proxy_tokenizer_debug_info = req.debug;
+ }
res.status(200).json(body);
};
+5 -2
@@ -12,6 +12,7 @@ import { handleInfoPage } from "./info-page";
import { logQueue } from "./prompt-logging";
import { start as startRequestQueue } from "./proxy/queue";
import { init as initUserStore } from "./proxy/auth/user-store";
+ import { init as initTokenizers } from "./tokenization";
import { checkOrigin } from "./proxy/check-origin";
const PORT = config.port;
@@ -99,6 +100,8 @@ async function start() {
keyPool.init();
+ await initTokenizers();
if (config.gatekeeper === "user_token") {
await initUserStore();
}
@@ -197,8 +200,8 @@ async function setBuildInfo() {
logger.error(
{
error,
- stdout: error.stdout.toString(),
- stderr: error.stderr.toString(),
+ stdout: error.stdout?.toString(),
+ stderr: error.stderr?.toString(),
},
"Failed to get commit SHA.",
error
+160
@@ -0,0 +1,160 @@
import { spawn, ChildProcess } from "child_process";
import { join } from "path";
import { logger } from "../logger";
const TOKENIZER_SOCKET = "tcp://localhost:5555";
const log = logger.child({ module: "claude-ipc" });
const pythonLog = logger.child({ module: "claude-python" });
let tokenizer: ChildProcess;
let initialized = false;
let socket: any; // zeromq.Dealer, not sure how to import it safely as it is optional
export async function init() {
log.info("Initializing Claude tokenizer IPC");
try {
tokenizer = await launchTokenizer();
const zmq = await import("zeromq");
socket = new zmq.Dealer({ sendTimeout: 500 });
socket.connect(TOKENIZER_SOCKET);
await socket.send(["init"]);
const response = await socket.receive();
if (response.toString() !== "ok") {
throw new Error("Unexpected init response");
}
// Start message pump
processMessages();
// Test tokenizer
const result = await requestTokenCount({
requestId: "init-test",
prompt: "test prompt",
});
if (result !== 2) {
log.error({ result }, "Unexpected test token count");
throw new Error("Unexpected test token count");
}
initialized = true;
} catch (err) {
log.error({ err: err.message }, "Failed to initialize Claude tokenizer");
if (process.env.NODE_ENV !== "production") {
console.error(
`\nClaude tokenizer failed to initialize.\nIf you want to use the tokenizer, see the Optional Dependencies documentation.\n`
);
}
return false;
}
log.info("Claude tokenizer IPC ready");
return true;
}
const pendingRequests = new Map<
string,
{ resolve: (tokens: number) => void }
>();
export async function requestTokenCount({
requestId,
prompt,
}: {
requestId: string;
prompt: string;
}) {
if (!socket) {
throw new Error("Claude tokenizer is not initialized");
}
log.debug({ requestId, chars: prompt.length }, "Requesting token count");
await socket.send(["tokenize", requestId, prompt]);
log.debug({ requestId }, "Waiting for socket response");
return new Promise<number>(async (resolve, reject) => {
const resolveFn = (tokens: number) => {
log.debug({ requestId, tokens }, "Received token count");
pendingRequests.delete(requestId);
resolve(tokens);
};
pendingRequests.set(requestId, { resolve: resolveFn });
const timeout = initialized ? 500 : 10000;
setTimeout(() => {
if (pendingRequests.has(requestId)) {
pendingRequests.delete(requestId);
const err = "Tokenizer deadline exceeded";
log.warn({ requestId }, err);
reject(new Error(err));
}
}, timeout);
});
}
async function processMessages() {
if (!socket) {
throw new Error("Claude tokenizer is not initialized");
}
log.debug("Starting message loop");
for await (const [requestId, tokens] of socket) {
const request = pendingRequests.get(requestId.toString());
if (!request) {
log.error({ requestId }, "No pending request found for incoming message");
continue;
}
request.resolve(Number(tokens.toString()));
}
}
async function launchTokenizer() {
return new Promise<ChildProcess>((resolve, reject) => {
let resolved = false;
const python = process.platform === "win32" ? "python" : "python3";
const proc = spawn(python, [
"-u",
join(__dirname, "tokenization", "claude-tokenizer.py"),
]);
if (!proc) {
reject(new Error("Failed to spawn Claude tokenizer"));
}
function cleanup() {
socket?.close();
socket = undefined!;
tokenizer = undefined!;
}
proc.stdout!.on("data", (data) => {
pythonLog.info(data.toString().trim());
});
proc.stderr!.on("data", (data) => {
pythonLog.error(data.toString().trim());
});
proc.on("error", (err) => {
pythonLog.error({ err }, "Claude tokenizer error");
cleanup();
if (!resolved) {
resolved = true;
reject(err);
}
});
proc.on("close", (code) => {
pythonLog.info(`Claude tokenizer exited with code ${code}`);
cleanup();
if (code !== 0 && !resolved) {
resolved = true;
reject(new Error("Claude tokenizer exited immediately"));
}
});
// Wait a moment to catch any immediate errors (missing imports, etc)
setTimeout(() => {
if (!resolved) {
resolved = true;
resolve(proc);
}
}, 200);
});
}
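Assuming init() has resolved, a caller would use this IPC client roughly as follows (the request id is hypothetical; the real caller passes req.id):
import { requestTokenCount } from "./claude-ipc";

const tokens = await requestTokenCount({
  requestId: "example-1",
  prompt: "\n\nHuman: Hello!\n\nAssistant:",
});
// Resolves with the integer count from the Python process, or rejects with
// "Tokenizer deadline exceeded" if no reply arrives within the timeout.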
+54
@@ -0,0 +1,54 @@
"""
This is a small process running alongside the main NodeJS server intended to
tokenize prompts for Claude, as currently Anthropic only ships a Python
implementation for their tokenizer.
ZeroMQ is used for IPC between the NodeJS server and this process.
"""
import zmq
import anthropic
def create_socket():
context = zmq.Context()
socket = context.socket(zmq.ROUTER)
socket.bind("tcp://*:5555")
return context, socket
def init(socket):
print("claude-tokenizer.py: starting")
try:
while True:
message = socket.recv_multipart()
routing_id, command = message
if command == b"init":
print("claude-tokenizer.py: initialized")
socket.send_multipart([routing_id, b"ok"])
break
except Exception as e:
print("claude-tokenizer.py: failed to initialize")
return
message_processor(socket)
def message_processor(socket):
while True:
try:
message = socket.recv_multipart()
routing_id, command, request_id, payload = message
payload = payload.decode("utf-8")
if command == b"exit":
print("claude-tokenizer.py: exiting")
break
elif command == b"tokenize":
token_count = anthropic.count_tokens(payload)
socket.send_multipart([routing_id, request_id, str(token_count).encode("utf-8")])
else:
print("claude-tokenizer.py: unknown message type")
except Exception as e:
print(f"claude-tokenizer.py: failed to process message ({e})")
break
if __name__ == "__main__":
context, socket = create_socket()
init(socket)
socket.close()
context.term()
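A single tokenize round-trip between the two processes, as framed by the code above (the id and count are illustrative; the ROUTER socket adds and strips the routing frame automatically):
Node (DEALER) -> Python (ROUTER): ["tokenize", "req-42", "Hello, Claude!"]
Python (ROUTER) -> Node (DEALER): ["req-42", "4"]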
+1
@@ -0,0 +1 @@
export { init, countTokens } from "./tokenizer";
+57
@@ -0,0 +1,57 @@
import { Tiktoken } from "tiktoken/lite";
import cl100k_base from "tiktoken/encoders/cl100k_base.json";
let encoder: Tiktoken;
export function init() {
encoder = new Tiktoken(
cl100k_base.bpe_ranks,
cl100k_base.special_tokens,
cl100k_base.pat_str
);
return true;
}
// Implementation based on and tested against:
// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
export function getTokenCount(messages: any[], model: string) {
const gpt4 = model.startsWith("gpt-4");
const tokensPerMessage = gpt4 ? 3 : 4;
const tokensPerName = gpt4 ? 1 : -1; // turbo omits role if name is present
let numTokens = 0;
for (const message of messages) {
numTokens += tokensPerMessage;
for (const key of Object.keys(message)) {
{
const value = message[key];
// Bail out if we get a huge message or exceed the token limit, to prevent DoS.
// 100k tokens allows for future 100k-context GPT-4 models, and 250k characters
// is just a sanity check.
if (value.length > 250000 || numTokens > 100000) {
numTokens = 100000;
return {
tokenizer: "tiktoken (prompt length limit exceeded)",
token_count: numTokens,
};
}
numTokens += encoder.encode(message[key]).length;
if (key === "name") {
numTokens += tokensPerName;
}
}
}
}
numTokens += 3; // every reply is primed with <|start|>assistant<|message|>
return { tokenizer: "tiktoken", token_count: numTokens };
}
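A rough worked example of the accounting above, with hypothetical encoded lengths:
// gpt-4 request with two messages and no `name` fields:
//   per-message overhead: 2 * 3                  =  6
//   encoded role + content values, e.g. 1+9+1+4  = 15
//   reply primer                                 =  3
//   total                                        = 24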
export type OpenAIPromptMessage = {
name?: string;
content: string;
role: string;
};
+129
@@ -0,0 +1,129 @@
import { Request } from "express";
import childProcess from "child_process";
import { config } from "../config";
import { logger } from "../logger";
import {
init as initIpc,
requestTokenCount as requestClaudeTokenCount,
} from "./claude-ipc";
import {
init as initEncoder,
getTokenCount as getOpenAITokenCount,
OpenAIPromptMessage,
} from "./openai";
let canTokenizeClaude = false;
export async function init() {
if (config.anthropicKey) {
if (!isPythonInstalled()) {
const skipWarning = !!process.env.DISABLE_MISSING_PYTHON_WARNING;
process.env.MISSING_PYTHON_WARNING = skipWarning ? "" : "true";
} else {
canTokenizeClaude = await initIpc();
if (!canTokenizeClaude) {
logger.warn(
"Anthropic key is set, but tokenizer is not available. Claude prompts will use a naive estimate for token count."
);
}
}
}
if (config.openaiKey) {
initEncoder();
}
}
type TokenCountResult = {
token_count: number;
tokenizer: string;
tokenization_duration_ms: number;
};
type TokenCountRequest = {
req: Request;
} & (
| { prompt: string; service: "anthropic" }
| { prompt: OpenAIPromptMessage[]; service: "openai" }
);
export async function countTokens({
req,
service,
prompt,
}: TokenCountRequest): Promise<TokenCountResult> {
const time = process.hrtime();
switch (service) {
case "anthropic":
if (!canTokenizeClaude) {
const result = guesstimateTokens(prompt);
return {
token_count: result,
tokenizer: "guesstimate (claude-ipc disabled)",
tokenization_duration_ms: getElapsedMs(time),
};
}
// If the prompt is absolutely massive (possibly malicious) don't even try
if (prompt.length > 500000) {
return {
token_count: guesstimateTokens(JSON.stringify(prompt)),
tokenizer: "guesstimate (prompt too long)",
tokenization_duration_ms: getElapsedMs(time),
};
}
try {
const result = await requestClaudeTokenCount({
requestId: String(req.id),
prompt,
});
return {
token_count: result,
tokenizer: "claude-ipc",
tokenization_duration_ms: getElapsedMs(time),
};
} catch (e: any) {
req.log.error("Failed to tokenize with claude_tokenizer", e);
const result = guesstimateTokens(prompt);
return {
token_count: result,
tokenizer: `guesstimate (claude-ipc failed: ${e.message})`,
tokenization_duration_ms: getElapsedMs(time),
};
}
case "openai":
const result = getOpenAITokenCount(prompt, req.body.model);
return {
...result,
tokenization_duration_ms: getElapsedMs(time),
};
default:
throw new Error(`Unknown service: ${service}`);
}
}
function getElapsedMs(time: [number, number]) {
const diff = process.hrtime(time);
return diff[0] * 1000 + diff[1] / 1e6;
}
function guesstimateTokens(prompt: string) {
// From Anthropic's docs:
// The maximum length of prompt that Claude can see is its context window.
// Claude's context window is currently ~6500 words / ~8000 tokens /
// ~28000 Unicode characters.
// This suggests 0.28 tokens per character but in practice this seems to be
// a substantial underestimate in some cases.
return Math.ceil(prompt.length * 0.325);
}
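A quick sanity check of the fallback estimate above:
// guesstimateTokens("a".repeat(10000)) === Math.ceil(10000 * 0.325) === 3250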
function isPythonInstalled() {
try {
const python = process.platform === "win32" ? "python" : "python3";
childProcess.execSync(`${python} --version`, { stdio: "ignore" });
return true;
} catch (err) {
logger.debug({ err: err.message }, "Python not installed.");
return false;
}
}
+3
@@ -18,6 +18,9 @@ declare global {
onAborted?: () => void;
proceed: () => void;
heartbeatInterval?: NodeJS.Timeout;
+ promptTokens?: number;
+ // TODO: remove later
+ debug: Record<string, any>;
}
}
}
+3 -1
@@ -9,7 +9,9 @@
"skipLibCheck": true, "skipLibCheck": true,
"skipDefaultLibCheck": true, "skipDefaultLibCheck": true,
"outDir": "build", "outDir": "build",
"sourceMap": true "sourceMap": true,
"resolveJsonModule": true,
"useUnknownInCatchVariables": false
}, },
"include": ["src"], "include": ["src"],
"exclude": ["node_modules"], "exclude": ["node_modules"],