Refactor project structure and add user self-serve UI (khanon/oai-reverse-proxy!41)

khanon
2023-09-02 19:36:44 +00:00
parent 435b46ad4d
commit f05e196994
67 changed files with 993 additions and 381 deletions
@@ -0,0 +1,27 @@
import { getTokenizer } from "@anthropic-ai/tokenizer";
import { Tiktoken } from "tiktoken/lite";

let encoder: Tiktoken;

export function init() {
  // The library also exports a `countTokens` function, but it instantiates a
  // new tokenizer on every call, which is too slow for per-request use.
  encoder = getTokenizer();
  return true;
}

export function getTokenCount(prompt: string, _model: string) {
  // Don't try tokenizing if the prompt is massive, to prevent DoS.
  // 500k characters should be sufficient for all supported models.
  if (prompt.length > 500000) {
    return {
      tokenizer: "length fallback",
      token_count: 100000,
    };
  }

  return {
    tokenizer: "@anthropic-ai/tokenizer",
    token_count: encoder.encode(prompt.normalize("NFKC"), "all").length,
  };
}
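As a usage sketch (not part of the commit; the import path and model name are illustrative), the module is initialized once at startup and then queried per request:

// Hypothetical caller; "./claude" and the model name are assumptions.
import { init, getTokenCount } from "./claude";

init(); // builds the tokenizer once, instead of once per call

const { tokenizer, token_count } = getTokenCount(
  "\n\nHuman: Hello!\n\nAssistant:",
  "claude-v1"
);
console.log(tokenizer, token_count);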
@@ -0,0 +1,2 @@
export { OpenAIPromptMessage } from "./openai";
export { init, countTokens } from "./tokenizer";
@@ -0,0 +1,80 @@
import { Tiktoken } from "tiktoken/lite";
import cl100k_base from "tiktoken/encoders/cl100k_base.json";

let encoder: Tiktoken;

export function init() {
  encoder = new Tiktoken(
    cl100k_base.bpe_ranks,
    cl100k_base.special_tokens,
    cl100k_base.pat_str
  );
  return true;
}

// Tested against:
// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
export function getTokenCount(
  prompt: string | OpenAIPromptMessage[],
  model: string
) {
  if (typeof prompt === "string") {
    return getTextTokenCount(prompt);
  }

  const gpt4 = model.startsWith("gpt-4");
  const tokensPerMessage = gpt4 ? 3 : 4;
  const tokensPerName = gpt4 ? 1 : -1; // turbo omits role if name is present

  let numTokens = 0;
  for (const message of prompt) {
    numTokens += tokensPerMessage;
    for (const key of Object.keys(message)) {
      const value = message[key as keyof OpenAIPromptMessage];
      if (!value || typeof value !== "string") continue;

      // Bail out if we get a huge message or exceed the token limit, to
      // prevent DoS. 100k tokens allows for future 100k-context GPT-4
      // models, and 500k characters is just a sanity check.
      if (value.length > 500000 || numTokens > 100000) {
        numTokens = 100000;
        return {
          tokenizer: "tiktoken (prompt length limit exceeded)",
          token_count: numTokens,
        };
      }

      numTokens += encoder.encode(value).length;
      if (key === "name") {
        numTokens += tokensPerName;
      }
    }
  }
  numTokens += 3; // every reply is primed with <|start|>assistant<|message|>
  return { tokenizer: "tiktoken", token_count: numTokens };
}

function getTextTokenCount(prompt: string) {
  if (prompt.length > 500000) {
    return {
      tokenizer: "length fallback",
      token_count: 100000,
    };
  }

  return {
    tokenizer: "tiktoken",
    token_count: encoder.encode(prompt).length,
  };
}

export type OpenAIPromptMessage = {
  name?: string;
  content: string;
  role: string;
};
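To make the accounting concrete, here is a usage sketch (illustrative, not from this commit; the "./openai" import path is an assumption): with gpt-3.5-turbo, each message costs 4 overhead tokens plus the encoded text of each of its string fields (role, content, and name if present), and 3 more tokens prime the reply.

// Hypothetical caller; the "./openai" import path is an assumption.
import { init, getTokenCount, OpenAIPromptMessage } from "./openai";

init();

const messages: OpenAIPromptMessage[] = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "Hello!" },
];

// 2 messages x 4 overhead tokens, plus the encoded role/content of each
// message, plus 3 tokens priming the assistant's reply.
console.log(getTokenCount(messages, "gpt-3.5-turbo"));
// => { tokenizer: "tiktoken", token_count: ... }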
@@ -0,0 +1,59 @@
import { Request } from "express";
import { config } from "../../config";
import {
  init as initClaude,
  getTokenCount as getClaudeTokenCount,
} from "./claude";
import {
  init as initOpenAi,
  getTokenCount as getOpenAITokenCount,
  OpenAIPromptMessage,
} from "./openai";

export async function init() {
  if (config.anthropicKey) {
    initClaude();
  }
  if (config.openaiKey) {
    initOpenAi();
  }
}

type TokenCountResult = {
  token_count: number;
  tokenizer: string;
  tokenization_duration_ms: number;
};

// Each request carries either a prompt or a completion, never both.
type TokenCountRequest = { req: Request } & (
  | { prompt: OpenAIPromptMessage[]; completion?: never; service: "openai" }
  | { prompt: string; completion?: never; service: "anthropic" }
  | { prompt?: never; completion: string; service: "openai" }
  | { prompt?: never; completion: string; service: "anthropic" }
);

export async function countTokens({
  req,
  service,
  prompt,
  completion,
}: TokenCountRequest): Promise<TokenCountResult> {
  const time = process.hrtime();
  switch (service) {
    case "anthropic":
      return {
        ...getClaudeTokenCount(prompt ?? completion, req.body.model),
        tokenization_duration_ms: getElapsedMs(time),
      };
    case "openai":
      return {
        ...getOpenAITokenCount(prompt ?? completion, req.body.model),
        tokenization_duration_ms: getElapsedMs(time),
      };
    default:
      throw new Error(`Unknown service: ${service}`);
  }
}

function getElapsedMs(time: [number, number]) {
  const diff = process.hrtime(time);
  return diff[0] * 1000 + diff[1] / 1e6;
}
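A sketch of how a request handler might call this entry point (the import path and the shape of req.body are assumptions, not part of this diff):

// Hypothetical caller; "./tokenization" and req.body.messages are assumptions.
import { Request } from "express";
import { countTokens } from "./tokenization";

async function measurePrompt(req: Request) {
  const result = await countTokens({
    req,
    service: "openai",
    prompt: req.body.messages,
  });
  // e.g. { token_count: 42, tokenizer: "tiktoken", tokenization_duration_ms: 0.8 }
  return result;
}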