Refactor project structure and add user self-serve UI (khanon/oai-reverse-proxy!41)
@@ -0,0 +1,27 @@
import { getTokenizer } from "@anthropic-ai/tokenizer";
import { Tiktoken } from "tiktoken/lite";

let encoder: Tiktoken;

export function init() {
  // They export a `countTokens` function too, but it instantiates a new
  // tokenizer every single time and it is not fast...
  encoder = getTokenizer();
  return true;
}

export function getTokenCount(prompt: string, _model: string) {
  // Don't try tokenizing if the prompt is massive, to prevent DoS.
  // 500k characters should be sufficient for all supported models.
  if (prompt.length > 500000) {
    return {
      tokenizer: "length fallback",
      token_count: 100000,
    };
  }

  return {
    tokenizer: "@anthropic-ai/tokenizer",
    token_count: encoder.encode(prompt.normalize("NFKC"), "all").length,
  };
}
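A minimal usage sketch for this module, assuming it is imported as ./claude (the diff viewer omits file names, so the path is a guess):

import { init, getTokenCount } from "./claude";

// init() must run once before getTokenCount(), otherwise `encoder` is
// still undefined when encode() is called.
init();
const result = getTokenCount("Hello, Claude!", "claude-v1");
console.log(result.tokenizer, result.token_count);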
@@ -0,0 +1,2 @@
export { OpenAIPromptMessage } from "./openai";
export { init, countTokens } from "./tokenizer";
@@ -0,0 +1,80 @@
import { Tiktoken } from "tiktoken/lite";
import cl100k_base from "tiktoken/encoders/cl100k_base.json";

let encoder: Tiktoken;

export function init() {
  encoder = new Tiktoken(
    cl100k_base.bpe_ranks,
    cl100k_base.special_tokens,
    cl100k_base.pat_str
  );
  return true;
}

// Tested against:
// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

export function getTokenCount(
  prompt: string | OpenAIPromptMessage[],
  model: string
) {
  if (typeof prompt === "string") {
    return getTextTokenCount(prompt);
  }

  const gpt4 = model.startsWith("gpt-4");

  const tokensPerMessage = gpt4 ? 3 : 4;
  const tokensPerName = gpt4 ? 1 : -1; // turbo omits role if name is present

  let numTokens = 0;

  for (const message of prompt) {
    numTokens += tokensPerMessage;
    for (const key of Object.keys(message)) {
      const value = message[key as keyof OpenAIPromptMessage];
      if (!value || typeof value !== "string") continue;

      // Break if we get a huge message or exceed the token limit to prevent
      // DoS. 100k tokens allows for future 100k GPT-4 models, and 500k
      // characters is just a sanity check.
      if (value.length > 500000 || numTokens > 100000) {
        numTokens = 100000;
        return {
          tokenizer: "tiktoken (prompt length limit exceeded)",
          token_count: numTokens,
        };
      }

      numTokens += encoder.encode(value).length;
      if (key === "name") {
        numTokens += tokensPerName;
      }
    }
  }
  numTokens += 3; // every reply is primed with <|start|>assistant<|message|>
  return { tokenizer: "tiktoken", token_count: numTokens };
}

function getTextTokenCount(prompt: string) {
  if (prompt.length > 500000) {
    return {
      tokenizer: "length fallback",
      token_count: 100000,
    };
  }

  return {
    tokenizer: "tiktoken",
    token_count: encoder.encode(prompt).length,
  };
}

export type OpenAIPromptMessage = {
  name?: string;
  content: string;
  role: string;
};
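A usage sketch for the chat-message path, assuming an import path of ./openai; the messages and model name below are illustrative only:

import { init, getTokenCount, OpenAIPromptMessage } from "./openai";

init();
const messages: OpenAIPromptMessage[] = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", name: "example-user", content: "Hello!" },
];
// Cookbook accounting: 3 tokens per message on gpt-4 (4 on turbo), a
// +1/-1 adjustment when `name` is present, plus 3 tokens priming the reply.
const { tokenizer, token_count } = getTokenCount(messages, "gpt-4");
console.log(tokenizer, token_count);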
@@ -0,0 +1,59 @@
import { Request } from "express";
import { config } from "../../config";
import {
  init as initClaude,
  getTokenCount as getClaudeTokenCount,
} from "./claude";
import {
  init as initOpenAi,
  getTokenCount as getOpenAITokenCount,
  OpenAIPromptMessage,
} from "./openai";

export async function init() {
  if (config.anthropicKey) {
    initClaude();
  }
  if (config.openaiKey) {
    initOpenAi();
  }
}

type TokenCountResult = {
  token_count: number;
  tokenizer: string;
  tokenization_duration_ms: number;
};

type TokenCountRequest = { req: Request } & (
  | { prompt: OpenAIPromptMessage[]; completion?: never; service: "openai" }
  | { prompt: string; completion?: never; service: "anthropic" }
  | { prompt?: never; completion: string; service: "openai" }
  | { prompt?: never; completion: string; service: "anthropic" }
);

export async function countTokens({
  req,
  service,
  prompt,
  completion,
}: TokenCountRequest): Promise<TokenCountResult> {
  const time = process.hrtime();
  switch (service) {
    case "anthropic":
      return {
        ...getClaudeTokenCount(prompt ?? completion, req.body.model),
        tokenization_duration_ms: getElapsedMs(time),
      };
    case "openai":
      return {
        ...getOpenAITokenCount(prompt ?? completion, req.body.model),
        tokenization_duration_ms: getElapsedMs(time),
      };
    default:
      throw new Error(`Unknown service: ${service}`);
  }
}

function getElapsedMs(time: [number, number]) {
  const diff = process.hrtime(time);
  return diff[0] * 1000 + diff[1] / 1e6;
}
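A sketch of how a proxy route might call this module; the handler name, import path, and request body fields are assumptions, except that countTokens reads req.body.model internally:

import { Request, Response } from "express";
import { countTokens } from "./tokenizer";

async function tokenCountHandler(req: Request, res: Response) {
  // req.body.model must be set, since both tokenizers receive it.
  const result = await countTokens({
    req,
    service: "openai",
    prompt: req.body.messages, // OpenAIPromptMessage[]
  });
  // result: { token_count, tokenizer, tokenization_duration_ms }
  res.json(result);
}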