28 lines
773 B
TypeScript
28 lines
773 B
TypeScript
import { getTokenizer } from "@anthropic-ai/tokenizer";
|
|
import { Tiktoken } from "tiktoken/lite";
|
|
|
|
let encoder: Tiktoken;
|
|
|
|
export function init() {
|
|
// they export a `countTokens` function too but it instantiates a new
|
|
// tokenizer every single time and it is not fast...
|
|
encoder = getTokenizer();
|
|
return true;
|
|
}
|
|
|
|
export function getTokenCount(prompt: string, _model: string) {
|
|
// Don't try tokenizing if the prompt is massive to prevent DoS.
|
|
// 500k characters should be sufficient for all supported models.
|
|
if (prompt.length > 500000) {
|
|
return {
|
|
tokenizer: "length fallback",
|
|
token_count: 100000,
|
|
};
|
|
}
|
|
|
|
return {
|
|
tokenizer: "@anthropic-ai/tokenizer",
|
|
token_count: encoder.encode(prompt.normalize("NFKC"), "all").length,
|
|
};
|
|
}
|