import { getTokenizer } from "@anthropic-ai/tokenizer";
import { Tiktoken } from "tiktoken/lite";

let encoder: Tiktoken;

export function init() {
  // they export a `countTokens` function too but it instantiates a new
  // tokenizer every single time and it is not fast...
  encoder = getTokenizer();
  return true;
}

export function getTokenCount(prompt: string, _model: string) {
  // Don't try tokenizing if the prompt is massive to prevent DoS.
  // 500k characters should be sufficient for all supported models.
  if (prompt.length > 500000) {
    return {
      tokenizer: "length fallback",
      token_count: 100000,
    };
  }

  return {
    tokenizer: "@anthropic-ai/tokenizer",
    token_count: encoder.encode(prompt.normalize("NFKC"), "all").length,
  };
}