Files
big-agi/lib/token-counters.ts
T
Enrico Ros e2eac30155 Cleanup token estimation
Move the function to a separate file for better encapsulation
2023-04-01 18:21:57 -07:00

31 lines
1014 B
TypeScript

import { encoding_for_model, get_encoding, Tiktoken } from '@dqbd/tiktoken';
import { ChatModelId, defaultChatModelId } from '@/lib/data';
/**
 * Counts the tokens in `text` using the tokenizer associated with `chatModelId`.
 *
 * Wrapper around the Tiktoken library that keeps one encoder per model id in a private
 * cache, so each tokenizer is only created the first time it is requested.
 *
 * We also preload the tokenizer for the default model when this module is evaluated, so
 * that the first time a user types a message, it doesn't stall loading the tokenizer.
 *
 * @param text - The text to tokenize.
 * @param chatModelId - The model whose tokenizer should be used. If `encoding_for_model`
 * throws for this id, the 'cl100k_base' encoding is used (and cached) instead.
 * @returns The number of tokens `text` encodes to.
 */
export const countModelTokens: (text: string, chatModelId: ChatModelId) => number = (() => {
  // A Map instead of a plain object probed with `in`: the `in` operator also matches
  // members inherited from Object.prototype (e.g. 'constructor', 'toString'), which would
  // make such ids look cached and then fail when calling `.encode` on a non-encoder.
  const tokenEncoders = new Map<string, Tiktoken>();

  // Returns the cached encoder for the model, creating and caching it on first use.
  function encoderFor(chatModelId: ChatModelId): Tiktoken {
    let encoder = tokenEncoders.get(chatModelId);
    if (encoder === undefined) {
      try {
        encoder = encoding_for_model(chatModelId);
      } catch {
        // no model-specific encoding could be created: fall back to cl100k_base
        encoder = get_encoding('cl100k_base');
      }
      tokenEncoders.set(chatModelId, encoder);
    }
    return encoder;
  }

  function tokenCount(text: string, chatModelId: ChatModelId): number {
    return encoderFor(chatModelId).encode(text).length;
  }

  // preload the tokenizer for the default model
  tokenCount('', defaultChatModelId);

  return tokenCount;
})();