mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-11 06:00:15 -07:00
e2eac30155
Move the function to a separate file for better encapsulation
31 lines
1014 B
TypeScript
31 lines
1014 B
TypeScript
import { encoding_for_model, get_encoding, Tiktoken } from '@dqbd/tiktoken';
|
|
|
|
import { ChatModelId, defaultChatModelId } from '@/lib/data';
|
|
|
|
|
|
/**
 * Counts how many tokens `text` encodes to with the tokenizer of the given chat model.
 *
 * Wrapper around the Tiktoken library that keeps one tokenizer per model id in a cache,
 * so each tokenizer is created at most once for the lifetime of this module.
 *
 * The tokenizer for the default model is also preloaded when this module is evaluated,
 * so that the first time a user types a message, it doesn't stall loading the tokenizer.
 *
 * @param text - the text to tokenize
 * @param chatModelId - id of the model whose tokenizer is used; when `encoding_for_model`
 *   throws for this id, the 'cl100k_base' encoding is used instead
 * @returns the number of tokens produced by encoding `text`
 */
export const countModelTokens: (text: string, chatModelId: ChatModelId) => number = (() => {
  // A Map instead of a plain object probed with `in`: the `in` operator also matches
  // inherited Object.prototype members (e.g. 'constructor'), which would be mistaken
  // for cached tokenizers.
  const tokenEncoders = new Map<string, Tiktoken>();

  // Returns the cached tokenizer for the model, creating and caching it on first use.
  function getEncoder(chatModelId: ChatModelId): Tiktoken {
    let encoder = tokenEncoders.get(chatModelId);
    if (!encoder) {
      try {
        encoder = encoding_for_model(chatModelId);
      } catch {
        // no model-specific encoding could be created: fall back to a fixed encoding
        encoder = get_encoding('cl100k_base');
      }
      tokenEncoders.set(chatModelId, encoder);
    }
    return encoder;
  }

  function tokenCount(text: string, chatModelId: ChatModelId): number {
    return getEncoder(chatModelId).encode(text).length;
  }

  // preload the tokenizer for the default model
  tokenCount('', defaultChatModelId);

  return tokenCount;
})();
|