Approximate Tokenization - default on new Mobile installs

This commit is contained in:
Enrico Ros
2025-08-01 14:26:26 -07:00
parent 04df3dcba8
commit a79fd0a10c
5 changed files with 172 additions and 2 deletions
+14
View File
@@ -3,10 +3,13 @@ import { persist } from 'zustand/middleware';
import { useShallow } from 'zustand/react/shallow';
import type { DLLMId } from '~/common/stores/llms/llms.types';
import { Is } from '~/common/util/pwaUtils';
export type ChatAutoSpeakType = 'off' | 'firstLine' | 'all';
export type TokenCountingMethod = 'accurate' | 'approximate';
// Chat Settings (Chat AI & Chat UI)
@@ -38,6 +41,9 @@ interface AppChatStore {
chatKeepLastThinkingOnly: boolean,
setChatKeepLastThinkingOnly: (chatKeepLastThinkingOnly: boolean) => void;
tokenCountingMethod: TokenCountingMethod;
setTokenCountingMethod: (tokenCountingMethod: TokenCountingMethod) => void;
// chat UI
clearFilters: () => void;
@@ -107,6 +113,9 @@ const useAppChatStore = create<AppChatStore>()(persist(
chatKeepLastThinkingOnly: true,
setChatKeepLastThinkingOnly: (chatKeepLastThinkingOnly: boolean) => _set({ chatKeepLastThinkingOnly }),
tokenCountingMethod: Is.Desktop ? 'accurate' : 'approximate',
setTokenCountingMethod: (tokenCountingMethod: TokenCountingMethod) => _set({ tokenCountingMethod }),
// Chat UI
clearFilters: () => _set({ filterIsArchived: false, filterHasDocFragments: false, filterHasImageAssets: false, filterHasStars: false }),
@@ -181,6 +190,7 @@ export const useChatAutoAI = () => useAppChatStore(useShallow(state => ({
autoTitleChat: state.autoTitleChat,
autoVndAntBreakpoints: state.autoVndAntBreakpoints,
chatKeepLastThinkingOnly: state.chatKeepLastThinkingOnly,
tokenCountingMethod: state.tokenCountingMethod,
setAutoSpeak: state.setAutoSpeak,
setAutoSuggestAttachmentPrompts: state.setAutoSuggestAttachmentPrompts,
setAutoSuggestDiagrams: state.setAutoSuggestDiagrams,
@@ -189,6 +199,7 @@ export const useChatAutoAI = () => useAppChatStore(useShallow(state => ({
setAutoTitleChat: state.setAutoTitleChat,
setAutoVndAntBreakpoints: state.setAutoVndAntBreakpoints,
setChatKeepLastThinkingOnly: state.setChatKeepLastThinkingOnly,
setTokenCountingMethod: state.setTokenCountingMethod,
})));
export const getChatAutoAI = (): {
@@ -208,6 +219,9 @@ export const useChatAutoSuggestHTMLUI = (): boolean =>
export const useChatAutoSuggestAttachmentPrompts = (): boolean =>
useAppChatStore(state => state.autoSuggestAttachmentPrompts);
/**
 * Non-reactive accessor: reads the currently configured token counting method
 * ('accurate' | 'approximate') straight from the store, outside React rendering.
 */
export const getChatTokenCountingMethod = (): TokenCountingMethod => {
  const { tokenCountingMethod } = useAppChatStore.getState();
  return tokenCountingMethod;
};
export const useChatMicTimeoutMsValue = (): number =>
useAppChatStore(state => state.micTimeoutMs);
@@ -13,6 +13,7 @@ import { useLLMSelect } from '~/common/components/forms/useLLMSelect';
import { useLabsDevMode } from '~/common/stores/store-ux-labs';
import { useModelDomain } from '~/common/stores/llms/hooks/useModelDomain';
import type { TokenCountingMethod } from '../chat/store-app-chat';
import { useChatAutoAI } from '../chat/store-app-chat';
@@ -29,6 +30,19 @@ const _keepThinkingBlocksOptions: FormSelectOption<'all' | 'last-only'>[] = [
},
] as const;
// Options for the 'Token Counting' select in Chat AI settings.
// 'approximate' = fast char-count heuristic (no tiktoken WASM load);
// 'accurate' = full tiktoken tokenizer, heavier to load and run.
const _tokenCountingMethodOptions: FormSelectOption<TokenCountingMethod>[] = [
  {
    value: 'approximate',
    label: 'Fast',
    description: 'Lightweight: ~90% approximation',
  },
  {
    value: 'accurate',
    label: 'Precise',
    description: 'Accurate tokenizer, heavier',
  },
] as const;
function FormControlDomainModel(props: {
domainId: DModelDomainId,
@@ -63,6 +77,7 @@ export function AppChatSettingsAI() {
// autoSuggestQuestions, setAutoSuggestQuestions,
autoTitleChat, setAutoTitleChat,
chatKeepLastThinkingOnly, setChatKeepLastThinkingOnly,
tokenCountingMethod, setTokenCountingMethod,
} = useChatAutoAI();
const labsDevMode = useLabsDevMode();
@@ -123,6 +138,15 @@ export function AppChatSettingsAI() {
/>
)}
<FormSelectControl
title='Token Counting'
tooltip='Controls how tokens are counted for context limits and pricing estimates.'
options={_tokenCountingMethodOptions}
value={tokenCountingMethod}
onChange={setTokenCountingMethod}
selectSx={{ minWidth: 140 }}
/>
<FormSelectControl
title='Reasoning traces'
tooltip='Controls how AI thinking/reasoning blocks are kept in your chat history. Keeping only in the last message (default) reduces clutter.'
@@ -1,6 +1,8 @@
import * as React from 'react';
import { useRouter } from 'next/router';
import { getChatTokenCountingMethod } from '../../apps/chat/store-app-chat';
import { markNewsAsSeen, shallRedirectToNews, sherpaReconfigureBackendModels, sherpaStorageMaintenanceNoChats_delayed } from '~/common/logic/store-logic-sherpa';
import { navigateToNews, ROUTE_APP_CHAT } from '~/common/app.routes';
import { preloadTiktokenLibrary } from '~/common/tokens/tokens.text';
@@ -36,7 +38,7 @@ export function ProviderBootstrapLogic(props: { children: React.ReactNode }) {
// decide what to launch
const launchPreload = isOnChat && !isRedirectingToNews;
const launchPreload = isOnChat && !isRedirectingToNews && getChatTokenCountingMethod() === 'accurate'; // only preload if using TikToken by default
const launchAutoConf = isOnChat && !isRedirectingToNews;
const launchStorageGC = true;
+14 -1
View File
@@ -1,4 +1,7 @@
import { getChatTokenCountingMethod } from '../../../apps/chat/store-app-chat';
import type { DLLM } from '~/common/stores/llms/llms.types';
import { approximateTextTokens } from '~/common/tokens/tokens.approximate';
import { imageTokensForLLM } from '~/common/tokens/tokens.image';
import { textTokensForLLM } from '~/common/tokens/tokens.text';
@@ -19,7 +22,17 @@ export function estimateTokensForFragments(llm: DLLM, role: DMessageRole, fragme
// Text
/**
 * Estimates the token count of a text string for the given LLM.
 *
 * Dispatches on the user setting: the 'approximate' method uses a fast
 * character-count heuristic, while the default path uses the accurate
 * (JS+WASM 'tiktoken') tokenizer, falling back to the approximation if
 * the accurate tokenizer is unavailable.
 *
 * Fix: removed a leftover unconditional `return textTokensForLLM(...) ?? 0;`
 * at the top of the function, which made the entire method-selection logic
 * below it unreachable (and silently coerced unavailable counts to 0).
 */
export function estimateTextTokens(text: string, llm: DLLM, debugFrom: string): number {
  // Approximate path
  if (getChatTokenCountingMethod() === 'approximate')
    return approximateTextTokens(text, llm, debugFrom);
  // Default to accurate method (the JS+WASM 'tiktoken' lib)
  const accurateTokens = textTokensForLLM(text, llm, debugFrom);
  if (accurateTokens !== null)
    return accurateTokens;
  // Fallback to approximate if the accurate method is not available
  return approximateTextTokens(text, llm, debugFrom);
}
function estimateImageTokens(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM): number {
+117
View File
@@ -0,0 +1,117 @@
import type { DLLM } from '~/common/stores/llms/llms.types';
// configuration
const DEBUG_TOKEN_COUNT = false;

/**
 * Lightweight approximate token counting without tiktoken dependency.
 * This provides fast estimates with ~85-90% accuracy vs tiktoken.
 */

// Character-to-token ratios per model family.
// NOTE(review): described as "empirically derived" — values are heuristics, not
// measured per-tokenizer constants; verify against real tokenizer output.
const TOKEN_RATIOS = {
  // GPT models (OpenAI-like)
  'gpt': 3.9, // ~4 chars per token on average
  'o1': 4.0,
  'claude': 3.8, // Anthropic models tend to be slightly more efficient
  'gemini': 4.2, // Google models
  'llama': 4.1, // Meta and similar
  'mistral': 4.0,
  'qwen': 3.9, // Alibaba
  'deepseek': 4.0,
  'default': 4.0, // Conservative default
} as const;

// Content-type multipliers: code and non-Latin scripts tokenize denser.
const LANGUAGE_MULTIPLIERS = {
  // Code typically has more tokens per character
  'code': 1.2,
  // Non-Latin scripts often require more tokens
  'chinese': 1.4,
  'japanese': 1.4,
  'korean': 1.3,
  'arabic': 1.2,
  'default': 1.0,
} as const;

/**
 * Classifies text to select a token-density multiplier.
 * Code markers win over script detection; scripts are matched first-hit in
 * chinese → japanese → korean → arabic order.
 */
function detectContentType(text: string): keyof typeof LANGUAGE_MULTIPLIERS {
  // code heuristics: fences, common keywords, space+newline, or bracket pairs
  const codeMarkers = ['```', 'function ', 'const ', 'import ', 'class ', 'def '];
  const looksLikeCode =
    codeMarkers.some(marker => text.includes(marker))
    || (text.includes(' ') && text.includes('\n')) // NOTE(review): space+newline is a very loose 'indented block' signal — confirm intent
    || /\{.*}/.test(text)
    || /\[.*]/.test(text);
  if (looksLikeCode)
    return 'code';
  // script detection via Unicode block ranges
  if (/[\u4e00-\u9fff]/.test(text)) return 'chinese';
  if (/[\u3040-\u309f\u30a0-\u30ff]/.test(text)) return 'japanese';
  if (/[\uac00-\ud7af]/.test(text)) return 'korean';
  if (/[\u0600-\u06ff]/.test(text)) return 'arabic';
  return 'default';
}
/**
 * Maps an LLM to a token-ratio family by substring-matching its id and
 * (optional) underlying model reference. First match wins, so e.g. an id
 * containing both 'gpt' and 'o1' resolves to 'gpt'.
 */
function getModelFamily(llm: DLLM): keyof typeof TOKEN_RATIOS {
  // search id and llmRef together; '\n' separator prevents cross-boundary matches
  const haystack = llm.id.toLowerCase() + '\n' + (llm.initialParameters?.llmRef?.toLowerCase() || '');
  const families = ['gpt', 'o1', 'claude', 'gemini', 'llama', 'mistral', 'qwen', 'deepseek'] as const;
  for (const family of families)
    if (haystack.includes(family))
      return family;
  return 'default';
}
/**
* Fast approximate token counting based on character count and heuristics.
*
* @param text - The text to count tokens for
* @param llm - The LLM configuration (used to determine model family)
* @param debugFrom - Debug label for logging
* @returns Estimated token count
*/
export function approximateTextTokens(text: string, llm: DLLM, debugFrom: string): number {
if (!text) return 0;
const contentType = detectContentType(text);
const modelFamily = getModelFamily(llm);
const baseRatio = TOKEN_RATIOS[modelFamily];
const languageMultiplier = LANGUAGE_MULTIPLIERS[contentType];
// Base calculation: characters / ratio
const baseTokens = text.length / baseRatio;
// Apply language-specific adjustments
const adjustedTokens = baseTokens * languageMultiplier;
// Additional heuristics:
// - Spaces typically reduce token count (word boundaries)
const spaceCount = (text.match(/\s/g) || []).length;
const spaceAdjustment = spaceCount * 0.1; // Small reduction for spaces
// - Repeated characters/patterns often compress better
const repetitionReduction = text.length > 100 ? Math.min(adjustedTokens * 0.05, 10) : 0;
const finalCount = Math.max(1, Math.round(adjustedTokens - spaceAdjustment - repetitionReduction));
DEBUG_TOKEN_COUNT && console.log(`approximateTextTokens: ${debugFrom}, family: ${modelFamily}, type: ${contentType}, chars: ${text.length}, tokens: ${finalCount}`);
return finalCount;
}