Tokens: rationalize

This commit is contained in:
Enrico Ros
2024-07-03 11:42:46 -07:00
parent c8b3d8ad9b
commit 14118d3056
6 changed files with 86 additions and 55 deletions
+2 -2
View File
@@ -3,9 +3,9 @@ import * as React from 'react';
import { Box, Container, FormControl, Textarea, Typography } from '@mui/joy';
import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
import { textTokensForEncodingId, preloadTiktokenLibrary } from '~/common/util/token-counter';
import { textTokensForEncodingId, preloadTiktokenLibrary } from '~/common/tokens/tokens.text';
import { lineHeightTextareaMd } from '~/common/app.theme';
import { useTokenizerSelect } from '~/common/components/forms/useTokenizerSelect';
import { useTokenizerSelect } from '~/common/tokens/useTokenizerSelect';
function generateColor(index: number) {
@@ -3,7 +3,7 @@ import * as React from 'react';
import { useKnowledgeOfBackendCaps } from '~/modules/backend/store-backend-capabilities';
import { apiQuery } from '~/common/util/trpc.client';
import { preloadTiktokenLibrary } from '~/common/util/token-counter';
import { preloadTiktokenLibrary } from '~/common/tokens/tokens.text';
// configuration
+8 -35
View File
@@ -1,8 +1,9 @@
import type { DLLM } from '~/modules/llms/store-llms';
import { textTokensForLLM } from '~/common/util/token-counter';
import { textTokensForLLM } from '~/common/tokens/tokens.text';
import { DMessageAttachmentFragment, DMessageFragment, isContentFragment, isContentOrAttachmentFragment } from '~/common/stores/chat/chat.fragments';
import { imageTokensForLLM } from '~/common/tokens/tokens.image';
export function estimateTokensForFragments(fragments: DMessageFragment[], llm: DLLM, addTopGlue: boolean, debugFrom: string) {
@@ -21,6 +22,10 @@ export function estimateTextTokens(text: string, llm: DLLM, debugFrom: string):
return textTokensForLLM(text, llm, debugFrom) ?? 0;
}
// Thin wrapper: delegates per-vendor image token estimation to `imageTokensForLLM` (tokens.image.ts)
function estimateImageTokens(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM): number {
  return imageTokensForLLM(width, height, debugTitle, llm);
}
// Content Parts
@@ -37,7 +42,7 @@ function _fragmentTokens(fragment: DMessageFragment, llm: DLLM, debugFrom: strin
const likelyRendition = marshallWrapText(aPart.data.text, aPart.ref, 'markdown-code');
return estimateTextTokens(likelyRendition, llm, debugFrom);
case 'image_ref':
return _imagePartTokens(aPart.width, aPart.height, fragment.title, llm);
return estimateImageTokens(aPart.width, aPart.height, fragment.title, llm);
}
} else if (isContentFragment(fragment)) {
const cPart = fragment.part;
@@ -45,7 +50,7 @@ function _fragmentTokens(fragment: DMessageFragment, llm: DLLM, debugFrom: strin
case 'error':
return estimateTextTokens(cPart.error, llm, debugFrom);
case 'image_ref':
return _imagePartTokens(cPart.width, cPart.height, debugFrom, llm);
return estimateImageTokens(cPart.width, cPart.height, debugFrom, llm);
case 'ph':
return 0;
case 'text':
@@ -97,38 +102,6 @@ export function marshallWrapDocFragments(initialText: string | null, fragments:
}
/**
 * Rough per-vendor token cost of attaching one image to a chat message.
 * Dimensions may be unknown (undefined), in which case a vendor-specific fallback is used.
 */
function _imagePartTokens(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM) {
  // for the guidelines, see `attachment.pipeline.ts` (lists the latest URLs)
  const vendor = llm._source?.vId;
  switch (vendor) {

    case 'openai': {
      // without dimensions we can only assume the cheapest ('detail: low') cost
      if (!width || !height) {
        console.log(`Missing width or height for openai image tokens calculation (${debugTitle || 'no title'})`);
        return 85;
      }
      // 'detail: low': an image of (or up to) 512x512 costs a flat 85 tokens
      if (width <= 512 && height <= 512)
        return 85;
      // 'detail: high': 85 base tokens + 170 for each 512x512 patch covering the image
      const tileCols = Math.ceil(width / 512);
      const tileRows = Math.ceil(height / 512);
      return 85 + 170 * tileCols * tileRows;
    }

    case 'anthropic':
      // Max case for Anthropic
      return 1600;

    case 'googleai':
      // Inferred from the Gemini Videos description, but not sure
      return 258;

    default:
      console.warn('Unhandled token preview for image with llm:', llm._source?.vId);
      return 0;
  }
}
// Encoding Glue - TODO: implement these correctly and based off LLMs
function _glueForFragmentTokens(_llm: DLLM): number {
+57
View File
@@ -0,0 +1,57 @@
import type { DLLM } from '~/modules/llms/store-llms';
/**
 * Estimates the token cost of one image for the given LLM, by vendor.
 * Unknown dimensions fall back to a vendor-specific default cost.
 */
export function imageTokensForLLM(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM) {
  // for the guidelines, see `attachment.pipeline.ts` (lists the latest URLs)
  switch (llm._source?.vId) {

    case 'openai': {
      // missing dimensions: assume the cheapest ('detail: low') cost
      if (!width || !height) {
        console.log(`Missing width or height for openai image tokens calculation (${debugTitle || 'no title'})`);
        return 85;
      }
      // 'detail: low' mode, has an image of (or up to) 512x512 -> 85 tokens
      if (width <= 512 && height <= 512)
        return 85;
      // 'detail: high' mode: 85 base tokens + 170 for each 512x512 patch covering the image
      const columns = Math.ceil(width / 512);
      const rows = Math.ceil(height / 512);
      return 85 + 170 * columns * rows;
    }

    case 'anthropic': {
      // Recommended image sizes:
      // https://docs.anthropic.com/en/docs/build-with-claude/vision
      // - Max: 1568px on long edge
      // - Optimal: ≤1.15 megapixels (e.g., 1092x1092, 951x1268, 896x1344, 819x1456, 784x1568)
      // - Min: >200px on both edges
      // Max case as fallback when dimensions are unknown
      if (!width || !height) {
        // console.log(`Missing width or height for Anthropic image tokens calculation (${debugTitle || 'no title'})`);
        return 1600;
      }
      // Max case for oversized images (> 1.15 megapixels)
      if ((width * height) / 1000000 > 1.15) {
        // console.log(`Image exceeds recommended size for Anthropic (${debugTitle || 'no title'})`);
        return 1600;
      }
      // if (width < 200 || height < 200) {
      //   console.log(`Image may be too small for optimal Anthropic performance (${debugTitle || 'no title'})`);
      // }
      // ~(width * height) / 750 tokens, capped at the 1600 maximum
      return Math.min(Math.round((width * height) / 750), 1600);
    }

    case 'googleai':
      // Inferred from the Gemini Videos description, but not sure
      return 258;

    default:
      console.warn('Unhandled token preview for image with llm:', llm._source?.vId);
      return 0;
  }
}
@@ -7,22 +7,6 @@ import type { DLLM } from '~/modules/llms/store-llms';
const DEBUG_TOKEN_COUNT = false;
const fallbackEncodingId: TiktokenEncoding = 'cl100k_base';
// Globals
// Shape of one selectable tokenizer entry
interface TiktokenTokenizer {
  id: TiktokenEncoding;   // tiktoken encoding identifier
  label: string;          // human-readable display name
  exampleNet?: string;    // optional example model that uses this encoding
}

// Tiktoken encodings available for selection
export const TiktokenTokenizers: TiktokenTokenizer[] = [
  { id: 'o200k_base', label: 'O200k Base', exampleNet: 'GPT-4o' },
  { id: 'cl100k_base', label: 'CL100k Base' },
  { id: 'p50k_edit', label: 'P50k Edit' },
  { id: 'p50k_base', label: 'P50k Base' },
  { id: 'r50k_base', label: 'R50k Base' },
  { id: 'gpt2', label: 'GPT-2' },
];
// Global symbols to dynamically load the Tiktoken library
let get_encoding: ((encoding: TiktokenEncoding) => Tiktoken) | null = null;
@@ -1,12 +1,29 @@
import * as React from 'react';
import type { TiktokenEncoding } from 'tiktoken';
import type { SxProps } from '@mui/joy/styles/types';
import { FormControl, Option, Select } from '@mui/joy';
import { TiktokenTokenizers } from '~/common/util/token-counter';
import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
// Globals
// Shape of one selectable tokenizer entry
interface TiktokenTokenizer {
  id: TiktokenEncoding;   // tiktoken encoding identifier
  label: string;          // display name shown in the Select
  exampleNet?: string;    // optional example model that uses this encoding
}

// Tiktoken encodings offered by the tokenizer selector
export const TiktokenTokenizers: TiktokenTokenizer[] = [
  { id: 'o200k_base', label: 'O200k Base', exampleNet: 'GPT-4o' },
  { id: 'cl100k_base', label: 'CL100k Base' },
  { id: 'p50k_edit', label: 'P50k Edit' },
  { id: 'p50k_base', label: 'P50k Base' },
  { id: 'r50k_base', label: 'R50k Base' },
  { id: 'gpt2', label: 'GPT-2' },
];
const tokenizerSelectSx: SxProps = {
flex: 1,
backgroundColor: 'background.popup',