mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Tokens: rationalize
This commit is contained in:
@@ -3,9 +3,9 @@ import * as React from 'react';
|
||||
import { Box, Container, FormControl, Textarea, Typography } from '@mui/joy';
|
||||
|
||||
import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
|
||||
import { textTokensForEncodingId, preloadTiktokenLibrary } from '~/common/util/token-counter';
|
||||
import { textTokensForEncodingId, preloadTiktokenLibrary } from '~/common/tokens/tokens.text';
|
||||
import { lineHeightTextareaMd } from '~/common/app.theme';
|
||||
import { useTokenizerSelect } from '~/common/components/forms/useTokenizerSelect';
|
||||
import { useTokenizerSelect } from '~/common/tokens/useTokenizerSelect';
|
||||
|
||||
|
||||
function generateColor(index: number) {
|
||||
|
||||
@@ -3,7 +3,7 @@ import * as React from 'react';
|
||||
import { useKnowledgeOfBackendCaps } from '~/modules/backend/store-backend-capabilities';
|
||||
|
||||
import { apiQuery } from '~/common/util/trpc.client';
|
||||
import { preloadTiktokenLibrary } from '~/common/util/token-counter';
|
||||
import { preloadTiktokenLibrary } from '~/common/tokens/tokens.text';
|
||||
|
||||
|
||||
// configuration
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import type { DLLM } from '~/modules/llms/store-llms';
|
||||
|
||||
import { textTokensForLLM } from '~/common/util/token-counter';
|
||||
import { textTokensForLLM } from '~/common/tokens/tokens.text';
|
||||
|
||||
import { DMessageAttachmentFragment, DMessageFragment, isContentFragment, isContentOrAttachmentFragment } from '~/common/stores/chat/chat.fragments';
|
||||
import { imageTokensForLLM } from '~/common/tokens/tokens.image';
|
||||
|
||||
|
||||
export function estimateTokensForFragments(fragments: DMessageFragment[], llm: DLLM, addTopGlue: boolean, debugFrom: string) {
|
||||
@@ -21,6 +22,10 @@ export function estimateTextTokens(text: string, llm: DLLM, debugFrom: string):
|
||||
return textTokensForLLM(text, llm, debugFrom) ?? 0;
|
||||
}
|
||||
|
||||
/**
 * Estimates the token cost of an image for the given LLM.
 *
 * Thin local entry point: the computation is delegated entirely to
 * `imageTokensForLLM`, and its result is returned unchanged.
 *
 * @param width - image width, if known
 * @param height - image height, if known
 * @param debugTitle - label forwarded to the delegate (used there for diagnostics)
 * @param llm - the model the estimate is computed for
 * @returns the estimated number of tokens
 */
function estimateImageTokens(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM): number {
  const imageTokens = imageTokensForLLM(width, height, debugTitle, llm);
  return imageTokens;
}
|
||||
|
||||
|
||||
// Content Parts
|
||||
|
||||
@@ -37,7 +42,7 @@ function _fragmentTokens(fragment: DMessageFragment, llm: DLLM, debugFrom: strin
|
||||
const likelyRendition = marshallWrapText(aPart.data.text, aPart.ref, 'markdown-code');
|
||||
return estimateTextTokens(likelyRendition, llm, debugFrom);
|
||||
case 'image_ref':
|
||||
return _imagePartTokens(aPart.width, aPart.height, fragment.title, llm);
|
||||
return estimateImageTokens(aPart.width, aPart.height, fragment.title, llm);
|
||||
}
|
||||
} else if (isContentFragment(fragment)) {
|
||||
const cPart = fragment.part;
|
||||
@@ -45,7 +50,7 @@ function _fragmentTokens(fragment: DMessageFragment, llm: DLLM, debugFrom: strin
|
||||
case 'error':
|
||||
return estimateTextTokens(cPart.error, llm, debugFrom);
|
||||
case 'image_ref':
|
||||
return _imagePartTokens(cPart.width, cPart.height, debugFrom, llm);
|
||||
return estimateImageTokens(cPart.width, cPart.height, debugFrom, llm);
|
||||
case 'ph':
|
||||
return 0;
|
||||
case 'text':
|
||||
@@ -97,38 +102,6 @@ export function marshallWrapDocFragments(initialText: string | null, fragments:
|
||||
}
|
||||
|
||||
|
||||
/**
 * Estimates how many tokens an image consumes, based on the vendor of the LLM
 * (read from `llm._source?.vId`).
 *
 *  - 'openai': 85 tokens for missing dimensions or images up to 512x512; otherwise
 *    85 + 170 per 512x512 tile, counted AFTER the image is downscaled to fit
 *    2048x2048 and then downscaled so that its shortest side is at most 768
 *  - 'anthropic': flat 1600 (max case)
 *  - 'googleai': flat 258
 *  - anything else: logs a warning and returns 0
 *
 * @param width - image width in pixels, if known
 * @param height - image height in pixels, if known
 * @param debugTitle - label used only in the diagnostic log message
 * @param llm - the model whose vendor selects the estimation rule
 * @returns the estimated token count
 */
function _imagePartTokens(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM): number {
  // for the guidelines, see `attachment.pipeline.ts` (lists the latest URLs)
  switch (llm._source?.vId) {
    case 'openai': {
      // missing values
      if (!width || !height) {
        console.log(`Missing width or height for openai image tokens calculation (${debugTitle || 'no title'})`);
        return 85;
      }
      // 'detail: low' mode, has an image of (or up to) 512x512 -> 85 tokens
      if (width <= 512 && height <= 512)
        return 85;

      // 'detail: high' mode: the image is first shrunk to fit within 2048x2048...
      let scaledWidth = width;
      let scaledHeight = height;
      const longEdge = Math.max(scaledWidth, scaledHeight);
      if (longEdge > 2048) {
        const fitRatio = 2048 / longEdge;
        // rounding avoids floating point noise pushing an edge over a tile boundary
        scaledWidth = Math.round(scaledWidth * fitRatio);
        scaledHeight = Math.round(scaledHeight * fitRatio);
      }
      // ...then shrunk so that the shortest side is 768 (downscale only — assumes small images are not upscaled)
      const shortEdge = Math.min(scaledWidth, scaledHeight);
      if (shortEdge > 768) {
        const shrinkRatio = 768 / shortEdge;
        scaledWidth = Math.round(scaledWidth * shrinkRatio);
        scaledHeight = Math.round(scaledHeight * shrinkRatio);
      }
      // cover the scaled image with 512x512 patches of 170 tokens, in addition to the 85
      const patchesX = Math.max(1, Math.ceil(scaledWidth / 512));
      const patchesY = Math.max(1, Math.ceil(scaledHeight / 512));
      return 85 + patchesX * patchesY * 170;
    }

    case 'anthropic':
      // Max case for Anthropic
      return 1600;

    case 'googleai':
      // Inferred from the Gemini Videos description, but not sure
      return 258;

    default:
      console.warn('Unhandled token preview for image with llm:', llm._source?.vId);
      return 0;
  }
}
|
||||
|
||||
|
||||
// Encoding Glue - TODO: implement these correctly and based off LLMs
|
||||
|
||||
function _glueForFragmentTokens(_llm: DLLM): number {
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
import type { DLLM } from '~/modules/llms/store-llms';
|
||||
|
||||
|
||||
/**
 * Estimates how many tokens an image consumes, based on the vendor of the LLM
 * (read from `llm._source?.vId`).
 *
 *  - 'openai': 85 tokens for missing dimensions or images up to 512x512; otherwise
 *    85 + 170 per 512x512 tile, counted AFTER the image is downscaled to fit
 *    2048x2048 and then downscaled so that its shortest side is at most 768
 *  - 'anthropic': 1600 when dimensions are missing or the image exceeds 1.15
 *    megapixels; otherwise round(width * height / 750), capped at 1600
 *  - 'googleai': flat 258
 *  - anything else: logs a warning and returns 0
 *
 * @param width - image width in pixels, if known
 * @param height - image height in pixels, if known
 * @param debugTitle - label used only in the diagnostic log message
 * @param llm - the model whose vendor selects the estimation rule
 * @returns the estimated token count
 */
export function imageTokensForLLM(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM): number {
  // for the guidelines, see `attachment.pipeline.ts` (lists the latest URLs)
  switch (llm._source?.vId) {
    case 'openai': {
      // missing values
      if (!width || !height) {
        console.log(`Missing width or height for openai image tokens calculation (${debugTitle || 'no title'})`);
        return 85;
      }
      // 'detail: low' mode, has an image of (or up to) 512x512 -> 85 tokens
      if (width <= 512 && height <= 512)
        return 85;

      // 'detail: high' mode: the image is first shrunk to fit within 2048x2048...
      let scaledWidth = width;
      let scaledHeight = height;
      const longEdge = Math.max(scaledWidth, scaledHeight);
      if (longEdge > 2048) {
        const fitRatio = 2048 / longEdge;
        // rounding avoids floating point noise pushing an edge over a tile boundary
        scaledWidth = Math.round(scaledWidth * fitRatio);
        scaledHeight = Math.round(scaledHeight * fitRatio);
      }
      // ...then shrunk so that the shortest side is 768 (downscale only — assumes small images are not upscaled)
      const shortEdge = Math.min(scaledWidth, scaledHeight);
      if (shortEdge > 768) {
        const shrinkRatio = 768 / shortEdge;
        scaledWidth = Math.round(scaledWidth * shrinkRatio);
        scaledHeight = Math.round(scaledHeight * shrinkRatio);
      }
      // cover the scaled image with 512x512 patches of 170 tokens, in addition to the 85
      const patchesX = Math.max(1, Math.ceil(scaledWidth / 512));
      const patchesY = Math.max(1, Math.ceil(scaledHeight / 512));
      return 85 + patchesX * patchesY * 170;
    }

    case 'anthropic': {
      // Recommended image sizes:
      // https://docs.anthropic.com/en/docs/build-with-claude/vision
      // - Max: 1568px on long edge
      // - Optimal: ≤1.15 megapixels (e.g., 1092x1092, 951x1268, 896x1344, 819x1456, 784x1568)
      // - Min: >200px on both edges

      // Max case as fallback
      if (!width || !height) {
        // console.log(`Missing width or height for Anthropic image tokens calculation (${debugTitle || 'no title'})`);
        return 1600;
      }

      // Max case for oversized images
      const pixels = width * height;
      if (pixels / 1000000 > 1.15) {
        // console.log(`Image exceeds recommended size for Anthropic (${debugTitle || 'no title'})`);
        return 1600;
      }
      // if (width < 200 || height < 200) {
      // console.log(`Image may be too small for optimal Anthropic performance (${debugTitle || 'no title'})`);
      // }

      // Calculate tokens based on image size (one token per ~750 pixels), never above the max case
      return Math.min(Math.round(pixels / 750), 1600);
    }

    case 'googleai':
      // Inferred from the Gemini Videos description, but not sure
      return 258;

    default:
      console.warn('Unhandled token preview for image with llm:', llm._source?.vId);
      return 0;
  }
}
|
||||
@@ -7,22 +7,6 @@ import type { DLLM } from '~/modules/llms/store-llms';
|
||||
const DEBUG_TOKEN_COUNT = false;
|
||||
const fallbackEncodingId: TiktokenEncoding = 'cl100k_base';
|
||||
|
||||
// Globals
|
||||
/** Describes one selectable Tiktoken encoding. */
interface TiktokenTokenizer {
  /** Tiktoken encoding identifier */
  id: TiktokenEncoding;
  /** Human-readable name of the encoding */
  label: string;
  /** Optional example of a model that uses this encoding */
  exampleNet?: string;
}

/** The Tiktoken encodings offered for selection, in display order. */
export const TiktokenTokenizers: TiktokenTokenizer[] = [
  {
    id: 'o200k_base',
    label: 'O200k Base',
    exampleNet: 'GPT-4o',
  },
  {
    id: 'cl100k_base',
    label: 'CL100k Base',
  },
  {
    id: 'p50k_edit',
    label: 'P50k Edit',
  },
  {
    id: 'p50k_base',
    label: 'P50k Base',
  },
  {
    id: 'r50k_base',
    label: 'R50k Base',
  },
  {
    id: 'gpt2',
    label: 'GPT-2',
  },
];
|
||||
|
||||
|
||||
// Global symbols to dynamically load the Tiktoken library
|
||||
let get_encoding: ((encoding: TiktokenEncoding) => Tiktoken) | null = null;
|
||||
+18
-1
@@ -1,12 +1,29 @@
|
||||
import * as React from 'react';
|
||||
import type { TiktokenEncoding } from 'tiktoken';
|
||||
|
||||
import type { SxProps } from '@mui/joy/styles/types';
|
||||
import { FormControl, Option, Select } from '@mui/joy';
|
||||
|
||||
import { TiktokenTokenizers } from '~/common/util/token-counter';
|
||||
import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
|
||||
|
||||
|
||||
// Globals
|
||||
/** Describes one selectable Tiktoken encoding. */
interface TiktokenTokenizer {
  /** Tiktoken encoding identifier */
  id: TiktokenEncoding;
  /** Human-readable name of the encoding */
  label: string;
  /** Optional example of a model that uses this encoding */
  exampleNet?: string;
}

/** The Tiktoken encodings offered for selection, in display order. */
export const TiktokenTokenizers: TiktokenTokenizer[] = [
  {
    id: 'o200k_base',
    label: 'O200k Base',
    exampleNet: 'GPT-4o',
  },
  {
    id: 'cl100k_base',
    label: 'CL100k Base',
  },
  {
    id: 'p50k_edit',
    label: 'P50k Edit',
  },
  {
    id: 'p50k_base',
    label: 'P50k Base',
  },
  {
    id: 'r50k_base',
    label: 'R50k Base',
  },
  {
    id: 'gpt2',
    label: 'GPT-2',
  },
];
|
||||
|
||||
|
||||
const tokenizerSelectSx: SxProps = {
|
||||
flex: 1,
|
||||
backgroundColor: 'background.popup',
|
||||
Reference in New Issue
Block a user