Tokens: rationalize

2026-05-10 21:50:14 -07:00 · 2024-07-03 11:42:46 -07:00
parent c8b3d8ad9b
commit 14118d3056
6 changed files with 86 additions and 55 deletions
@@ -3,9 +3,9 @@ import * as React from 'react';
 import { Box, Container, FormControl, Textarea, Typography } from '@mui/joy';

 import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
-import { textTokensForEncodingId, preloadTiktokenLibrary } from '~/common/util/token-counter';
+import { textTokensForEncodingId, preloadTiktokenLibrary } from '~/common/tokens/tokens.text';
 import { lineHeightTextareaMd } from '~/common/app.theme';
-import { useTokenizerSelect } from '~/common/components/forms/useTokenizerSelect';
+import { useTokenizerSelect } from '~/common/tokens/useTokenizerSelect';


 function generateColor(index: number) {
@@ -3,7 +3,7 @@ import * as React from 'react';
 import { useKnowledgeOfBackendCaps } from '~/modules/backend/store-backend-capabilities';

 import { apiQuery } from '~/common/util/trpc.client';
-import { preloadTiktokenLibrary } from '~/common/util/token-counter';
+import { preloadTiktokenLibrary } from '~/common/tokens/tokens.text';


 // configuration
@@ -1,8 +1,9 @@
 import type { DLLM } from '~/modules/llms/store-llms';

-import { textTokensForLLM } from '~/common/util/token-counter';
+import { textTokensForLLM } from '~/common/tokens/tokens.text';

 import { DMessageAttachmentFragment, DMessageFragment, isContentFragment, isContentOrAttachmentFragment } from '~/common/stores/chat/chat.fragments';
+import { imageTokensForLLM } from '~/common/tokens/tokens.image';


 export function estimateTokensForFragments(fragments: DMessageFragment[], llm: DLLM, addTopGlue: boolean, debugFrom: string) {
@@ -21,6 +22,10 @@ export function estimateTextTokens(text: string, llm: DLLM, debugFrom: string):
  return textTokensForLLM(text, llm, debugFrom) ?? 0;
 }

+function estimateImageTokens(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM): number { // image counterpart of estimateTextTokens
+  return imageTokensForLLM(width, height, debugTitle, llm); // delegates unchanged to imageTokensForLLM from ~/common/tokens/tokens.image
+}
+

 // Content Parts

@@ -37,7 +42,7 @@ function _fragmentTokens(fragment: DMessageFragment, llm: DLLM, debugFrom: strin
        const likelyRendition = marshallWrapText(aPart.data.text, aPart.ref, 'markdown-code');
        return estimateTextTokens(likelyRendition, llm, debugFrom);
      case 'image_ref':
-        return _imagePartTokens(aPart.width, aPart.height, fragment.title, llm);
+        return estimateImageTokens(aPart.width, aPart.height, fragment.title, llm);
    }
  } else if (isContentFragment(fragment)) {
    const cPart = fragment.part;
@@ -45,7 +50,7 @@ function _fragmentTokens(fragment: DMessageFragment, llm: DLLM, debugFrom: strin
      case 'error':
        return estimateTextTokens(cPart.error, llm, debugFrom);
      case 'image_ref':
-        return _imagePartTokens(cPart.width, cPart.height, debugFrom, llm);
+        return estimateImageTokens(cPart.width, cPart.height, debugFrom, llm);
      case 'ph':
        return 0;
      case 'text':
@@ -97,38 +102,6 @@ export function marshallWrapDocFragments(initialText: string | null, fragments:
 }


-function _imagePartTokens(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM) {
-  // for the guidelines, see `attachment.pipeline.ts` (lists the latest URLs)
-  switch (llm._source?.vId) {
-    case 'openai':
-      // missing values
-      if (!width || !height) {
-        console.log(`Missing width or height for openai image tokens calculation (${debugTitle || 'no title'})`);
-        return 85;
-      }
-      // 'detail: low' mode, has an image of (or up to) 512x512 -> 85 tokens
-      if (width <= 512 && height <= 512)
-        return 85;
-      // 'detail: high' mode, cover the image with 512x512 patches of 170 tokens, in addition to the 85
-      const patchesX = Math.ceil(width / 512);
-      const patchesY = Math.ceil(height / 512);
-      return 85 + patchesX * patchesY * 170;
-
-    case 'anthropic':
-      // Max case for Anthropic
-      return 1600;
-
-    case 'googleai':
-      // Inferred from the Gemini Videos description, but not sure
-      return 258;
-
-    default:
-      console.warn('Unhandled token preview for image with llm:', llm._source?.vId);
-      return 0;
-  }
-}
-
-
 // Encoding Glue - TODO: implement these correctly and based off LLMs

 function _glueForFragmentTokens(_llm: DLLM): number {
@@ -0,0 +1,57 @@
+import type { DLLM } from '~/modules/llms/store-llms';
+
+
+export function imageTokensForLLM(width: number | undefined, height: number | undefined, debugTitle: string | undefined, llm: DLLM) {
+  // Estimates the token count of one image for the vendor in llm._source?.vId; for the guidelines, see `attachment.pipeline.ts` (lists the latest URLs)
+  switch (llm._source?.vId) {
+    case 'openai':
+      // missing values (undefined or 0): log, then fall back to the same 85 tokens as the small-image case
+      if (!width || !height) {
+        console.log(`Missing width or height for openai image tokens calculation (${debugTitle || 'no title'})`);
+        return 85;
+      }
+      // 'detail: low' mode, has an image of (or up to) 512x512 -> 85 tokens
+      if (width <= 512 && height <= 512)
+        return 85;
+      // 'detail: high' mode, cover the image with 512x512 patches of 170 tokens, in addition to the 85 - NOTE(review): dimensions are used as-is (no resize step here); confirm callers pass the size actually sent
+      const patchesX = Math.ceil(width / 512);
+      const patchesY = Math.ceil(height / 512);
+      return 85 + patchesX * patchesY * 170;
+
+    case 'anthropic':
+      // Recommended image sizes:
+      // https://docs.anthropic.com/en/docs/build-with-claude/vision
+      // - Max: 1568px on long edge
+      // - Optimal: ≤1.15 megapixels (e.g., 1092x1092, 951x1268, 896x1344, 819x1456, 784x1568)
+      // - Min: >200px on both edges
+
+      // Max case as fallback, when either dimension is missing (undefined or 0)
+      if (!width || !height) {
+        // console.log(`Missing width or height for Anthropic image tokens calculation (${debugTitle || 'no title'})`);
+        return 1600;
+      }
+
+      // Calculate tokens based on image size: pixel count / 750, rounded, capped at 1600
+      const megapixels = (width * height) / 1000000; // only used by the oversize check below
+      const tokens = Math.min(Math.round((width * height) / 750), 1600); // the cap cannot bind on the returned path: 1.15 MP / 750 is about 1533
+
+      // Max case for oversized images (above the 1.15 megapixels listed as optimal)
+      if (megapixels > 1.15) {
+        // console.log(`Image exceeds recommended size for Anthropic (${debugTitle || 'no title'})`);
+        return 1600;
+      }
+      // if (width < 200 || height < 200) {
+      //   console.log(`Image may be too small for optimal Anthropic performance (${debugTitle || 'no title'})`);
+      // }
+
+      return tokens;
+
+    case 'googleai':
+      // Inferred from the Gemini Videos description, but not sure - flat value, width/height are ignored
+      return 258;
+
+    default: // any other vendor id, or a missing _source
+      console.warn('Unhandled token preview for image with llm:', llm._source?.vId);
+      return 0; // the image then counts as 0 tokens
+  }
+}
@@ -7,22 +7,6 @@ import type { DLLM } from '~/modules/llms/store-llms';
 const DEBUG_TOKEN_COUNT = false;
 const fallbackEncodingId: TiktokenEncoding = 'cl100k_base';

-// Globals
-interface TiktokenTokenizer {
-  id: TiktokenEncoding;
-  label: string;
-  exampleNet?: string;
-}
-
-export const TiktokenTokenizers: TiktokenTokenizer[] = [
-  { id: 'o200k_base', label: 'O200k Base', exampleNet: 'GPT-4o' },
-  { id: 'cl100k_base', label: 'CL100k Base' },
-  { id: 'p50k_edit', label: 'P50k Edit' },
-  { id: 'p50k_base', label: 'P50k Base' },
-  { id: 'r50k_base', label: 'R50k Base' },
-  { id: 'gpt2', label: 'GPT-2' },
-];
-

 // Global symbols to dynamically load the Tiktoken library
 let get_encoding: ((encoding: TiktokenEncoding) => Tiktoken) | null = null;
@@ -1,12 +1,29 @@
 import * as React from 'react';
+import type { TiktokenEncoding } from 'tiktoken';

 import type { SxProps } from '@mui/joy/styles/types';
 import { FormControl, Option, Select } from '@mui/joy';

-import { TiktokenTokenizers } from '~/common/util/token-counter';
 import { FormLabelStart } from '~/common/components/forms/FormLabelStart';


+// Globals - the list of Tiktoken encodings exported by this module
+interface TiktokenTokenizer {
+  id: TiktokenEncoding; // encoding id, typed by the 'tiktoken' package
+  label: string; // human-readable name of the encoding
+  exampleNet?: string; // optional; presumably an example model that uses this encoding (e.g. 'GPT-4o') - confirm
+}
+
+export const TiktokenTokenizers: TiktokenTokenizer[] = [
+  { id: 'o200k_base', label: 'O200k Base', exampleNet: 'GPT-4o' },
+  { id: 'cl100k_base', label: 'CL100k Base' },
+  { id: 'p50k_edit', label: 'P50k Edit' },
+  { id: 'p50k_base', label: 'P50k Base' },
+  { id: 'r50k_base', label: 'R50k Base' },
+  { id: 'gpt2', label: 'GPT-2' },
+];
+
+
 const tokenizerSelectSx: SxProps = {
  flex: 1,
  backgroundColor: 'background.popup',