uses accurate Claude tokenization
This commit is contained in:
Generated
+10
@@ -9,6 +9,7 @@
|
||||
"version": "1.0.0",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/tokenizer": "^0.0.4",
|
||||
"axios": "^1.3.5",
|
||||
"cookie-parser": "^1.4.6",
|
||||
"copyfiles": "^2.4.1",
|
||||
@@ -47,6 +48,15 @@
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/tokenizer": {
|
||||
"version": "0.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/tokenizer/-/tokenizer-0.0.4.tgz",
|
||||
"integrity": "sha512-EHRKbxlxlc8W4KCBEseByJ7YwyYCmgu9OyN59H9+IYIGPoKv8tXyQXinkeGDI+cI8Tiuz9wk2jZb/kK7AyvL7g==",
|
||||
"dependencies": {
|
||||
"@types/node": "^18.11.18",
|
||||
"tiktoken": "^1.0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/parser": {
|
||||
"version": "7.22.7",
|
||||
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.22.7.tgz",
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/tokenizer": "^0.0.4",
|
||||
"axios": "^1.3.5",
|
||||
"cookie-parser": "^1.4.6",
|
||||
"copyfiles": "^2.4.1",
|
||||
|
||||
@@ -7,14 +7,6 @@ import { RequestPreprocessor } from ".";
|
||||
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
|
||||
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
|
||||
|
||||
/**
|
||||
* Claude models don't throw an error if you exceed the token limit and
|
||||
* instead just become extremely slow and provide schizo output. To be safe,
|
||||
* we will only allow 95% of the stated limit, which also accounts for our
|
||||
* tokenization being slightly different than Anthropic's.
|
||||
*/
|
||||
const CLAUDE_TOKEN_LIMIT_ADJUSTMENT = 0.95;
|
||||
|
||||
/**
|
||||
* Assigns `req.promptTokens` and `req.outputTokens` based on the request body
|
||||
* and outbound API format, which combined determine the size of the context.
|
||||
@@ -71,11 +63,11 @@ function validateContextSize(req: Request) {
|
||||
} else if (model.match(/gpt-4/)) {
|
||||
modelMax = 8192;
|
||||
} else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?(?:-100k)/)) {
|
||||
modelMax = 100000 * CLAUDE_TOKEN_LIMIT_ADJUSTMENT;
|
||||
modelMax = 100000;
|
||||
} else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?$/)) {
|
||||
modelMax = 9000 * CLAUDE_TOKEN_LIMIT_ADJUSTMENT;
|
||||
modelMax = 9000;
|
||||
} else if (model.match(/claude-2/)) {
|
||||
modelMax = 100000 * CLAUDE_TOKEN_LIMIT_ADJUSTMENT;
|
||||
modelMax = 100000;
|
||||
} else {
|
||||
// Don't really want to throw here because I don't want to have to update
|
||||
// this ASAP every time a new model is released.
|
||||
|
||||
@@ -1,21 +1,6 @@
|
||||
// For now this is just using the GPT vocabulary, even though Claude has a
|
||||
// different one. Token counts won't be perfect so this just provides
|
||||
// a rough estimate.
|
||||
//
|
||||
// TODO: use huggingface tokenizers instead of openai's tiktoken library since
|
||||
// that should support the vocabulary file Anthropic provides.
|
||||
|
||||
import { Tiktoken } from "tiktoken/lite";
|
||||
import cl100k_base from "tiktoken/encoders/cl100k_base.json";
|
||||
|
||||
let encoder: Tiktoken;
|
||||
import { countTokens } from "@anthropic-ai/tokenizer";
|
||||
|
||||
export function init() {
|
||||
encoder = new Tiktoken(
|
||||
cl100k_base.bpe_ranks,
|
||||
cl100k_base.special_tokens,
|
||||
cl100k_base.pat_str
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -24,13 +9,13 @@ export function getTokenCount(prompt: string, _model: string) {
|
||||
// 500k characters should be sufficient for all supported models.
|
||||
if (prompt.length > 500000) {
|
||||
return {
|
||||
tokenizer: "tiktoken (prompt length limit exceeded)",
|
||||
tokenizer: "length fallback",
|
||||
token_count: 100000,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
tokenizer: "tiktoken (cl100k_base)",
|
||||
token_count: encoder.encode(prompt).length,
|
||||
tokenizer: "@anthropic-ai/tokenizer",
|
||||
token_count: countTokens(prompt),
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user