Using weighted averages for pricing
This commit is contained in:
+25
-25
@@ -1,13 +1,13 @@
|
||||
import { config } from "../config";
|
||||
import { ModelFamily } from "./models";
|
||||
|
||||
// technically slightly underestimates, because completion tokens cost more
|
||||
// than prompt tokens but we don't track those separately right now
|
||||
// Using weighted averages now for better estimates; thinking models use roughly a 1:3 input:output ratio
|
||||
// for the thinking part, while other models hover around 3:1 input:output. Still not exact, but it better reflects real-world prompting.
|
||||
export function getTokenCostUsd(model: ModelFamily, tokens: number) {
|
||||
let cost = 0;
|
||||
switch (model) {
|
||||
case "deepseek":
|
||||
cost = 0.00000135;
|
||||
cost = 0.00000178;
|
||||
// uncached r1 pricing, again the highest average
|
||||
break;
|
||||
case "xai":
|
||||
@@ -16,34 +16,34 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
|
||||
break;
|
||||
case "gpt41":
|
||||
case "azure-gpt41":
|
||||
cost = 0.000005;
|
||||
cost = 0.0000075;
|
||||
// averaged the same way as 4.5
|
||||
break;
|
||||
case "gpt41-mini":
|
||||
case "azure-gpt41-mini":
|
||||
cost = 0.000001;
|
||||
cost = 0.0000015;
|
||||
break;
|
||||
case "gpt41-nano":
|
||||
case "azure-gpt41-nano":
|
||||
cost = 0.00000025;
|
||||
cost = 0.0000003;
|
||||
break;
|
||||
case "gpt45":
|
||||
case "azure-gpt45":
|
||||
// $75/$150 per 1M input/output tokens, weighted 3:1 input:output to $93.75
|
||||
cost = 0.000112;
|
||||
cost = 0.00009375;
|
||||
break;
|
||||
case "gpt4o":
|
||||
case "azure-gpt4o":
|
||||
cost = 0.000005;
|
||||
cost = 0.0000075;
|
||||
break;
|
||||
case "azure-gpt4-turbo":
|
||||
case "gpt4-turbo":
|
||||
cost = 0.00001;
|
||||
cost = 0.0000125;
|
||||
break;
|
||||
case "azure-o1-pro":
|
||||
case "o1-pro":
|
||||
// OpenAI o1-pro pricing $150/1M input tokens and $600/1M output tokens
|
||||
cost = 0.00038;
|
||||
cost = 0.0004875;
|
||||
break;
|
||||
case "azure-o1":
|
||||
case "o1":
|
||||
@@ -51,33 +51,33 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
|
||||
// considerably more output tokens than other models for its hidden
|
||||
// reasoning. The official O1 pricing is $15/1M input tokens and $60/1M
|
||||
// output tokens so we will return a higher estimate here.
|
||||
cost = 0.000038;
|
||||
cost = 0.00004875;
|
||||
break;
|
||||
case "azure-o1-mini":
|
||||
case "o1-mini":
|
||||
case "azure-o3-mini":
|
||||
case "o3-mini":
|
||||
cost = 0.00000275; // $1.1/1M input tokens, $4.4/1M output tokens
|
||||
cost = 0.000003575; // $1.1/1M input tokens, $4.4/1M output tokens
|
||||
break;
|
||||
case "azure-o3":
|
||||
case "o3":
|
||||
cost = 0.000025; // $10/1M input tokens, $40/1M output tokens
|
||||
cost = 0.000032; // $10/1M input tokens, $40/1M output tokens
|
||||
break;
|
||||
case "azure-o4-mini":
|
||||
case "o4-mini":
|
||||
cost = 0.00000275; // $1.1/1M input tokens, $4.4/1M output tokens
|
||||
cost = 0.000003575; // $1.1/1M input tokens, $4.4/1M output tokens
|
||||
break;
|
||||
case "azure-gpt4-32k":
|
||||
case "gpt4-32k":
|
||||
cost = 0.00006;
|
||||
cost = 0.000075;
|
||||
break;
|
||||
case "azure-gpt4":
|
||||
case "gpt4":
|
||||
cost = 0.00003;
|
||||
cost = 0.0000375;
|
||||
break;
|
||||
case "azure-turbo":
|
||||
case "turbo":
|
||||
cost = 0.000001;
|
||||
cost = 0.00000075;
|
||||
break;
|
||||
case "azure-dall-e":
|
||||
case "dall-e":
|
||||
@@ -86,34 +86,34 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
|
||||
case "aws-claude":
|
||||
case "gcp-claude":
|
||||
case "claude":
|
||||
cost = 0.000008;
|
||||
cost = 0.000001;
|
||||
break;
|
||||
case "aws-claude-opus":
|
||||
case "gcp-claude-opus":
|
||||
case "claude-opus":
|
||||
cost = 0.000015;
|
||||
cost = 0.00003;
|
||||
break;
|
||||
case "aws-mistral-tiny":
|
||||
case "mistral-tiny":
|
||||
cost = 0.00000025;
|
||||
cost = 0.0000003;
|
||||
break;
|
||||
case "aws-mistral-small":
|
||||
case "mistral-small":
|
||||
cost = 0.0000003;
|
||||
cost = 0.00000035;
|
||||
break;
|
||||
case "aws-mistral-medium":
|
||||
case "mistral-medium":
|
||||
cost = 0.00000275;
|
||||
cost = 0.000004;
|
||||
break;
|
||||
case "aws-mistral-large":
|
||||
case "mistral-large":
|
||||
cost = 0.000003;
|
||||
cost = 0.000012;
|
||||
break;
|
||||
case "gemini-flash":
|
||||
cost = 0.0000018;
|
||||
cost = 0.0000002326;
|
||||
break;
|
||||
case "gemini-pro":
|
||||
cost = 0.0000068;
|
||||
cost = 0.00000344;
|
||||
break;
|
||||
}
|
||||
return cost * Math.max(0, tokens);
|
||||
|
||||
Reference in New Issue
Block a user