From 272b812db35b58524a687becca7f8a783ec163b3 Mon Sep 17 00:00:00 2001 From: reanon <85157-reanon@users.noreply.gitgud.io> Date: Mon, 21 Apr 2025 02:00:44 +0000 Subject: [PATCH] Using weighted averages for pricing --- src/shared/stats.ts | 50 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/shared/stats.ts b/src/shared/stats.ts index 5ccd199..14f1f90 100644 --- a/src/shared/stats.ts +++ b/src/shared/stats.ts @@ -1,13 +1,13 @@ import { config } from "../config"; import { ModelFamily } from "./models"; -// technically slightly underestimates, because completion tokens cost more -// than prompt tokens but we don't track those separately right now +// Uses weighted averages for a better estimate: thinking models are assumed to use roughly a 1:3 input:output ratio +// (to cover the thinking part), other models roughly 3:1 input:output. Still not exact, but closer to real-world prompting. export function getTokenCostUsd(model: ModelFamily, tokens: number) { let cost = 0; switch (model) { case "deepseek": - cost = 0.00000135; + cost = 0.00000178; // uncached r1 pricing, again the highest average break; case "xai": @@ -16,34 +16,34 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) { break; case "gpt41": case "azure-gpt41": - cost = 0.000005; + cost = 0.0000075; // averaged the same wa* as 4.5 break; case "gpt41-mini": case "azure-gpt41-mini": - cost = 0.000001; + cost = 0.0000015; break; case "gpt41-nano": case "azure-gpt41-nano": - cost = 0.00000025; + cost = 0.0000003; break; case "gpt45": case "azure-gpt45": // $75/$150 for 1M input/output tokens pricing, averaged to $112 - cost = 0.000112; + cost = 0.00009375; break; case "gpt4o": case "azure-gpt4o": - cost = 0.000005; + cost = 0.0000075; break; case "azure-gpt4-turbo": case "gpt4-turbo": - cost = 0.00001; + cost = 0.0000125; break; case "azure-o1-pro": case "o1-pro": // OpenAI o1-pro pricing $150/1M input tokens and $600/1M output tokens - 
cost = 0.00038; + cost = 0.0004875; break; case "azure-o1": case "o1": @@ -51,33 +51,33 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) { // considerably more output tokens that other models for its hidden // reasoning. The official O1 pricing is $15/1M input tokens and $60/1M // output tokens so we will return a higher estimate here. - cost = 0.000038; + cost = 0.00004875; break; case "azure-o1-mini": case "o1-mini": case "azure-o3-mini": case "o3-mini": - cost = 0.00000275; // $1.1/1M input tokens, $4.4/1M output tokens + cost = 0.000003575; // $1.1/1M input tokens, $4.4/1M output tokens break; case "azure-o3": case "o3": - cost = 0.000025; // $10/1M input tokens, $40/1M output tokens + cost = 0.000032; // $10/1M input tokens, $40/1M output tokens break; case "azure-o4-mini": case "o4-mini": - cost = 0.00000275; // $1.1/1M input tokens, $4.4/1M output tokens + cost = 0.000003575; // $1.1/1M input tokens, $4.4/1M output tokens break; case "azure-gpt4-32k": case "gpt4-32k": - cost = 0.00006; + cost = 0.000075; break; case "azure-gpt4": case "gpt4": - cost = 0.00003; + cost = 0.0000375; break; case "azure-turbo": case "turbo": - cost = 0.000001; + cost = 0.00000075; break; case "azure-dall-e": case "dall-e": @@ -86,34 +86,34 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) { case "aws-claude": case "gcp-claude": case "claude": - cost = 0.000008; + cost = 0.000001; break; case "aws-claude-opus": case "gcp-claude-opus": case "claude-opus": - cost = 0.000015; + cost = 0.00003; break; case "aws-mistral-tiny": case "mistral-tiny": - cost = 0.00000025; + cost = 0.0000003; break; case "aws-mistral-small": case "mistral-small": - cost = 0.0000003; + cost = 0.00000035; break; case "aws-mistral-medium": case "mistral-medium": - cost = 0.00000275; + cost = 0.000004; break; case "aws-mistral-large": case "mistral-large": - cost = 0.000003; + cost = 0.000012; break; case "gemini-flash": - cost = 0.0000018; + cost = 0.0000002326; 
break; case "gemini-pro": - cost = 0.0000068; + cost = 0.00000344; break; } return cost * Math.max(0, tokens);