From 272b812db35b58524a687becca7f8a783ec163b3 Mon Sep 17 00:00:00 2001
From: reanon <85157-reanon@users.noreply.gitgud.io>
Date: Mon, 21 Apr 2025 02:00:44 +0000
Subject: [PATCH] Using weighted averages for pricing

---
 src/shared/stats.ts | 50 ++++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/shared/stats.ts b/src/shared/stats.ts
index 5ccd199..14f1f90 100644
--- a/src/shared/stats.ts
+++ b/src/shared/stats.ts
@@ -1,13 +1,13 @@
 import { config } from "../config";
 import { ModelFamily } from "./models";
 
-// technically slightly underestimates, because completion tokens cost more
-// than prompt tokens but we don't track those separately right now
+// Costs are weighted averages of input and output prices: thinking models use roughly a 1:3 input:output
+// ratio (because of their reasoning tokens), other models hover around 3:1. Still an estimate, but closer to real-world prompting.
 export function getTokenCostUsd(model: ModelFamily, tokens: number) {
   let cost = 0;
   switch (model) {
     case "deepseek":
-      cost = 0.00000135;
+      cost = 0.00000178;
       // uncached r1 pricing, again the highest average
       break;
     case "xai":
@@ -16,34 +16,34 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
       break;
     case "gpt41":
     case "azure-gpt41":
-      cost = 0.000005;
+      cost = 0.0000075;
       // averaged the same wa* as 4.5
       break;
     case "gpt41-mini":
     case "azure-gpt41-mini":
-      cost = 0.000001;
+      cost = 0.0000015;
       break;
     case "gpt41-nano":
     case "azure-gpt41-nano":
-      cost = 0.00000025;
+      cost = 0.0000003;
       break;
     case "gpt45":
     case "azure-gpt45":
       // $75/$150 for 1M input/output tokens pricing, averaged to $112
-      cost = 0.000112;
+      cost = 0.00009375;
       break;
     case "gpt4o":
     case "azure-gpt4o":
-      cost = 0.000005;
+      cost = 0.0000075;
       break;
     case "azure-gpt4-turbo":
     case "gpt4-turbo":
-      cost = 0.00001;
+      cost = 0.0000125;
       break;
     case "azure-o1-pro":
     case "o1-pro":
       // OpenAI o1-pro pricing $150/1M input tokens and $600/1M output tokens
-      cost = 0.00038;
+      cost = 0.0004875;
       break;
     case "azure-o1":
     case "o1":
@@ -51,33 +51,33 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
       // considerably more output tokens that other models for its hidden
       // reasoning. The official O1 pricing is $15/1M input tokens and $60/1M
       // output tokens so we will return a higher estimate here.
-      cost = 0.000038;
+      cost = 0.00004875;
       break;
     case "azure-o1-mini":
     case "o1-mini":
     case "azure-o3-mini":
     case "o3-mini":
-      cost = 0.00000275; // $1.1/1M input tokens, $4.4/1M output tokens
+      cost = 0.000003575; // $1.1/1M input tokens, $4.4/1M output tokens
       break;
     case "azure-o3":
     case "o3":
-      cost = 0.000025; // $10/1M input tokens, $40/1M output tokens
+      cost = 0.000032; // $10/1M input tokens, $40/1M output tokens
       break;
     case "azure-o4-mini":
     case "o4-mini":
-      cost = 0.00000275; // $1.1/1M input tokens, $4.4/1M output tokens
+      cost = 0.000003575; // $1.1/1M input tokens, $4.4/1M output tokens
       break;
     case "azure-gpt4-32k":
     case "gpt4-32k":
-      cost = 0.00006;
+      cost = 0.000075;
       break;
     case "azure-gpt4":
     case "gpt4":
-      cost = 0.00003;
+      cost = 0.0000375;
       break;
     case "azure-turbo":
     case "turbo":
-      cost = 0.000001;
+      cost = 0.00000075;
       break;
     case "azure-dall-e":
     case "dall-e":
@@ -86,34 +86,34 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
     case "aws-claude":
     case "gcp-claude":
     case "claude":
-      cost = 0.000008;
+      cost = 0.000001;
       break;
     case "aws-claude-opus":
     case "gcp-claude-opus":
     case "claude-opus":
-      cost = 0.000015;
+      cost = 0.00003;
       break;
     case "aws-mistral-tiny":
     case "mistral-tiny":
-      cost = 0.00000025;
+      cost = 0.0000003;
       break;
     case "aws-mistral-small":
     case "mistral-small":
-      cost = 0.0000003;
+      cost = 0.00000035;
       break;
     case "aws-mistral-medium":
     case "mistral-medium":
-      cost = 0.00000275;
+      cost = 0.000004;
       break;
     case "aws-mistral-large":
     case "mistral-large":
-      cost = 0.000003;
+      cost = 0.000012;
       break;
     case "gemini-flash":
-      cost = 0.0000018;
+      cost = 0.0000002326;
       break;
     case "gemini-pro":
-      cost = 0.0000068;
+      cost = 0.00000344;
       break;
   }
   return cost * Math.max(0, tokens);