Files
OAI-Proxy/src/shared/stats.ts
T
2025-04-25 10:38:23 +02:00

148 lines
4.2 KiB
TypeScript

import { config } from "../config";
import { ModelFamily } from "./models";
// Costs use weighted averages of input/output pricing for better estimates:
// thinking models consume roughly a 1:3 input:output token ratio for their
// reasoning, while other models hover around 3:1. Still approximate, but it
// reflects real prompting patterns more closely.
// Estimated blended USD cost per single token for each model family.
// Input and output tokens are not tracked separately, so each figure is a
// weighted blend of the published per-1M input/output prices (thinking
// models lean roughly 1:3 input:output; other models roughly 3:1).
// Families absent from this table are billed at zero.
const TOKEN_COST_USD: Partial<Record<ModelFamily, number>> = {
  // uncached r1 pricing, the highest average
  deepseek: 0.00000178,
  // highest input/output price, i.e. grok-3
  xai: 0.000014,
  gpt41: 0.0000075,
  "azure-gpt41": 0.0000075,
  "gpt41-mini": 0.0000015,
  "azure-gpt41-mini": 0.0000015,
  "gpt41-nano": 0.0000003,
  "azure-gpt41-nano": 0.0000003,
  // $75/$150 per 1M input/output, blended 3:1 to $93.75/1M
  gpt45: 0.00009375,
  "azure-gpt45": 0.00009375,
  gpt4o: 0.0000075,
  "azure-gpt4o": 0.0000075,
  "gpt4-turbo": 0.0000125,
  "azure-gpt4-turbo": 0.0000125,
  // $150/$600 per 1M input/output; o1-pro's hidden reasoning burns output
  // tokens, so the blend leans heavily toward the output price
  "o1-pro": 0.0004875,
  "azure-o1-pro": 0.0004875,
  // Output tokens aren't tracked separately, and o1 spends far more of them
  // than other models on hidden reasoning. Official pricing is $15/1M input
  // and $60/1M output, so this is deliberately a high estimate.
  o1: 0.00004875,
  "azure-o1": 0.00004875,
  // $1.1/1M input tokens, $4.4/1M output tokens
  "o1-mini": 0.000003575,
  "azure-o1-mini": 0.000003575,
  "o3-mini": 0.000003575,
  "azure-o3-mini": 0.000003575,
  // $10/1M input tokens, $40/1M output tokens
  o3: 0.000032,
  "azure-o3": 0.000032,
  // $1.1/1M input tokens, $4.4/1M output tokens
  "o4-mini": 0.000003575,
  "azure-o4-mini": 0.000003575,
  "gpt4-32k": 0.000075,
  "azure-gpt4-32k": 0.000075,
  gpt4: 0.0000375,
  "azure-gpt4": 0.0000375,
  turbo: 0.00000075,
  "azure-turbo": 0.00000075,
  "dall-e": 0.00001,
  "azure-dall-e": 0.00001,
  // gpt-image-1: $5/1M text-input, $10/1M image-input, $40/1M image-output
  // tokens; a typical image lands between $0.02 and $0.19 depending on
  // quality. This is a balanced estimate across that input/output mix.
  "gpt-image": 0.000018,
  "azure-gpt-image": 0.000018,
  claude: 0.00001,
  "aws-claude": 0.00001,
  "gcp-claude": 0.00001,
  "claude-opus": 0.00003,
  "aws-claude-opus": 0.00003,
  "gcp-claude-opus": 0.00003,
  "mistral-tiny": 0.0000003,
  "aws-mistral-tiny": 0.0000003,
  "mistral-small": 0.00000035,
  "aws-mistral-small": 0.00000035,
  "mistral-medium": 0.000004,
  "aws-mistral-medium": 0.000004,
  "mistral-large": 0.000012,
  "aws-mistral-large": 0.000012,
  "gemini-flash": 0.0000002326,
  "gemini-pro": 0.00000344,
};

/**
 * Estimates the USD cost of `tokens` tokens for the given model family.
 *
 * @param model - Model family whose blended per-token rate to apply.
 * @param tokens - Token count; negative values are clamped to zero.
 * @returns Estimated cost in USD (0 for families with no known rate).
 */
export function getTokenCostUsd(model: ModelFamily, tokens: number) {
  const perToken = TOKEN_COST_USD[model] ?? 0;
  return perToken * Math.max(0, tokens);
}
/**
 * Formats a token count into a short human-readable string, using "k"/"m"/"b"
 * suffixes with 1/2/3 decimal places respectively. The sign of `tokens` is
 * preserved; magnitude alone selects the unit.
 *
 * @param tokens - Raw token count (may be negative).
 * @returns e.g. "999", "1.5k", "2.50m", "3.000b".
 */
export function prettyTokens(tokens: number): string {
  const magnitude = Math.abs(tokens);
  if (magnitude >= 1e9) return (tokens / 1e9).toFixed(3) + "b";
  if (magnitude >= 1e6) return (tokens / 1e6).toFixed(2) + "m";
  if (magnitude >= 1e3) return (tokens / 1e3).toFixed(1) + "k";
  return tokens.toString();
}
/**
 * Renders a cost as a parenthesized dollar suffix for display, e.g. " ($1.23)".
 * Returns the empty string when cost display is disabled in the config.
 *
 * @param cost - Cost in USD.
 */
export function getCostSuffix(cost: number): string {
  return config.showTokenCosts ? ` ($${cost.toFixed(2)})` : "";
}