diff --git a/src/apps/chat/components/composer/tokens/TokenTooltip.tsx b/src/apps/chat/components/composer/tokens/TokenTooltip.tsx index 63f066322..9a36b17f8 100644 --- a/src/apps/chat/components/composer/tokens/TokenTooltip.tsx +++ b/src/apps/chat/components/composer/tokens/TokenTooltip.tsx @@ -3,7 +3,7 @@ import * as React from 'react'; import type { SxProps } from '@mui/joy/styles/types'; import { Box, ColorPaletteProp, Tooltip } from '@mui/joy'; -import { DChatGeneratePricing, getLlmPriceForTokens } from '~/common/stores/llms/llms.pricing'; +import { DChatGeneratePricing, getLlmCostForTokens } from '~/common/stores/llms/llms.pricing'; import { adjustContentScaling, themeScalingMap } from '~/common/app.theme'; import { formatModelsCost } from '~/common/util/costUtils'; import { useUIContentScaling } from '~/common/state/store-ui'; @@ -43,8 +43,8 @@ export function tokenCountsMathAndMessage(tokenLimit: number | 0, directTokens: // add the price, if available if (chatPricing) { - const inputPrice = getLlmPriceForTokens(usedInputTokens, usedInputTokens, chatPricing.input); - const outputPrice = getLlmPriceForTokens(usedInputTokens, responseMaxTokens || 0, chatPricing.output); + const inputPrice = getLlmCostForTokens(usedInputTokens, usedInputTokens, chatPricing.input); + const outputPrice = getLlmCostForTokens(usedInputTokens, responseMaxTokens || 0, chatPricing.output); costMin = inputPrice; const costOutMax = outputPrice; diff --git a/src/common/stores/llms/llms.pricing.ts b/src/common/stores/llms/llms.pricing.ts index c0fa625a2..db302f31c 100644 --- a/src/common/stores/llms/llms.pricing.ts +++ b/src/common/stores/llms/llms.pricing.ts @@ -9,21 +9,26 @@ export type DModelPricing = { chat?: DChatGeneratePricing, } +// NOTE: (!) 
keep this in sync with ChatGeneratePricing_schema (modules/llms/server/llm.server.types.ts) export type DChatGeneratePricing = { // unit: 'USD_Mtok', - input?: DTieredPrice; - output?: DTieredPrice; + input?: DTieredPricing; + output?: DTieredPricing; cache?: { cType: 'ant-bp'; - read: DTieredPrice; - write: DTieredPrice; + read: DTieredPricing; + write: DTieredPricing; duration: number; // seconds + } | { + cType: 'oai-apc'; + read: DTieredPricing; + // write: DTieredPricing; // Not needed, as it's automatic }; - // NOT in AixWire_API_ListModels.PriceChatGenerate_schema + // NOT in AixWire_API_ListModels.ChatGeneratePricing_schema _isFree?: boolean; // precomputed, so we avoid recalculating it } -type DTieredPrice = DPricePerMToken | DPriceUpTo[]; +type DTieredPricing = DPricePerMToken | DPriceUpTo[]; type DPriceUpTo = { upTo: number | null, @@ -35,46 +40,43 @@ type DPricePerMToken = number | 'free'; /// detect Free Pricing -export function isModelPriceFree(priceChatGenerate: DChatGeneratePricing): boolean { - if (!priceChatGenerate) return true; - return _isPriceFree(priceChatGenerate.input) && _isPriceFree(priceChatGenerate.output); +export function isModelPricingFree(pricingChatGenerate: DChatGeneratePricing): boolean { + if (!pricingChatGenerate) return true; + return _isPricingFree(pricingChatGenerate.input) && _isPricingFree(pricingChatGenerate.output); } -function _isPriceFree(price: DTieredPrice | undefined): boolean { - if (price === 'free') return true; - if (price === undefined) return false; - if (typeof price === 'number') return price === 0; - return price.every(tier => _isPricePerMTokenFree(tier.price)); -} - -function _isPricePerMTokenFree(price: DPricePerMToken): boolean { - return price === 'free' || price === 0; +function _isPricingFree(pricing: DTieredPricing | undefined): boolean { + if (pricing === 'free') return true; + if (pricing === undefined) return false; + if (typeof pricing === 'number') return pricing === 0; + return 
pricing.every(tier => tier.price === 'free' || tier.price === 0); } -/// Human readable price formatting +/// Human readable cost -export function getLlmPriceForTokens(inputTokens: number, tokens: number, pricing: DTieredPrice | undefined): number | undefined { +export function getLlmCostForTokens(inputTokens: number, tokens: number, pricing: DTieredPricing | undefined): number | undefined { if (!pricing) return undefined; if (pricing === 'free') return 0; + + // Cost = tokens * price / 1e6 if (typeof pricing === 'number') return tokens * pricing / 1e6; // Find the applicable tier based on input tokens const applicableTier = pricing.find(tier => tier.upTo === null || inputTokens <= tier.upTo); - - // This should not happen if the pricing is well-formed if (!applicableTier) { - console.log('[DEV] getPriceForTokens: No applicable tier found for input tokens', { inputTokens, pricing }); + console.log('[DEV] getLlmCostForTokens: No applicable tier found for input tokens', { inputTokens, pricing }); return undefined; } - // Apply the price of the found tier to all tokens + // Cost = tier pricing * tokens / 1e6 (or free) if (applicableTier.price === 'free') return 0; + // Note: apply the pricing of the found tier to all tokens return tokens * applicableTier.price / 1e6; } -// Compatibiltiy layer for pricing V2 -> V3 +// Compatibility layer for pricing V2 -> V3 interface Was_DModelPricingV2 { chatIn?: number @@ -92,7 +94,7 @@ export function portModelPricingV2toV3(llm: DLLM): void { V3.input = pretendIsV2.chatIn; if (pretendIsV2.chatOut) V3.output = pretendIsV2.chatOut; - V3._isFree = isModelPriceFree(V3); + V3._isFree = isModelPricingFree(V3); llm.pricing = { chat: V3 }; delete pretendIsV2.chatIn; delete pretendIsV2.chatOut; diff --git a/src/common/stores/llms/store-llms.ts b/src/common/stores/llms/store-llms.ts index 938650614..3d64ff5ac 100644 --- a/src/common/stores/llms/store-llms.ts +++ b/src/common/stores/llms/store-llms.ts @@ -10,7 +10,7 @@ import type { 
ModelVendorId } from '~/modules/llms/vendors/vendors.registry'; import type { DLLM, DLLMId } from './llms.types'; import type { DModelsService, DModelsServiceId } from './modelsservice.types'; -import { portModelPricingV2toV3 } from './llms.pricing'; +import { getLlmCostForTokens, portModelPricingV2toV3 } from './llms.pricing'; /// ModelsStore - a store for configured LLMs and configured services @@ -331,57 +331,52 @@ export function getLLMsDebugInfo() { function _heuristicUpdateSelectedLLMs(allLlms: DLLM[], chatLlmId: DLLMId | null, fastLlmId: DLLMId | null, funcLlmId: DLLMId | null) { - // the output of _groupLlmsByVendorRankedByElo - let grouped: ReturnType<typeof _groupLlmsByVendorRankedByElo> | null = null; + let grouped: GroupedVendorLLMs; function cachedGrouped() { if (!grouped) grouped = _groupLlmsByVendorRankedByElo(allLlms); return grouped; } - // the best llm + // default Chat: top vendor by Elo, top model if (!chatLlmId || !allLlms.find(llm => llm.id === chatLlmId)) { const vendors = cachedGrouped(); chatLlmId = vendors.length ? vendors[0].llmsByElo[0].id : null; } - // a fast llm (bottom elo of the top vendor ~~ not really a proxy, but not sure which heuristic to use here) - if (!fastLlmId && !allLlms.find(llm => llm.id === fastLlmId)) { + // default Fast: vendors by Elo, lowest cost (if available) + if (!fastLlmId || !allLlms.find(llm => llm.id === fastLlmId)) { const vendors = cachedGrouped(); - fastLlmId = vendors.length - ? vendors[0].llmsByElo.findLast(llm => llm.cbaElo)?.id // last with ELO - ?? vendors[0].llmsByElo[vendors[0].llmsByElo.length - 1].id ?? 
null // last - : null; + fastLlmId = _selectFastLlmID(vendors); } - // a func llm (same as chat for now, hoping the highest grade also has function calling) + // a func llm (same as chat for now) if (!funcLlmId || !allLlms.find(llm => llm.id === funcLlmId)) funcLlmId = chatLlmId; return { chatLLMId: chatLlmId, fastLLMId: fastLlmId, funcLLMId: funcLlmId }; } -function _groupLlmsByVendorRankedByElo(llms: DLLM[]): { vendorId: ModelVendorId, llmsByElo: { id: DLLMId, cbaElo: number | undefined }[] }[] { + +type BenchVendorLLMs = { vendorId: ModelVendorId, llmsByElo: { id: DLLMId, cbaElo: number | undefined, costRank: number | undefined }[] }; +type GroupedVendorLLMs = BenchVendorLLMs[]; + +function _groupLlmsByVendorRankedByElo(llms: DLLM[]): GroupedVendorLLMs { // group all LLMs by vendor const grouped = llms.reduce((acc, llm) => { if (llm.hidden) return acc; - const vendor = acc.find(v => v.vendorId === llm.vId); - if (!vendor) { - acc.push({ - vendorId: llm.vId, - llmsByElo: [{ - id: llm.id, - cbaElo: llm.benchmark?.cbaElo, - }], - }); - } else { - vendor.llmsByElo.push({ - id: llm.id, - cbaElo: llm.benchmark?.cbaElo, - }); - } + const group = acc.find(v => v.vendorId === llm.vId); + const eloCostItem = { + id: llm.id, + cbaElo: llm.benchmark?.cbaElo, + costRank: !llm.pricing ? undefined : _getLlmCostBenchmark(llm), + }; + if (!group) + acc.push({ vendorId: llm.vId, llmsByElo: [eloCostItem] }); + else + group.llmsByElo.push(eloCostItem); return acc; - }, [] as { vendorId: ModelVendorId, llmsByElo: { id: DLLMId, cbaElo: number | undefined }[] }[]); + }, [] as GroupedVendorLLMs); // sort each vendor's LLMs by elo, decreasing for (const vendor of grouped) @@ -391,3 +386,28 @@ function _groupLlmsByVendorRankedByElo(llms: DLLM[]): { vendorId: ModelVendorId, grouped.sort((a, b) => (b.llmsByElo[0].cbaElo ?? -1) - (a.llmsByElo[0].cbaElo ?? -1)); return grouped; } + +// Hypothetical cost benchmark for a model, based on total cost of 100k input tokens and 10k output tokens. 
+function _getLlmCostBenchmark(llm: DLLM): number | undefined { + if (!llm.pricing?.chat) return undefined; + const costIn = getLlmCostForTokens(100000, 100000, llm.pricing.chat.input); + const costOut = getLlmCostForTokens(100000, 10000, llm.pricing.chat.output); + return (costIn !== undefined && costOut !== undefined) ? costIn + costOut : undefined; +} + +// Selects the 'fast' llm +function _selectFastLlmID(vendors: GroupedVendorLLMs) { + if (!vendors.length) return null; + for (const vendor of vendors) { + const lowestCostLlm = vendor.llmsByElo.reduce((acc, llm) => { + if (!acc) + return llm; + if (llm.costRank === undefined || acc.costRank === undefined) + return acc; + return llm.costRank < acc.costRank ? llm : acc; + }, null as BenchVendorLLMs['llmsByElo'][number] | null); + if (lowestCostLlm) + return lowestCostLlm.id; + } + return null; +} diff --git a/src/common/stores/metrics/metrics.chatgenerate.ts b/src/common/stores/metrics/metrics.chatgenerate.ts index 3b5590343..65137891d 100644 --- a/src/common/stores/metrics/metrics.chatgenerate.ts +++ b/src/common/stores/metrics/metrics.chatgenerate.ts @@ -1,4 +1,4 @@ -import { DChatGeneratePricing, getLlmPriceForTokens, isModelPriceFree } from '~/common/stores/llms/llms.pricing'; +import { DChatGeneratePricing, getLlmCostForTokens, isModelPricingFree } from '~/common/stores/llms/llms.pricing'; /** * This is a stored type - IMPORTANT: do not break. @@ -105,7 +105,7 @@ export function computeChatGenerationCosts(metrics?: Readonly