Non-linear pricing support.

This commit is contained in:
Enrico Ros
2024-08-17 03:26:59 -07:00
parent 77777da122
commit c6d4f2834e
5 changed files with 69 additions and 34 deletions
@@ -188,7 +188,7 @@ export function Composer(props: {
const tokensHistory = _historyTokenCount;
const tokensReponseMax = (props.chatLLM?.options as DOpenAILLMOptions /* FIXME: BIG ASSUMPTION */)?.llmResponseTokens || 0;
const tokenLimit = props.chatLLM?.contextTokens || 0;
const tokenPricing = props.chatLLM?.pricing?.chat;
const tokenChatPricing = props.chatLLM?.pricing?.chat;
// Effect: load initial text if queued up (e.g. by /link/share_target)
@@ -699,11 +699,11 @@ export function Composer(props: {
}} />
{!showChatInReferenceTo && tokenLimit > 0 && (tokensComposer > 0 || (tokensHistory + tokensReponseMax) > 0) && (
<TokenProgressbarMemo direct={tokensComposer} history={tokensHistory} responseMax={tokensReponseMax} limit={tokenLimit} tokenPricing={tokenPricing} />
<TokenProgressbarMemo chatPricing={tokenChatPricing} direct={tokensComposer} history={tokensHistory} responseMax={tokensReponseMax} limit={tokenLimit} />
)}
{!showChatInReferenceTo && tokenLimit > 0 && (
<TokenBadgeMemo direct={tokensComposer} history={tokensHistory} responseMax={tokensReponseMax} limit={tokenLimit} tokenPricing={tokenPricing} showCost={labsShowCost} enableHover={!isMobile} showExcess absoluteBottomRight />
<TokenBadgeMemo chatPricing={tokenChatPricing} direct={tokensComposer} history={tokensHistory} responseMax={tokensReponseMax} limit={tokenLimit} showCost={labsShowCost} enableHover={!isMobile} showExcess absoluteBottomRight />
)}
</Box>
@@ -13,16 +13,13 @@ import { formatTokenCost, tokenCountsMathAndMessage, TokenTooltip } from './Toke
export const TokenBadgeMemo = React.memo(TokenBadge);
function TokenBadge(props: {
chatPricing?: DPriceChatGenerate,
direct: number,
history?: number,
responseMax?: number,
limit: number,
// FIXME: continue from here
tokenPricing?: DPriceChatGenerate,
tokenPriceIn?: number,
tokenPriceOut?: number,
enableHover?: boolean,
showCost?: boolean
showExcess?: boolean,
@@ -34,7 +31,7 @@ function TokenBadge(props: {
const [isHovering, setIsHovering] = React.useState(false);
const { message, color, remainingTokens, costMax, costMin } =
tokenCountsMathAndMessage(props.limit, props.direct, props.history, props.responseMax, props.tokenPriceIn, props.tokenPriceOut);
tokenCountsMathAndMessage(props.limit, props.direct, props.history, props.responseMax, props.chatPricing);
// handlers
@@ -15,15 +15,12 @@ import { tokenCountsMathAndMessage, TokenTooltip } from './TokenTooltip';
export const TokenProgressbarMemo = React.memo(TokenProgressbar);
function TokenProgressbar(props: {
chatPricing?: DPriceChatGenerate,
direct: number,
history: number,
responseMax: number,
limit: number,
// FIXME: continue from here
tokenPricing?: DPriceChatGenerate,
tokenPriceIn?: number,
tokenPriceOut?: number,
}) {
// external state
@@ -53,7 +50,7 @@ function TokenProgressbar(props: {
const overflowColor = theme.palette.danger.softColor;
// tooltip message/color
const { message, color } = tokenCountsMathAndMessage(props.limit, props.direct, props.history, props.responseMax, props.tokenPriceIn, props.tokenPriceOut);
const { message, color } = tokenCountsMathAndMessage(props.limit, props.direct, props.history, props.responseMax, props.chatPricing);
// sizes
const containerHeight = 8;
@@ -3,11 +3,13 @@ import * as React from 'react';
import type { SxProps } from '@mui/joy/styles/types';
import { Box, ColorPaletteProp, Tooltip } from '@mui/joy';
import type { DPriceChatGenerate } from '~/common/stores/llms/dllm.types';
import { adjustContentScaling, themeScalingMap } from '~/common/app.theme';
import { getPriceForTokens } from '~/common/stores/llms/llms.pricing';
import { useUIContentScaling } from '~/common/state/store-ui';
export function tokenCountsMathAndMessage(tokenLimit: number | 0, directTokens: number, historyTokens?: number, responseMaxTokens?: number, tokenPriceIn?: number, tokenPriceOut?: number): {
export function tokenCountsMathAndMessage(tokenLimit: number | 0, directTokens: number, historyTokens?: number, responseMaxTokens?: number, chatPricing?: DPriceChatGenerate): {
color: ColorPaletteProp,
message: string,
remainingTokens: number,
@@ -40,28 +42,43 @@ export function tokenCountsMathAndMessage(tokenLimit: number | 0, directTokens:
` - Max response: ${_alignRight(responseMaxTokens || 0)}`;
// add the price, if available
if (tokenPriceIn || tokenPriceOut) {
costMin = tokenPriceIn ? usedInputTokens * tokenPriceIn / 1E6 : undefined;
const costOutMax = (tokenPriceOut && responseMaxTokens) ? responseMaxTokens * tokenPriceOut / 1E6 : undefined;
if (costMin || costOutMax) {
if (chatPricing) {
const inputPrice = getPriceForTokens(usedInputTokens, usedInputTokens, chatPricing.input);
const outputPrice = getPriceForTokens(usedInputTokens, responseMaxTokens || 0, chatPricing.output);
costMin = inputPrice;
const costOutMax = outputPrice;
if (costMin !== undefined || costOutMax !== undefined) {
message += `\n\n\n▶ Chat Turn Cost (max, approximate)\n`;
if (costMin) message += '\n' +
` Input tokens: ${_alignRight(usedInputTokens)}\n` +
` Input Price $/M: ${tokenPriceIn!.toFixed(2).padStart(8)}\n` +
` Input cost: ${('$' + costMin!.toFixed(4)).padStart(8)}\n`;
if (costMin !== undefined) {
const inputPricePerM = costMin * 1e6 / usedInputTokens;
message += '\n' +
` Input tokens: ${_alignRight(usedInputTokens)}\n` +
` Input Price $/M: ${inputPricePerM.toFixed(2).padStart(8)}\n` +
` Input cost: ${('$' + costMin.toFixed(4)).padStart(8)}\n`;
}
if (costOutMax) message += '\n' +
` Max output tokens: ${_alignRight(responseMaxTokens!)}\n` +
` Output Price $/M: ${tokenPriceOut!.toFixed(2).padStart(8)}\n` +
` Max output cost: ${('$' + costOutMax!.toFixed(4)).padStart(8)}\n`;
if (costOutMax !== undefined) {
const outputPricePerM = costOutMax * 1e6 / (responseMaxTokens || 1);
message += '\n' +
` Max output tokens: ${_alignRight(responseMaxTokens!)}\n` +
` Output Price $/M: ${outputPricePerM.toFixed(2).padStart(8)}\n` +
` Max output cost: ${('$' + costOutMax.toFixed(4)).padStart(8)}\n`;
}
if (costMin) message += '\n' +
` > Min message cost: <span class="highlight-cost yellow">${formatTokenCost(costMin).padStart(8)}</span>`;
costMax = (costMin && costOutMax) ? costMin + costOutMax : undefined;
if (costMax) message += '\n' +
` < Max message cost: <span>${formatTokenCost(costMax).padStart(8)}</span>\n` +
' (depends on assistant response)';
if (costMin !== undefined) {
message += '\n' +
` > Min message cost: <span class="highlight-cost yellow">${formatTokenCost(costMin).padStart(8)}</span>`;
}
costMax = (costMin !== undefined && costOutMax !== undefined) ? costMin + costOutMax : undefined;
if (costMax !== undefined) {
message += '\n' +
` < Max message cost: <span>${formatTokenCost(costMax).padStart(8)}</span>\n` +
' (depends on assistant response)';
}
}
}
}
+24
View File
@@ -1,6 +1,8 @@
import type { DLLM, DPriceChatGenerate, DPricePerMToken, DTieredPrice } from './dllm.types';
/// detect Free Pricing
export function isModelPriceFree(priceChatGenerate: DPriceChatGenerate): boolean {
if (!priceChatGenerate) return true;
return _isPriceFree(priceChatGenerate.input) && _isPriceFree(priceChatGenerate.output);
@@ -19,6 +21,28 @@ function _isPricePerMTokenFree(price: DPricePerMToken): boolean {
}
/// Human readable price formatting
/**
 * Computes the cost of `tokens` tokens under a flat or tiered price.
 *
 * The tier is selected by `inputTokens`, while the cost is computed on `tokens`;
 * callers pass the same value for both when pricing the input side, and the
 * response size as `tokens` when pricing the output side.
 *
 * @param inputTokens - token count used only to choose the tier of a tiered price
 * @param tokens - token count the selected unit price is multiplied by
 * @param pricing - `'free'`, a single number, or an ordered list of `{ upTo, price }` tiers
 * @returns `tokens * price / 1e6` (i.e. the price is treated as per-million-tokens),
 *          `0` for free pricing, or `undefined` when the pricing is missing/falsy
 *          or no tier matches
 */
export function getPriceForTokens(inputTokens: number, tokens: number, pricing: DTieredPrice | undefined): number | undefined {
  // No pricing information (any falsy value, which includes a numeric 0): cost is unknown
  if (!pricing)
    return undefined;

  // Explicitly free
  if (pricing === 'free')
    return 0;

  // Flat price: a single number applied to every token
  if (typeof pricing === 'number')
    return tokens * pricing / 1e6;

  // Tiered price: scan in declaration order and stop at the first tier that is
  // either open-ended (upTo === null) or whose ceiling covers the input tokens
  for (const tier of pricing) {
    const coversInput = tier.upTo === null || inputTokens <= tier.upTo;
    if (!coversInput)
      continue;

    // The matched tier's price applies to all of the tokens, not just a slice
    return tier.price === 'free' ? 0 : tokens * tier.price / 1e6;
  }

  // Reaching here means no tier matched, which well-formed pricing should not allow
  console.log('[DEV] getPriceForTokens: No applicable tier found for input tokens', { inputTokens, pricing });
  return undefined;
}
// Compatibility layer for pricing V2 -> V3
interface Was_DModelPricingV2 {