diff --git a/src/admin/web/manage.ts b/src/admin/web/manage.ts index 1117501..a0101c2 100644 --- a/src/admin/web/manage.ts +++ b/src/admin/web/manage.ts @@ -134,13 +134,9 @@ router.post("/create-user", (req, res) => { const expiresAt = Date.now() + data.temporaryUserDuration * 60 * 1000; const tokenLimits = MODEL_FAMILIES.reduce((limits, modelFamily) => { const quotaValue = data[`temporaryUserQuota_${modelFamily}`]; - if (typeof quotaValue === 'number') { - limits[modelFamily] = { input: quotaValue, output: 0, legacy_total: quotaValue }; - } else { - limits[modelFamily] = { input: 0, output: 0 }; - } + limits[modelFamily] = typeof quotaValue === 'number' ? quotaValue : 0; return limits; - }, {} as UserTokenCounts); + }, {} as any); return { ...data, expiresAt, tokenLimits }; }); @@ -219,42 +215,33 @@ router.post("/import-users", upload.single("users"), (req, res) => { user.tokenCounts = transformedTokenCounts; } - // Also handle tokenLimits and tokenRefresh the same way + // Handle tokenLimits - should be flat numbers if (user.tokenLimits) { const transformedTokenLimits: any = {}; for (const [family, value] of Object.entries(user.tokenLimits)) { if (typeof value === 'number') { - transformedTokenLimits[family] = { - input: 0, - output: 0, - legacy_total: value - }; + // Already in correct format + transformedTokenLimits[family] = value; } else if (typeof value === 'object' && value !== null) { - transformedTokenLimits[family] = { - input: (value as any).input || 0, - output: (value as any).output || 0, - legacy_total: (value as any).legacy_total - }; + // Old format with input/output/legacy_total - sum them up + const val = value as any; + transformedTokenLimits[family] = (val.input ?? 0) + (val.output ?? 0) + (val.legacy_total ?? 0); } } user.tokenLimits = transformedTokenLimits; } + // Handle tokenRefresh - should be flat numbers if (user.tokenRefresh) { const transformedTokenRefresh: any = {}; for (const [family, value] of Object.entries(user.tokenRefresh)) { if (typeof value === 'number') { - transformedTokenRefresh[family] = { - input: 0, - output: 0, - legacy_total: value - }; + // Already in correct format + transformedTokenRefresh[family] = value; } else if (typeof value === 'object' && value !== null) { - transformedTokenRefresh[family] = { - input: (value as any).input || 0, - output: (value as any).output || 0, - legacy_total: (value as any).legacy_total - }; + // Old format with input/output/legacy_total - sum them up + const val = value as any; + transformedTokenRefresh[family] = (val.input ?? 0) + (val.output ?? 0) + (val.legacy_total ?? 0); } } user.tokenRefresh = transformedTokenRefresh; diff --git a/src/admin/web/views/admin_import-users.ejs b/src/admin/web/views/admin_import-users.ejs index b54a510..5c159b6 100644 --- a/src/admin/web/views/admin_import-users.ejs +++ b/src/admin/web/views/admin_import-users.ejs @@ -18,13 +18,19 @@
  • tokenCounts (optional): the number of tokens the user has - consumed. This should be an object with keys turbo, - gpt4, and claude. + consumed. This should be an object with model family keys (e.g. turbo, + gpt4, claude), each containing an object with + input and output token counts.
  • - tokenLimits (optional): the number of tokens the user can - consume. This should be an object with keys turbo, - gpt4, and claude. + tokenLimits (optional): the maximum number of tokens the user can + consume. This should be an object with model family keys (e.g. turbo, + gpt4, claude), each containing a single number + representing the total token quota. +
  • +
  • + tokenRefresh (optional): the amount of tokens to refresh when quotas + are reset. Same format as tokenLimits.
  • createdAt (optional): the timestamp when the user was created diff --git a/src/shared/users/schema.ts b/src/shared/users/schema.ts index 9572460..d525db0 100644 --- a/src/shared/users/schema.ts +++ b/src/shared/users/schema.ts @@ -2,8 +2,7 @@ import { ZodType, z } from "zod"; import { MODEL_FAMILIES, ModelFamily } from "../models"; import { makeOptionalPropsNullable } from "../utils"; -// This just dynamically creates a Zod object type with a key for each model -// family and an optional number value for input and output tokens. +// Schema for token counts - keeps track of input/output usage export const tokenCountsSchema: ZodType = z.object( MODEL_FAMILIES.reduce( (acc, family) => ({ @@ -21,6 +20,17 @@ export const tokenCountsSchema: ZodType = z.object( ) ); +// Schema for token limits - simple numbers representing total quota +export const tokenLimitsSchema: ZodType = z.object( + MODEL_FAMILIES.reduce( + (acc, family) => ({ + ...acc, + [family]: z.number().optional().default(0), + }), + {} as Record> + ) +); + export const UserSchema = z .object({ /** User's personal access token. */ @@ -46,9 +56,9 @@ export const UserSchema = z /** Number of input and output tokens the user has consumed, by model family. */ tokenCounts: tokenCountsSchema, /** Maximum number of tokens the user can consume, by model family. */ - tokenLimits: tokenCountsSchema, + tokenLimits: tokenLimitsSchema, /** User-specific token refresh amount, by model family. */ - tokenRefresh: tokenCountsSchema, + tokenRefresh: tokenLimitsSchema, /** Time at which the user was created. */ createdAt: z.number(), /** Time at which the user last connected. */ @@ -79,5 +89,8 @@ export const UserPartialSchema = makeOptionalPropsNullable(UserSchema) export type UserTokenCounts = { [K in ModelFamily]: { input: number; output: number; legacy_total?: number } | undefined; }; +export type UserTokenLimits = { + [K in ModelFamily]: number | undefined; +}; export type User = z.infer; export type UserUpdate = z.infer; diff --git a/src/shared/users/user-store.ts b/src/shared/users/user-store.ts index 4f646dd..3856892 100644 --- a/src/shared/users/user-store.ts +++ b/src/shared/users/user-store.ts @@ -28,7 +28,7 @@ import { ModelFamily, } from "../models"; import { assertNever } from "../utils"; -import { User, UserTokenCounts, UserUpdate } from "./schema"; +import { User, UserTokenCounts, UserTokenLimits, UserUpdate } from "./schema"; const log = logger.child({ module: "users" }); @@ -73,6 +73,32 @@ const migrateTokenCountsProperty = ( return result; }; +// Migration function for tokenLimits/tokenRefresh to flat numbers +const migrateTokenLimitsProperty = ( + parsedProperty: any, // Data from DB + defaultConfigForProperty: Record // e.g., config.tokenQuota +): UserTokenLimits => { + const result = {} as UserTokenLimits; + + for (const family of MODEL_FAMILIES) { + const dbValue = parsedProperty?.[family]; + const configValue = defaultConfigForProperty[family]; + + if (typeof dbValue === 'number') { + // Already in correct format + result[family] = dbValue; + } else if (typeof dbValue === 'object' && dbValue !== null) { + // Old format with input/output/legacy_total - sum them up + const total = (dbValue.input ?? 0) + (dbValue.output ?? 0) + (dbValue.legacy_total ?? 0); + result[family] = total > 0 ? total : (configValue ?? 0); + } else { + // Missing or invalid - use config default + result[family] = configValue ?? 0; + } + } + return result; +}; + const users: Map = new Map(); const usersToFlush = new Set(); let quotaRefreshJob: schedule.Job | null = null; @@ -120,14 +146,15 @@ export function createUser(createOptions?: { ip: [], type: "normal", promptCount: 0, - tokenCounts: { ...INITIAL_TOKENS }, // New counts don't have legacy_total + tokenCounts: { ...INITIAL_TOKENS }, tokenLimits: createOptions?.tokenLimits ?? MODEL_FAMILIES.reduce((acc, family) => { - const quota = config.tokenQuota[family]; - // If quota is a number, it's a legacy total limit, store it as such - acc[family] = typeof quota === 'number' ? { input: quota, output: 0, legacy_total: quota } : (quota || { input: 0, output: 0 }); + acc[family] = config.tokenQuota[family] ?? 0; return acc; - }, {} as UserTokenCounts), - tokenRefresh: createOptions?.tokenRefresh ?? { ...INITIAL_TOKENS }, // Refresh amounts typically start fresh + }, {} as UserTokenLimits), + tokenRefresh: createOptions?.tokenRefresh ?? MODEL_FAMILIES.reduce((acc, family) => { + acc[family] = config.tokenQuota[family] ?? 0; + return acc; + }, {} as UserTokenLimits), createdAt: Date.now(), meta: {}, }; @@ -170,14 +197,15 @@ export function upsertUser(user: UserUpdate) { ip: [], type: "normal", promptCount: 0, - tokenCounts: { ...INITIAL_TOKENS }, // New counts don't have legacy_total + tokenCounts: { ...INITIAL_TOKENS }, tokenLimits: MODEL_FAMILIES.reduce((acc, family) => { - const quota = config.tokenQuota[family]; - // If quota is a number, it's a legacy total limit, store it as such - acc[family] = typeof quota === 'number' ? { input: quota, output: 0, legacy_total: quota } : (quota || { input: 0, output: 0 }); + acc[family] = config.tokenQuota[family] ?? 0; return acc; - }, {} as UserTokenCounts), - tokenRefresh: { ...INITIAL_TOKENS }, // Refresh amounts typically start fresh + }, {} as UserTokenLimits), + tokenRefresh: MODEL_FAMILIES.reduce((acc, family) => { + acc[family] = config.tokenQuota[family] ?? 0; + return acc; + }, {} as UserTokenLimits), createdAt: Date.now(), meta: {}, }; @@ -207,28 +235,13 @@ export function upsertUser(user: UserUpdate) { } if (updates.tokenLimits) { for (const family of MODEL_FAMILIES) { - updates.tokenLimits[family] ??= { input: 0, output: 0 }; - // The property is now guaranteed to be an object, so the 'number' check is removed. - // Defaulting individual fields if they are missing. - const limits = updates.tokenLimits[family]!; // Should not be undefined here - limits.input ??= 0; - limits.output ??= 0; - // legacy_total is optional and not defaulted here if missing + updates.tokenLimits[family] ??= 0; } } - // tokenRefresh is a special case where we want to merge the existing and - // updated values for each model family, ignoring falsy values. if (updates.tokenRefresh) { - const merged = { ...existing.tokenRefresh } as UserTokenCounts; for (const family of MODEL_FAMILIES) { - const updateRefresh = updates.tokenRefresh[family]; - const existingRefresh = existing.tokenRefresh[family]; - merged[family] = { - input: (updateRefresh?.input || existingRefresh?.input) ?? 0, - output: (updateRefresh?.output || existingRefresh?.output) ?? 0, - }; + updates.tokenRefresh[family] ??= 0; } - updates.tokenRefresh = merged; } users.set(user.token, Object.assign(existing, updates)); @@ -322,74 +335,42 @@ export function hasAvailableQuota({ const modelFamily = getModelFamilyForQuotaUsage(model, api); const { tokenCounts, tokenLimits } = user; - const limitConfig = tokenLimits[modelFamily]; const currentUsage = tokenCounts[modelFamily] ?? { input: 0, output: 0 }; - // If no specific limit object for the family, or if it's essentially unlimited (e.g. input/output are 0 or not set) - // fall back to checking config.tokenQuota which is a number (total limit). - if (!limitConfig || (limitConfig.input === 0 && limitConfig.output === 0 && !config.tokenQuota[modelFamily])) { - return true; // No effective limit - } + // Calculate total tokens consumed so far (including legacy) + const totalConsumed = (currentUsage.input ?? 0) + (currentUsage.output ?? 0) + (currentUsage.legacy_total ?? 0); - let effectiveLimit: number; - if (limitConfig && (limitConfig.input > 0 || limitConfig.output > 0)) { - // If a specific limit object exists and has positive values, sum them. - // This assumes the limit is a total limit. If input/output are separate, this logic needs change. - effectiveLimit = (limitConfig.input ?? Number.MAX_SAFE_INTEGER) + (limitConfig.output ?? Number.MAX_SAFE_INTEGER); - } else { - // Fallback to general numeric quota from config if specific limitObj is not effectively set. - const generalQuota = config.tokenQuota[modelFamily]; - if (typeof generalQuota === 'number' && generalQuota > 0) { - effectiveLimit = generalQuota; - } else { - return true; // No limit defined - } - } + // Get the quota limit as a single number + const limit = tokenLimits[modelFamily] ?? config.tokenQuota[modelFamily] ?? 0; + + // If no limit (0 or undefined), quota is unlimited + if (!limit || limit === 0) return true; - // Assuming 'requested' is for input tokens. If 'requested' can be input or output, - // this needs to be an object {input: number, output: number}. - // For now, we sum current input & output and add 'requested' to input for checking. - // This is a simplification. A more robust solution would involve 'requested' being an object. - const totalConsumed = (currentUsage.input ?? 0) + (currentUsage.output ?? 0) + requested; - return totalConsumed < effectiveLimit; + // Check if the request would exceed the limit + // 'requested' is already the sum of input and output tokens from the middleware + return (totalConsumed + requested) <= limit; } /** - * For the given user, sets token limits for each model family to the sum of the - * current count and the refresh amount, up to the default limit. If a quota is - * not specified for a model family, it is not touched. + * For the given user, refreshes token limits for each model family. The new limit + * is set to the configured quota value (either from user's tokenRefresh or global config). + * This replaces the current limits entirely, not adding to them. */ export function refreshQuota(token: string) { const user = users.get(token); if (!user) return; - const { tokenQuota } = config; - const { tokenCounts, tokenLimits, tokenRefresh } = user; + const { tokenLimits, tokenRefresh } = user; for (const family of MODEL_FAMILIES) { - const currentUsage = tokenCounts[family] ?? { input: 0, output: 0 }; - const userRefreshConfig = tokenRefresh[family] ?? { input: 0, output: 0 }; - const globalDefaultQuotaValue = config.tokenQuota[family]; // This is a number or undefined + // Get the quota value to set (from user refresh config or global default) + const userQuota = tokenRefresh[family] ?? 0; + const globalQuota = config.tokenQuota[family] ?? 0; + + const quotaToSet = userQuota || globalQuota; - let refreshInputAmount = 0; - let refreshOutputAmount = 0; - - // Prioritize user-specific refresh amounts if they are positive - if (userRefreshConfig.input > 0 || userRefreshConfig.output > 0) { - refreshInputAmount = userRefreshConfig.input; - refreshOutputAmount = userRefreshConfig.output; - } else if (typeof globalDefaultQuotaValue === 'number' && globalDefaultQuotaValue > 0) { - // If no user-specific refresh, use the global quota. - // Distribute the global quota. For simplicity, add to input, or define a rule. - // Here, let's assume the global quota is a total that primarily refreshes 'input'. - refreshInputAmount = globalDefaultQuotaValue; - refreshOutputAmount = 0; // Or some portion of globalDefaultQuotaValue - } - - if (refreshInputAmount > 0 || refreshOutputAmount > 0) { - tokenLimits[family] = { - input: (currentUsage.input ?? 0) + refreshInputAmount, - output: (currentUsage.output ?? 0) + refreshOutputAmount, - }; + // Only update if we have a valid quota + if (quotaToSet > 0) { + tokenLimits[family] = quotaToSet; } } usersToFlush.add(token); @@ -510,8 +491,8 @@ async function initFirebase() { createdAt: rawUser.createdAt || Date.now(), // Migrate token fields tokenCounts: migrateTokenCountsProperty(rawUser.tokenCounts, INITIAL_TOKENS), - tokenLimits: migrateTokenCountsProperty(rawUser.tokenLimits, config.tokenQuota), - tokenRefresh: migrateTokenCountsProperty(rawUser.tokenRefresh, INITIAL_TOKENS), + tokenLimits: migrateTokenLimitsProperty(rawUser.tokenLimits, config.tokenQuota), + tokenRefresh: migrateTokenLimitsProperty(rawUser.tokenRefresh, config.tokenQuota), meta: rawUser.meta || {}, }; // Use the internal map directly to avoid re-triggering upsertUser's default creations @@ -569,8 +550,8 @@ async function loadUsersFromSQLite() { // Added type: row.type, promptCount: row.promptCount, tokenCounts: migrateTokenCountsProperty(rawTokenCounts, INITIAL_TOKENS), - tokenLimits: migrateTokenCountsProperty(rawTokenLimits, config.tokenQuota), - tokenRefresh: migrateTokenCountsProperty(rawTokenRefresh, INITIAL_TOKENS), + tokenLimits: migrateTokenLimitsProperty(rawTokenLimits, config.tokenQuota), + tokenRefresh: migrateTokenLimitsProperty(rawTokenRefresh, config.tokenQuota), createdAt: row.createdAt, lastUsedAt: row.lastUsedAt, disabledAt: row.disabledAt, @@ -626,8 +607,8 @@ async function flushUsersToSQLite() { // Added type: user.type, promptCount: user.promptCount, tokenCounts: JSON.stringify(user.tokenCounts || INITIAL_TOKENS), - tokenLimits: JSON.stringify(user.tokenLimits || migrateTokenCountsProperty(null, config.tokenQuota)), - tokenRefresh: JSON.stringify(user.tokenRefresh || INITIAL_TOKENS), + tokenLimits: JSON.stringify(user.tokenLimits || migrateTokenLimitsProperty(null, config.tokenQuota)), + tokenRefresh: JSON.stringify(user.tokenRefresh || migrateTokenLimitsProperty(null, config.tokenQuota)), createdAt: user.createdAt, lastUsedAt: user.lastUsedAt ?? null, disabledAt: user.disabledAt ?? null, diff --git a/src/shared/views/partials/shared_quota-info.ejs b/src/shared/views/partials/shared_quota-info.ejs index c33198e..7294426 100644 --- a/src/shared/views/partials/shared_quota-info.ejs +++ b/src/shared/views/partials/shared_quota-info.ejs @@ -25,17 +25,18 @@ const quotaTableId = Math.random().toString(36).slice(2); <% Object.entries(quota).forEach(([key, configLimit]) => { %> <% const counts = user.tokenCounts[key] || { input: 0, output: 0 }; - const limits = user.tokenLimits[key] || { input: 0, output: 0 }; // Default if not set - const refresh = user.tokenRefresh[key] || { input: 0, output: 0 }; + const limits = user.tokenLimits[key] ?? 0; // Now a flat number + const refresh = user.tokenRefresh[key] ?? 0; // Now a flat number const usageInput = Number(counts.input) || 0; const usageOutput = Number(counts.output) || 0; const usageLegacy = Number(counts.legacy_total) || 0; - const displayUsage = usageInput + usageOutput || usageLegacy; // This is for total token display, not directly for cost calculation here + // Total usage is the sum of all: legacy (historical) + current input + current output + const totalUsage = usageInput + usageOutput + usageLegacy; + const displayUsage = totalUsage; // This is for total token display, not directly for cost calculation here - const limitInput = Number(limits.input) || 0; - // If limit was from legacy config.tokenQuota (a number), it's in limits.legacy_total or limits.input - const displayLimit = limitInput || Number(limits.legacy_total) || 0; + // Limits are now flat numbers + const displayLimit = Number(limits) || 0; // Determine tokens to use for cost calculation const costInputTokens = (usageInput + usageOutput > 0) ? usageInput : usageLegacy; @@ -45,16 +46,16 @@ const quotaTableId = Math.random().toString(36).slice(2); let remaining = 0; let limitIsSet = false; if (displayLimit > 0) { - remaining = displayLimit - (usageInput + usageOutput); + remaining = displayLimit - totalUsage; limitIsSet = true; } else if (typeof configLimit === 'number' && configLimit > 0) { // Fallback to global config limit if user-specific limit is 0 or not set meaningfully - remaining = configLimit - (usageInput + usageOutput); + remaining = configLimit - totalUsage; limitIsSet = true; } - - const refreshDisplayValue = (Number(refresh.input) || 0) + (Number(refresh.output) || 0) || configLimit || 0; + // Refresh is now a flat number + const refreshDisplayValue = Number(refresh) || configLimit || 0; %> <%- key %>