Merge branch 'main' into 'main'

fix quota handling with new user schema

See merge request reanon/nonono!4
This commit is contained in:
reanon
2025-06-16 09:42:39 -08:00
5 changed files with 124 additions and 136 deletions
+14 -27
View File
@@ -134,13 +134,9 @@ router.post("/create-user", (req, res) => {
const expiresAt = Date.now() + data.temporaryUserDuration * 60 * 1000;
const tokenLimits = MODEL_FAMILIES.reduce((limits, modelFamily) => {
const quotaValue = data[`temporaryUserQuota_${modelFamily}`];
if (typeof quotaValue === 'number') {
limits[modelFamily] = { input: quotaValue, output: 0, legacy_total: quotaValue };
} else {
limits[modelFamily] = { input: 0, output: 0 };
}
limits[modelFamily] = typeof quotaValue === 'number' ? quotaValue : 0;
return limits;
}, {} as UserTokenCounts);
}, {} as any);
return { ...data, expiresAt, tokenLimits };
});
@@ -219,42 +215,33 @@ router.post("/import-users", upload.single("users"), (req, res) => {
user.tokenCounts = transformedTokenCounts;
}
// Also handle tokenLimits and tokenRefresh the same way
// Handle tokenLimits - should be flat numbers
if (user.tokenLimits) {
const transformedTokenLimits: any = {};
for (const [family, value] of Object.entries(user.tokenLimits)) {
if (typeof value === 'number') {
transformedTokenLimits[family] = {
input: 0,
output: 0,
legacy_total: value
};
// Already in correct format
transformedTokenLimits[family] = value;
} else if (typeof value === 'object' && value !== null) {
transformedTokenLimits[family] = {
input: (value as any).input || 0,
output: (value as any).output || 0,
legacy_total: (value as any).legacy_total
};
// Old format with input/output/legacy_total - sum them up
const val = value as any;
transformedTokenLimits[family] = (val.input ?? 0) + (val.output ?? 0) + (val.legacy_total ?? 0);
}
}
user.tokenLimits = transformedTokenLimits;
}
// Handle tokenRefresh - should be flat numbers
if (user.tokenRefresh) {
const transformedTokenRefresh: any = {};
for (const [family, value] of Object.entries(user.tokenRefresh)) {
if (typeof value === 'number') {
transformedTokenRefresh[family] = {
input: 0,
output: 0,
legacy_total: value
};
// Already in correct format
transformedTokenRefresh[family] = value;
} else if (typeof value === 'object' && value !== null) {
transformedTokenRefresh[family] = {
input: (value as any).input || 0,
output: (value as any).output || 0,
legacy_total: (value as any).legacy_total
};
// Old format with input/output/legacy_total - sum them up
const val = value as any;
transformedTokenRefresh[family] = (val.input ?? 0) + (val.output ?? 0) + (val.legacy_total ?? 0);
}
}
user.tokenRefresh = transformedTokenRefresh;
+11 -5
View File
@@ -18,13 +18,19 @@
</li>
<li>
<code>tokenCounts</code> (optional): the number of tokens the user has
consumed. This should be an object with keys <code>turbo</code>,
<code>gpt4</code>, and <code>claude</code>.
consumed. This should be an object with model family keys (e.g. <code>turbo</code>,
<code>gpt4</code>, <code>claude</code>), each containing an object with
<code>input</code> and <code>output</code> token counts.
</li>
<li>
<code>tokenLimits</code> (optional): the number of tokens the user can
consume. This should be an object with keys <code>turbo</code>,
<code>gpt4</code>, and <code>claude</code>.
<code>tokenLimits</code> (optional): the maximum number of tokens the user can
consume. This should be an object with model family keys (e.g. <code>turbo</code>,
<code>gpt4</code>, <code>claude</code>), each containing a single number
representing the total token quota.
</li>
<li>
<code>tokenRefresh</code> (optional): the token quota that the user's limit for each
model family is reset to when quotas are refreshed. Same format as <code>tokenLimits</code>.
</li>
<li>
<code>createdAt</code> (optional): the timestamp when the user was created
+17 -4
View File
@@ -2,8 +2,7 @@ import { ZodType, z } from "zod";
import { MODEL_FAMILIES, ModelFamily } from "../models";
import { makeOptionalPropsNullable } from "../utils";
// This just dynamically creates a Zod object type with a key for each model
// family and an optional number value for input and output tokens.
// Schema for token counts - keeps track of input/output usage
export const tokenCountsSchema: ZodType<UserTokenCounts> = z.object(
MODEL_FAMILIES.reduce(
(acc, family) => ({
@@ -21,6 +20,17 @@ export const tokenCountsSchema: ZodType<UserTokenCounts> = z.object(
)
);
/**
 * Zod schema for token limits: an object with one key per entry of
 * `MODEL_FAMILIES`, each holding a single number that represents the total
 * quota for that family (unlike `tokenCountsSchema`, which tracks
 * input/output usage).
 */
export const tokenLimitsSchema: ZodType<UserTokenLimits> = z.object(
MODEL_FAMILIES.reduce(
(acc, family) => ({
...acc,
// Optional with a default: a family that is missing from the parsed
// object comes out as 0 rather than undefined.
[family]: z.number().optional().default(0),
}),
{} as Record<ModelFamily, ZodType<number>>
)
);
export const UserSchema = z
.object({
/** User's personal access token. */
@@ -46,9 +56,9 @@ export const UserSchema = z
/** Number of input and output tokens the user has consumed, by model family. */
tokenCounts: tokenCountsSchema,
/** Maximum number of tokens the user can consume, by model family. */
tokenLimits: tokenCountsSchema,
tokenLimits: tokenLimitsSchema,
/** User-specific token refresh amount, by model family. */
tokenRefresh: tokenCountsSchema,
tokenRefresh: tokenLimitsSchema,
/** Time at which the user was created. */
createdAt: z.number(),
/** Time at which the user last connected. */
@@ -79,5 +89,8 @@ export const UserPartialSchema = makeOptionalPropsNullable(UserSchema)
export type UserTokenCounts = {
[K in ModelFamily]: { input: number; output: number; legacy_total?: number } | undefined;
};
/**
 * Per-model-family token limits stored as flat numbers. Every `ModelFamily`
 * key is present in the type; its value is either a number or `undefined`.
 */
export type UserTokenLimits = Record<ModelFamily, number | undefined>;
export type User = z.infer<typeof UserSchema>;
export type UserUpdate = z.infer<typeof UserPartialSchema>;
+71 -90
View File
@@ -28,7 +28,7 @@ import {
ModelFamily,
} from "../models";
import { assertNever } from "../utils";
import { User, UserTokenCounts, UserUpdate } from "./schema";
import { User, UserTokenCounts, UserTokenLimits, UserUpdate } from "./schema";
const log = logger.child({ module: "users" });
@@ -73,6 +73,32 @@ const migrateTokenCountsProperty = (
return result;
};
/**
 * Normalizes a persisted `tokenLimits` / `tokenRefresh` value into the flat
 * "one number per model family" format.
 *
 * For every family in `MODEL_FAMILIES` the stored value is interpreted as:
 * - a finite number: already in the flat format, kept as-is;
 * - an object (old `{ input, output, legacy_total }` format): collapsed to
 *   `input + output`, or to `legacy_total` when that sum is zero. The three
 *   fields are deliberately NOT all added together, because old records could
 *   carry the same quota in both `input` and `legacy_total`, and adding them
 *   would double the user's limit. If the collapsed value is not positive, the
 *   config default is used instead;
 * - anything else (missing, null, wrong type, NaN/Infinity): the config
 *   default for that family, or 0 when the config has none.
 *
 * @param parsedProperty Raw value loaded from storage; may be missing or in
 *   either the flat or the old object format.
 * @param defaultConfigForProperty Per-family fallback values, e.g.
 *   `config.tokenQuota`.
 * @returns An object with a numeric entry for every model family.
 */
const migrateTokenLimitsProperty = (
  parsedProperty: unknown, // Data from DB
  defaultConfigForProperty: Record<ModelFamily, number | undefined> // e.g., config.tokenQuota
): UserTokenLimits => {
  // Anything that is not a non-null object has no per-family entries to read.
  const stored: Record<string, unknown> =
    typeof parsedProperty === 'object' && parsedProperty !== null
      ? (parsedProperty as Record<string, unknown>)
      : {};

  // Reads one field of an old-format entry; non-numeric or non-finite values
  // count as 0 so they cannot turn the total into NaN or a concatenated string.
  const asCount = (value: unknown): number =>
    typeof value === 'number' && Number.isFinite(value) ? value : 0;

  const result = {} as UserTokenLimits;

  for (const family of MODEL_FAMILIES) {
    const dbValue = stored[family];
    const fallback = defaultConfigForProperty[family] ?? 0;

    if (typeof dbValue === 'number' && Number.isFinite(dbValue)) {
      // Already in correct format
      result[family] = dbValue;
    } else if (typeof dbValue === 'object' && dbValue !== null) {
      // Old format with input/output/legacy_total. Prefer the split fields and
      // only fall back to legacy_total, so a quota stored in both places is
      // not counted twice.
      const legacy = dbValue as Record<string, unknown>;
      const split = asCount(legacy.input) + asCount(legacy.output);
      const total = split > 0 ? split : asCount(legacy.legacy_total);
      result[family] = total > 0 ? total : fallback;
    } else {
      // Missing or invalid - use config default
      result[family] = fallback;
    }
  }

  return result;
};
const users: Map<string, User> = new Map();
const usersToFlush = new Set<string>();
let quotaRefreshJob: schedule.Job | null = null;
@@ -120,14 +146,15 @@ export function createUser(createOptions?: {
ip: [],
type: "normal",
promptCount: 0,
tokenCounts: { ...INITIAL_TOKENS }, // New counts don't have legacy_total
tokenCounts: { ...INITIAL_TOKENS },
tokenLimits: createOptions?.tokenLimits ?? MODEL_FAMILIES.reduce((acc, family) => {
const quota = config.tokenQuota[family];
// If quota is a number, it's a legacy total limit, store it as such
acc[family] = typeof quota === 'number' ? { input: quota, output: 0, legacy_total: quota } : (quota || { input: 0, output: 0 });
acc[family] = config.tokenQuota[family] ?? 0;
return acc;
}, {} as UserTokenCounts),
tokenRefresh: createOptions?.tokenRefresh ?? { ...INITIAL_TOKENS }, // Refresh amounts typically start fresh
}, {} as UserTokenLimits),
tokenRefresh: createOptions?.tokenRefresh ?? MODEL_FAMILIES.reduce((acc, family) => {
acc[family] = config.tokenQuota[family] ?? 0;
return acc;
}, {} as UserTokenLimits),
createdAt: Date.now(),
meta: {},
};
@@ -170,14 +197,15 @@ export function upsertUser(user: UserUpdate) {
ip: [],
type: "normal",
promptCount: 0,
tokenCounts: { ...INITIAL_TOKENS }, // New counts don't have legacy_total
tokenCounts: { ...INITIAL_TOKENS },
tokenLimits: MODEL_FAMILIES.reduce((acc, family) => {
const quota = config.tokenQuota[family];
// If quota is a number, it's a legacy total limit, store it as such
acc[family] = typeof quota === 'number' ? { input: quota, output: 0, legacy_total: quota } : (quota || { input: 0, output: 0 });
acc[family] = config.tokenQuota[family] ?? 0;
return acc;
}, {} as UserTokenCounts),
tokenRefresh: { ...INITIAL_TOKENS }, // Refresh amounts typically start fresh
}, {} as UserTokenLimits),
tokenRefresh: MODEL_FAMILIES.reduce((acc, family) => {
acc[family] = config.tokenQuota[family] ?? 0;
return acc;
}, {} as UserTokenLimits),
createdAt: Date.now(),
meta: {},
};
@@ -207,28 +235,13 @@ export function upsertUser(user: UserUpdate) {
}
if (updates.tokenLimits) {
for (const family of MODEL_FAMILIES) {
updates.tokenLimits[family] ??= { input: 0, output: 0 };
// The property is now guaranteed to be an object, so the 'number' check is removed.
// Defaulting individual fields if they are missing.
const limits = updates.tokenLimits[family]!; // Should not be undefined here
limits.input ??= 0;
limits.output ??= 0;
// legacy_total is optional and not defaulted here if missing
updates.tokenLimits[family] ??= 0;
}
}
// tokenRefresh is a special case where we want to merge the existing and
// updated values for each model family, ignoring falsy values.
if (updates.tokenRefresh) {
const merged = { ...existing.tokenRefresh } as UserTokenCounts;
for (const family of MODEL_FAMILIES) {
const updateRefresh = updates.tokenRefresh[family];
const existingRefresh = existing.tokenRefresh[family];
merged[family] = {
input: (updateRefresh?.input || existingRefresh?.input) ?? 0,
output: (updateRefresh?.output || existingRefresh?.output) ?? 0,
};
updates.tokenRefresh[family] ??= 0;
}
updates.tokenRefresh = merged;
}
users.set(user.token, Object.assign(existing, updates));
@@ -322,74 +335,42 @@ export function hasAvailableQuota({
const modelFamily = getModelFamilyForQuotaUsage(model, api);
const { tokenCounts, tokenLimits } = user;
const limitConfig = tokenLimits[modelFamily];
const currentUsage = tokenCounts[modelFamily] ?? { input: 0, output: 0 };
// If no specific limit object for the family, or if it's essentially unlimited (e.g. input/output are 0 or not set)
// fall back to checking config.tokenQuota which is a number (total limit).
if (!limitConfig || (limitConfig.input === 0 && limitConfig.output === 0 && !config.tokenQuota[modelFamily])) {
return true; // No effective limit
}
// Calculate total tokens consumed so far (including legacy)
const totalConsumed = (currentUsage.input ?? 0) + (currentUsage.output ?? 0) + (currentUsage.legacy_total ?? 0);
let effectiveLimit: number;
if (limitConfig && (limitConfig.input > 0 || limitConfig.output > 0)) {
// If a specific limit object exists and has positive values, sum them.
// This assumes the limit is a total limit. If input/output are separate, this logic needs change.
effectiveLimit = (limitConfig.input ?? Number.MAX_SAFE_INTEGER) + (limitConfig.output ?? Number.MAX_SAFE_INTEGER);
} else {
// Fallback to general numeric quota from config if specific limitObj is not effectively set.
const generalQuota = config.tokenQuota[modelFamily];
if (typeof generalQuota === 'number' && generalQuota > 0) {
effectiveLimit = generalQuota;
} else {
return true; // No limit defined
}
}
// Get the quota limit as a single number
const limit = tokenLimits[modelFamily] ?? config.tokenQuota[modelFamily] ?? 0;
// If no limit (0 or undefined), quota is unlimited
if (!limit || limit === 0) return true;
// Assuming 'requested' is for input tokens. If 'requested' can be input or output,
// this needs to be an object {input: number, output: number}.
// For now, we sum current input & output and add 'requested' to input for checking.
// This is a simplification. A more robust solution would involve 'requested' being an object.
const totalConsumed = (currentUsage.input ?? 0) + (currentUsage.output ?? 0) + requested;
return totalConsumed < effectiveLimit;
// Check if the request would exceed the limit
// 'requested' is already the sum of input and output tokens from the middleware
return (totalConsumed + requested) <= limit;
}
/**
* For the given user, sets token limits for each model family to the sum of the
* current count and the refresh amount, up to the default limit. If a quota is
* not specified for a model family, it is not touched.
* For the given user, refreshes token limits for each model family. The new limit
* is set to the configured quota value (either from user's tokenRefresh or global config).
* This replaces the current limits entirely, not adding to them.
*/
export function refreshQuota(token: string) {
const user = users.get(token);
if (!user) return;
const { tokenQuota } = config;
const { tokenCounts, tokenLimits, tokenRefresh } = user;
const { tokenLimits, tokenRefresh } = user;
for (const family of MODEL_FAMILIES) {
const currentUsage = tokenCounts[family] ?? { input: 0, output: 0 };
const userRefreshConfig = tokenRefresh[family] ?? { input: 0, output: 0 };
const globalDefaultQuotaValue = config.tokenQuota[family]; // This is a number or undefined
// Get the quota value to set (from user refresh config or global default)
const userQuota = tokenRefresh[family] ?? 0;
const globalQuota = config.tokenQuota[family] ?? 0;
const quotaToSet = userQuota || globalQuota;
let refreshInputAmount = 0;
let refreshOutputAmount = 0;
// Prioritize user-specific refresh amounts if they are positive
if (userRefreshConfig.input > 0 || userRefreshConfig.output > 0) {
refreshInputAmount = userRefreshConfig.input;
refreshOutputAmount = userRefreshConfig.output;
} else if (typeof globalDefaultQuotaValue === 'number' && globalDefaultQuotaValue > 0) {
// If no user-specific refresh, use the global quota.
// Distribute the global quota. For simplicity, add to input, or define a rule.
// Here, let's assume the global quota is a total that primarily refreshes 'input'.
refreshInputAmount = globalDefaultQuotaValue;
refreshOutputAmount = 0; // Or some portion of globalDefaultQuotaValue
}
if (refreshInputAmount > 0 || refreshOutputAmount > 0) {
tokenLimits[family] = {
input: (currentUsage.input ?? 0) + refreshInputAmount,
output: (currentUsage.output ?? 0) + refreshOutputAmount,
};
// Only update if we have a valid quota
if (quotaToSet > 0) {
tokenLimits[family] = quotaToSet;
}
}
usersToFlush.add(token);
@@ -510,8 +491,8 @@ async function initFirebase() {
createdAt: rawUser.createdAt || Date.now(),
// Migrate token fields
tokenCounts: migrateTokenCountsProperty(rawUser.tokenCounts, INITIAL_TOKENS),
tokenLimits: migrateTokenCountsProperty(rawUser.tokenLimits, config.tokenQuota),
tokenRefresh: migrateTokenCountsProperty(rawUser.tokenRefresh, INITIAL_TOKENS),
tokenLimits: migrateTokenLimitsProperty(rawUser.tokenLimits, config.tokenQuota),
tokenRefresh: migrateTokenLimitsProperty(rawUser.tokenRefresh, config.tokenQuota),
meta: rawUser.meta || {},
};
// Use the internal map directly to avoid re-triggering upsertUser's default creations
@@ -569,8 +550,8 @@ async function loadUsersFromSQLite() { // Added
type: row.type,
promptCount: row.promptCount,
tokenCounts: migrateTokenCountsProperty(rawTokenCounts, INITIAL_TOKENS),
tokenLimits: migrateTokenCountsProperty(rawTokenLimits, config.tokenQuota),
tokenRefresh: migrateTokenCountsProperty(rawTokenRefresh, INITIAL_TOKENS),
tokenLimits: migrateTokenLimitsProperty(rawTokenLimits, config.tokenQuota),
tokenRefresh: migrateTokenLimitsProperty(rawTokenRefresh, config.tokenQuota),
createdAt: row.createdAt,
lastUsedAt: row.lastUsedAt,
disabledAt: row.disabledAt,
@@ -626,8 +607,8 @@ async function flushUsersToSQLite() { // Added
type: user.type,
promptCount: user.promptCount,
tokenCounts: JSON.stringify(user.tokenCounts || INITIAL_TOKENS),
tokenLimits: JSON.stringify(user.tokenLimits || migrateTokenCountsProperty(null, config.tokenQuota)),
tokenRefresh: JSON.stringify(user.tokenRefresh || INITIAL_TOKENS),
tokenLimits: JSON.stringify(user.tokenLimits || migrateTokenLimitsProperty(null, config.tokenQuota)),
tokenRefresh: JSON.stringify(user.tokenRefresh || migrateTokenLimitsProperty(null, config.tokenQuota)),
createdAt: user.createdAt,
lastUsedAt: user.lastUsedAt ?? null,
disabledAt: user.disabledAt ?? null,
+11 -10
View File
@@ -25,17 +25,18 @@ const quotaTableId = Math.random().toString(36).slice(2);
<% Object.entries(quota).forEach(([key, configLimit]) => { %>
<%
const counts = user.tokenCounts[key] || { input: 0, output: 0 };
const limits = user.tokenLimits[key] || { input: 0, output: 0 }; // Default if not set
const refresh = user.tokenRefresh[key] || { input: 0, output: 0 };
const limits = user.tokenLimits[key] ?? 0; // Now a flat number
const refresh = user.tokenRefresh[key] ?? 0; // Now a flat number
const usageInput = Number(counts.input) || 0;
const usageOutput = Number(counts.output) || 0;
const usageLegacy = Number(counts.legacy_total) || 0;
const displayUsage = usageInput + usageOutput || usageLegacy; // This is for total token display, not directly for cost calculation here
// Total usage is the sum of all: legacy (historical) + current input + current output
const totalUsage = usageInput + usageOutput + usageLegacy;
const displayUsage = totalUsage; // This is for total token display, not directly for cost calculation here
const limitInput = Number(limits.input) || 0;
// If limit was from legacy config.tokenQuota (a number), it's in limits.legacy_total or limits.input
const displayLimit = limitInput || Number(limits.legacy_total) || 0;
// Limits are now flat numbers
const displayLimit = Number(limits) || 0;
// Determine tokens to use for cost calculation
const costInputTokens = (usageInput + usageOutput > 0) ? usageInput : usageLegacy;
@@ -45,16 +46,16 @@ const quotaTableId = Math.random().toString(36).slice(2);
let remaining = 0;
let limitIsSet = false;
if (displayLimit > 0) {
remaining = displayLimit - (usageInput + usageOutput);
remaining = displayLimit - totalUsage;
limitIsSet = true;
} else if (typeof configLimit === 'number' && configLimit > 0) {
// Fallback to global config limit if user-specific limit is 0 or not set meaningfully
remaining = configLimit - (usageInput + usageOutput);
remaining = configLimit - totalUsage;
limitIsSet = true;
}
const refreshDisplayValue = (Number(refresh.input) || 0) + (Number(refresh.output) || 0) || configLimit || 0;
// Refresh is now a flat number
const refreshDisplayValue = Number(refresh) || configLimit || 0;
%>
<tr>
<th scope="row"><%- key %></th>