Merge branch 'main' into 'main'

fix quota handling with new user schema See merge request reanon/nonono!4
2025-06-16 09:42:39 -08:00
parent ec82599e24 aec3927c94
commit 2405be71c1
5 changed files with 124 additions and 136 deletions
@@ -134,13 +134,9 @@ router.post("/create-user", (req, res) => {
      const expiresAt = Date.now() + data.temporaryUserDuration * 60 * 1000;
      const tokenLimits = MODEL_FAMILIES.reduce((limits, modelFamily) => {
        const quotaValue = data[`temporaryUserQuota_${modelFamily}`];
-        if (typeof quotaValue === 'number') {
-          limits[modelFamily] = { input: quotaValue, output: 0, legacy_total: quotaValue };
-        } else {
-          limits[modelFamily] = { input: 0, output: 0 };
-        }
+        limits[modelFamily] = typeof quotaValue === 'number' ? quotaValue : 0;
        return limits;
-      }, {} as UserTokenCounts);
+      }, {} as any);
      return { ...data, expiresAt, tokenLimits };
    });

@@ -219,42 +215,33 @@ router.post("/import-users", upload.single("users"), (req, res) => {
      user.tokenCounts = transformedTokenCounts;
    }
    
-    // Also handle tokenLimits and tokenRefresh the same way
+    // Handle tokenLimits - should be flat numbers
    if (user.tokenLimits) {
      const transformedTokenLimits: any = {};
      for (const [family, value] of Object.entries(user.tokenLimits)) {
        if (typeof value === 'number') {
-          transformedTokenLimits[family] = {
-            input: 0,
-            output: 0,
-            legacy_total: value
-          };
+          // Already in correct format
+          transformedTokenLimits[family] = value;
        } else if (typeof value === 'object' && value !== null) {
-          transformedTokenLimits[family] = {
-            input: (value as any).input || 0,
-            output: (value as any).output || 0,
-            legacy_total: (value as any).legacy_total
-          };
+          // Old format with input/output/legacy_total - sum them up
+          const val = value as any;
+          transformedTokenLimits[family] = (val.input ?? 0) + (val.output ?? 0) + (val.legacy_total ?? 0);
        }
      }
      user.tokenLimits = transformedTokenLimits;
    }
    
+    // Handle tokenRefresh - should be flat numbers
    if (user.tokenRefresh) {
      const transformedTokenRefresh: any = {};
      for (const [family, value] of Object.entries(user.tokenRefresh)) {
        if (typeof value === 'number') {
-          transformedTokenRefresh[family] = {
-            input: 0,
-            output: 0,
-            legacy_total: value
-          };
+          // Already in correct format
+          transformedTokenRefresh[family] = value;
        } else if (typeof value === 'object' && value !== null) {
-          transformedTokenRefresh[family] = {
-            input: (value as any).input || 0,
-            output: (value as any).output || 0,
-            legacy_total: (value as any).legacy_total
-          };
+          // Old format with input/output/legacy_total - sum them up
+          const val = value as any;
+          transformedTokenRefresh[family] = (val.input ?? 0) + (val.output ?? 0) + (val.legacy_total ?? 0);
        }
      }
      user.tokenRefresh = transformedTokenRefresh;
@@ -18,13 +18,19 @@
  </li>
  <li>
    <code>tokenCounts</code> (optional): the number of tokens the user has
-    consumed. This should be an object with keys <code>turbo</code>,
-    <code>gpt4</code>, and <code>claude</code>.
+    consumed. This should be an object with model family keys (e.g. <code>turbo</code>,
+    <code>gpt4</code>, <code>claude</code>), each containing an object with 
+    <code>input</code> and <code>output</code> token counts.
  </li>
  <li>
-    <code>tokenLimits</code> (optional): the number of tokens the user can
-    consume. This should be an object with keys <code>turbo</code>,
-    <code>gpt4</code>, and <code>claude</code>.
+    <code>tokenLimits</code> (optional): the maximum number of tokens the user can
+    consume. This should be an object with model family keys (e.g. <code>turbo</code>,
+    <code>gpt4</code>, <code>claude</code>), each containing a single number 
+    representing the total token quota.
+  </li>
+  <li>
+    <code>tokenRefresh</code> (optional): the token quota that each limit is reset to
+    when quotas are refreshed. Same format as <code>tokenLimits</code>.
  </li>
  <li>
    <code>createdAt</code> (optional): the timestamp when the user was created
@@ -2,8 +2,7 @@ import { ZodType, z } from "zod";
 import { MODEL_FAMILIES, ModelFamily } from "../models";
 import { makeOptionalPropsNullable } from "../utils";

-// This just dynamically creates a Zod object type with a key for each model
-// family and an optional number value for input and output tokens.
+// Schema for token counts - keeps track of input/output usage
 export const tokenCountsSchema: ZodType<UserTokenCounts> = z.object(
  MODEL_FAMILIES.reduce(
    (acc, family) => ({
@@ -21,6 +20,17 @@ export const tokenCountsSchema: ZodType<UserTokenCounts> = z.object(
  )
 );

+// Schema for token limits: one flat number per model family; each key is optional and defaults to 0.
+export const tokenLimitsSchema: ZodType<UserTokenLimits> = z.object(
+  MODEL_FAMILIES.reduce(
+    (acc, family) => ({
+      ...acc,
+      [family]: z.number().optional().default(0),
+    }),
+    {} as Record<ModelFamily, ZodType<number>>
+  )
+);
+
 export const UserSchema = z
  .object({
    /** User's personal access token. */
@@ -46,9 +56,9 @@ export const UserSchema = z
    /** Number of input and output tokens the user has consumed, by model family. */
    tokenCounts: tokenCountsSchema,
    /** Maximum number of tokens the user can consume, by model family. */
-    tokenLimits: tokenCountsSchema,
+    tokenLimits: tokenLimitsSchema,
    /** User-specific token refresh amount, by model family. */
-    tokenRefresh: tokenCountsSchema,
+    tokenRefresh: tokenLimitsSchema,
    /** Time at which the user was created. */
    createdAt: z.number(),
    /** Time at which the user last connected. */
@@ -79,5 +89,8 @@ export const UserPartialSchema = makeOptionalPropsNullable(UserSchema)
 export type UserTokenCounts = {
  [K in ModelFamily]: { input: number; output: number; legacy_total?: number } | undefined;
 };
+export type UserTokenLimits = {
+  [K in ModelFamily]: number | undefined; // one flat token total per model family
+};
 export type User = z.infer<typeof UserSchema>;
 export type UserUpdate = z.infer<typeof UserPartialSchema>;
@@ -28,7 +28,7 @@ import {
  ModelFamily,
 } from "../models";
 import { assertNever } from "../utils";
-import { User, UserTokenCounts, UserUpdate } from "./schema";
+import { User, UserTokenCounts, UserTokenLimits, UserUpdate } from "./schema";

 const log = logger.child({ module: "users" });

@@ -73,6 +73,32 @@ const migrateTokenCountsProperty = (
  return result;
 };

+// Normalizes a stored tokenLimits/tokenRefresh value into one flat number per model family.
+const migrateTokenLimitsProperty = (
+  parsedProperty: any, // Raw value from storage; may be null/undefined, flat numbers, or legacy objects
+  defaultConfigForProperty: Record<ModelFamily, number | undefined> // Per-family fallback, e.g., config.tokenQuota
+): UserTokenLimits => {
+  const result = {} as UserTokenLimits;
+
+  for (const family of MODEL_FAMILIES) {
+    const dbValue = parsedProperty?.[family];
+    const configValue = defaultConfigForProperty[family];
+
+    if (typeof dbValue === 'number') {
+      // Already a flat number - keep it unchanged (the config default is not consulted)
+      result[family] = dbValue;
+    } else if (typeof dbValue === 'object' && dbValue !== null) {
+      // Old format with input/output/legacy_total - sum them; a non-positive sum falls back to the config default
+      const total = (dbValue.input ?? 0) + (dbValue.output ?? 0) + (dbValue.legacy_total ?? 0);
+      result[family] = total > 0 ? total : (configValue ?? 0);
+    } else {
+      // Missing or invalid - use the config default, or 0 when the config has no value for this family
+      result[family] = configValue ?? 0;
+    }
+  }
+  return result;
+};
+
 const users: Map<string, User> = new Map();
 const usersToFlush = new Set<string>();
 let quotaRefreshJob: schedule.Job | null = null;
@@ -120,14 +146,15 @@ export function createUser(createOptions?: {
    ip: [],
    type: "normal",
    promptCount: 0,
-    tokenCounts: { ...INITIAL_TOKENS }, // New counts don't have legacy_total
+    tokenCounts: { ...INITIAL_TOKENS },
    tokenLimits: createOptions?.tokenLimits ?? MODEL_FAMILIES.reduce((acc, family) => {
-      const quota = config.tokenQuota[family];
-      // If quota is a number, it's a legacy total limit, store it as such
-      acc[family] = typeof quota === 'number' ? { input: quota, output: 0, legacy_total: quota } : (quota || { input: 0, output: 0 });
+      acc[family] = config.tokenQuota[family] ?? 0;
      return acc;
-    }, {} as UserTokenCounts),
-    tokenRefresh: createOptions?.tokenRefresh ?? { ...INITIAL_TOKENS }, // Refresh amounts typically start fresh
+    }, {} as UserTokenLimits),
+    tokenRefresh: createOptions?.tokenRefresh ?? MODEL_FAMILIES.reduce((acc, family) => {
+      acc[family] = config.tokenQuota[family] ?? 0;
+      return acc;
+    }, {} as UserTokenLimits),
    createdAt: Date.now(),
    meta: {},
  };
@@ -170,14 +197,15 @@ export function upsertUser(user: UserUpdate) {
    ip: [],
    type: "normal",
    promptCount: 0,
-    tokenCounts: { ...INITIAL_TOKENS }, // New counts don't have legacy_total
+    tokenCounts: { ...INITIAL_TOKENS },
    tokenLimits: MODEL_FAMILIES.reduce((acc, family) => {
-      const quota = config.tokenQuota[family];
-      // If quota is a number, it's a legacy total limit, store it as such
-      acc[family] = typeof quota === 'number' ? { input: quota, output: 0, legacy_total: quota } : (quota || { input: 0, output: 0 });
+      acc[family] = config.tokenQuota[family] ?? 0;
      return acc;
-    }, {} as UserTokenCounts),
-    tokenRefresh: { ...INITIAL_TOKENS }, // Refresh amounts typically start fresh
+    }, {} as UserTokenLimits),
+    tokenRefresh: MODEL_FAMILIES.reduce((acc, family) => {
+      acc[family] = config.tokenQuota[family] ?? 0;
+      return acc;
+    }, {} as UserTokenLimits),
    createdAt: Date.now(),
    meta: {},
  };
@@ -207,28 +235,13 @@ export function upsertUser(user: UserUpdate) {
  }
  if (updates.tokenLimits) {
    for (const family of MODEL_FAMILIES) {
-      updates.tokenLimits[family] ??= { input: 0, output: 0 };
-      // The property is now guaranteed to be an object, so the 'number' check is removed.
-      // Defaulting individual fields if they are missing.
-      const limits = updates.tokenLimits[family]!; // Should not be undefined here
-      limits.input ??= 0;
-      limits.output ??= 0;
-      // legacy_total is optional and not defaulted here if missing
+      updates.tokenLimits[family] ??= 0;
    }
  }
-  // tokenRefresh is a special case where we want to merge the existing and
-  // updated values for each model family, ignoring falsy values.
  if (updates.tokenRefresh) {
-    const merged = { ...existing.tokenRefresh } as UserTokenCounts;
    for (const family of MODEL_FAMILIES) {
-      const updateRefresh = updates.tokenRefresh[family];
-      const existingRefresh = existing.tokenRefresh[family];
-      merged[family] = {
-        input: (updateRefresh?.input || existingRefresh?.input) ?? 0,
-        output: (updateRefresh?.output || existingRefresh?.output) ?? 0,
-      };
+      updates.tokenRefresh[family] ??= 0;
    }
-    updates.tokenRefresh = merged;
  }

  users.set(user.token, Object.assign(existing, updates));
@@ -322,74 +335,42 @@ export function hasAvailableQuota({

  const modelFamily = getModelFamilyForQuotaUsage(model, api);
  const { tokenCounts, tokenLimits } = user;
-  const limitConfig = tokenLimits[modelFamily];
  const currentUsage = tokenCounts[modelFamily] ?? { input: 0, output: 0 };

-  // If no specific limit object for the family, or if it's essentially unlimited (e.g. input/output are 0 or not set)
-  // fall back to checking config.tokenQuota which is a number (total limit).
-  if (!limitConfig || (limitConfig.input === 0 && limitConfig.output === 0 && !config.tokenQuota[modelFamily])) {
-    return true; // No effective limit
-  }
+  // Calculate total tokens consumed so far (including legacy)
+  const totalConsumed = (currentUsage.input ?? 0) + (currentUsage.output ?? 0) + (currentUsage.legacy_total ?? 0);

-  let effectiveLimit: number;
-  if (limitConfig && (limitConfig.input > 0 || limitConfig.output > 0)) {
-    // If a specific limit object exists and has positive values, sum them.
-    // This assumes the limit is a total limit. If input/output are separate, this logic needs change.
-    effectiveLimit = (limitConfig.input ?? Number.MAX_SAFE_INTEGER) + (limitConfig.output ?? Number.MAX_SAFE_INTEGER);
-  } else {
-    // Fallback to general numeric quota from config if specific limitObj is not effectively set.
-    const generalQuota = config.tokenQuota[modelFamily];
-    if (typeof generalQuota === 'number' && generalQuota > 0) {
-      effectiveLimit = generalQuota;
-    } else {
-      return true; // No limit defined
-    }
-  }
+  // Get the quota limit as a single number
+  const limit = tokenLimits[modelFamily] ?? config.tokenQuota[modelFamily] ?? 0;
+
+  // If no limit (0 or undefined), quota is unlimited
+  if (!limit || limit === 0) return true;
  
-  // Assuming 'requested' is for input tokens. If 'requested' can be input or output,
-  // this needs to be an object {input: number, output: number}.
-  // For now, we sum current input & output and add 'requested' to input for checking.
-  // This is a simplification. A more robust solution would involve 'requested' being an object.
-  const totalConsumed = (currentUsage.input ?? 0) + (currentUsage.output ?? 0) + requested;
-  return totalConsumed < effectiveLimit;
+  // Check if the request would exceed the limit
+  // 'requested' is already the sum of input and output tokens from the middleware
+  return (totalConsumed + requested) <= limit;
 }

 /**
- * For the given user, sets token limits for each model family to the sum of the
- * current count and the refresh amount, up to the default limit. If a quota is
- * not specified for a model family, it is not touched.
+ * For the given user, resets the token limit of each model family to its refresh
+ * quota: the user's tokenRefresh value when non-zero, otherwise the global config quota.
+ * Limits are replaced, not added to; families with no positive quota are left untouched.
 */
 export function refreshQuota(token: string) {
  const user = users.get(token);
  if (!user) return;
-  const { tokenQuota } = config;
-  const { tokenCounts, tokenLimits, tokenRefresh } = user;
+  const { tokenLimits, tokenRefresh } = user;

  for (const family of MODEL_FAMILIES) {
-    const currentUsage = tokenCounts[family] ?? { input: 0, output: 0 };
-    const userRefreshConfig = tokenRefresh[family] ?? { input: 0, output: 0 };
-    const globalDefaultQuotaValue = config.tokenQuota[family]; // This is a number or undefined
+    // Get the quota value to set (from user refresh config or global default)
+    const userQuota = tokenRefresh[family] ?? 0;
+    const globalQuota = config.tokenQuota[family] ?? 0;
+    
+    const quotaToSet = userQuota || globalQuota;

-    let refreshInputAmount = 0;
-    let refreshOutputAmount = 0;
-
-    // Prioritize user-specific refresh amounts if they are positive
-    if (userRefreshConfig.input > 0 || userRefreshConfig.output > 0) {
-      refreshInputAmount = userRefreshConfig.input;
-      refreshOutputAmount = userRefreshConfig.output;
-    } else if (typeof globalDefaultQuotaValue === 'number' && globalDefaultQuotaValue > 0) {
-      // If no user-specific refresh, use the global quota.
-      // Distribute the global quota. For simplicity, add to input, or define a rule.
-      // Here, let's assume the global quota is a total that primarily refreshes 'input'.
-      refreshInputAmount = globalDefaultQuotaValue;
-      refreshOutputAmount = 0; // Or some portion of globalDefaultQuotaValue
-    }
-
-    if (refreshInputAmount > 0 || refreshOutputAmount > 0) {
-      tokenLimits[family] = {
-        input: (currentUsage.input ?? 0) + refreshInputAmount,
-        output: (currentUsage.output ?? 0) + refreshOutputAmount,
-      };
+    // Only update if we have a valid quota
+    if (quotaToSet > 0) {
+      tokenLimits[family] = quotaToSet;
    }
  }
  usersToFlush.add(token);
@@ -510,8 +491,8 @@ async function initFirebase() {
      createdAt: rawUser.createdAt || Date.now(),
      // Migrate token fields
      tokenCounts: migrateTokenCountsProperty(rawUser.tokenCounts, INITIAL_TOKENS),
-      tokenLimits: migrateTokenCountsProperty(rawUser.tokenLimits, config.tokenQuota),
-      tokenRefresh: migrateTokenCountsProperty(rawUser.tokenRefresh, INITIAL_TOKENS),
+      tokenLimits: migrateTokenLimitsProperty(rawUser.tokenLimits, config.tokenQuota),
+      tokenRefresh: migrateTokenLimitsProperty(rawUser.tokenRefresh, config.tokenQuota),
      meta: rawUser.meta || {},
    };
    // Use the internal map directly to avoid re-triggering upsertUser's default creations
@@ -569,8 +550,8 @@ async function loadUsersFromSQLite() { // Added
      type: row.type,
      promptCount: row.promptCount,
      tokenCounts: migrateTokenCountsProperty(rawTokenCounts, INITIAL_TOKENS),
-      tokenLimits: migrateTokenCountsProperty(rawTokenLimits, config.tokenQuota),
-      tokenRefresh: migrateTokenCountsProperty(rawTokenRefresh, INITIAL_TOKENS),
+      tokenLimits: migrateTokenLimitsProperty(rawTokenLimits, config.tokenQuota),
+      tokenRefresh: migrateTokenLimitsProperty(rawTokenRefresh, config.tokenQuota),
      createdAt: row.createdAt,
      lastUsedAt: row.lastUsedAt,
      disabledAt: row.disabledAt,
@@ -626,8 +607,8 @@ async function flushUsersToSQLite() { // Added
          type: user.type,
          promptCount: user.promptCount,
          tokenCounts: JSON.stringify(user.tokenCounts || INITIAL_TOKENS),
-          tokenLimits: JSON.stringify(user.tokenLimits || migrateTokenCountsProperty(null, config.tokenQuota)),
-          tokenRefresh: JSON.stringify(user.tokenRefresh || INITIAL_TOKENS),
+          tokenLimits: JSON.stringify(user.tokenLimits || migrateTokenLimitsProperty(null, config.tokenQuota)),
+          tokenRefresh: JSON.stringify(user.tokenRefresh || migrateTokenLimitsProperty(null, config.tokenQuota)),
          createdAt: user.createdAt,
          lastUsedAt: user.lastUsedAt ?? null,
          disabledAt: user.disabledAt ?? null,
@@ -25,17 +25,18 @@ const quotaTableId = Math.random().toString(36).slice(2);
    <% Object.entries(quota).forEach(([key, configLimit]) => { %>
    <% 
      const counts = user.tokenCounts[key] || { input: 0, output: 0 };
-      const limits = user.tokenLimits[key] || { input: 0, output: 0 }; // Default if not set
-      const refresh = user.tokenRefresh[key] || { input: 0, output: 0 };
+      const limits = user.tokenLimits[key] ?? 0; // Now a flat number
+      const refresh = user.tokenRefresh[key] ?? 0; // Now a flat number
      
      const usageInput = Number(counts.input) || 0;
      const usageOutput = Number(counts.output) || 0;
      const usageLegacy = Number(counts.legacy_total) || 0;
-      const displayUsage = usageInput + usageOutput || usageLegacy; // This is for total token display, not directly for cost calculation here
+      // Total usage is the sum of all: legacy (historical) + current input + current output
+      const totalUsage = usageInput + usageOutput + usageLegacy;
+      const displayUsage = totalUsage; // This is for total token display, not directly for cost calculation here

-      const limitInput = Number(limits.input) || 0;
-      // If limit was from legacy config.tokenQuota (a number), it's in limits.legacy_total or limits.input
-      const displayLimit = limitInput || Number(limits.legacy_total) || 0;
+      // Limits are now flat numbers
+      const displayLimit = Number(limits) || 0;
      
      // Determine tokens to use for cost calculation
      const costInputTokens = (usageInput + usageOutput > 0) ? usageInput : usageLegacy;
@@ -45,16 +46,16 @@ const quotaTableId = Math.random().toString(36).slice(2);
      let remaining = 0;
      let limitIsSet = false;
      if (displayLimit > 0) {
-        remaining = displayLimit - (usageInput + usageOutput);
+        remaining = displayLimit - totalUsage;
        limitIsSet = true;
      } else if (typeof configLimit === 'number' && configLimit > 0) {
        // Fallback to global config limit if user-specific limit is 0 or not set meaningfully
-        remaining = configLimit - (usageInput + usageOutput);
+        remaining = configLimit - totalUsage;
        limitIsSet = true;
      }

-
-      const refreshDisplayValue = (Number(refresh.input) || 0) + (Number(refresh.output) || 0) || configLimit || 0;
+      // Refresh is now a flat number
+      const refreshDisplayValue = Number(refresh) || configLimit || 0;
    %>
    <tr>
      <th scope="row"><%- key %></th>