diff --git a/src/admin/web/manage.ts b/src/admin/web/manage.ts
index 1117501..a0101c2 100644
--- a/src/admin/web/manage.ts
+++ b/src/admin/web/manage.ts
@@ -134,13 +134,9 @@ router.post("/create-user", (req, res) => {
       const expiresAt = Date.now() + data.temporaryUserDuration * 60 * 1000;
       const tokenLimits = MODEL_FAMILIES.reduce((limits, modelFamily) => {
         const quotaValue = data[`temporaryUserQuota_${modelFamily}`];
-        if (typeof quotaValue === 'number') {
-          limits[modelFamily] = { input: quotaValue, output: 0, legacy_total: quotaValue };
-        } else {
-          limits[modelFamily] = { input: 0, output: 0 };
-        }
+        limits[modelFamily] = typeof quotaValue === 'number' ? quotaValue : 0;
         return limits;
-      }, {} as UserTokenCounts);
+      }, {} as any);
       return { ...data, expiresAt, tokenLimits };
     });
 
@@ -219,42 +215,33 @@ router.post("/import-users", upload.single("users"), (req, res) => {
       user.tokenCounts = transformedTokenCounts;
     }
     
-    // Also handle tokenLimits and tokenRefresh the same way
+    // Handle tokenLimits - normalize every family to a flat number
     if (user.tokenLimits) {
       const transformedTokenLimits: any = {};
       for (const [family, value] of Object.entries(user.tokenLimits)) {
         if (typeof value === 'number') {
-          transformedTokenLimits[family] = {
-            input: 0,
-            output: 0,
-            legacy_total: value
-          };
+          // Already in correct format
+          transformedTokenLimits[family] = value;
         } else if (typeof value === 'object' && value !== null) {
-          transformedTokenLimits[family] = {
-            input: (value as any).input || 0,
-            output: (value as any).output || 0,
-            legacy_total: (value as any).legacy_total
-          };
+          // Old format: legacy_total can duplicate input ({ input: q, output: 0, legacy_total: q }), so only use it when input/output are unset
+          const val = value as any;
+          transformedTokenLimits[family] = ((val.input ?? 0) + (val.output ?? 0)) || (val.legacy_total ?? 0);
         }
       }
       user.tokenLimits = transformedTokenLimits;
     }
     
+    // Handle tokenRefresh - should be flat numbers
     if (user.tokenRefresh) {
       const transformedTokenRefresh: any = {};
       for (const [family, value] of Object.entries(user.tokenRefresh)) {
         if (typeof value === 'number') {
-          transformedTokenRefresh[family] = {
-            input: 0,
-            output: 0,
-            legacy_total: value
-          };
+          // Already in correct format
+          transformedTokenRefresh[family] = value;
         } else if (typeof value === 'object' && value !== null) {
-          transformedTokenRefresh[family] = {
-            input: (value as any).input || 0,
-            output: (value as any).output || 0,
-            legacy_total: (value as any).legacy_total
-          };
+          // Old format with input/output/legacy_total - sum them up
+          const val = value as any;
+          transformedTokenRefresh[family] = (val.input ?? 0) + (val.output ?? 0) + (val.legacy_total ?? 0);
         }
       }
       user.tokenRefresh = transformedTokenRefresh;
diff --git a/src/admin/web/views/admin_import-users.ejs b/src/admin/web/views/admin_import-users.ejs
index b54a510..5c159b6 100644
--- a/src/admin/web/views/admin_import-users.ejs
+++ b/src/admin/web/views/admin_import-users.ejs
@@ -18,13 +18,19 @@
   </li>
   <li>
     <code>tokenCounts</code> (optional): the number of tokens the user has
-    consumed. This should be an object with keys <code>turbo</code>,
-    <code>gpt4</code>, and <code>claude</code>.
+    consumed. This should be an object with model family keys (e.g. <code>turbo</code>,
+    <code>gpt4</code>, <code>claude</code>), each containing an object with 
+    <code>input</code> and <code>output</code> token counts.
   </li>
   <li>
-    <code>tokenLimits</code> (optional): the number of tokens the user can
-    consume. This should be an object with keys <code>turbo</code>,
-    <code>gpt4</code>, and <code>claude</code>.
+    <code>tokenLimits</code> (optional): the maximum number of tokens the user can
+    consume. This should be an object with model family keys (e.g. <code>turbo</code>,
+    <code>gpt4</code>, <code>claude</code>), each containing a single number 
+    representing the total token quota.
+  </li>
+  <li>
+    <code>tokenRefresh</code> (optional): the token limit the user is given each time
+    quotas are refreshed. Same format as <code>tokenLimits</code>.
   </li>
   <li>
     <code>createdAt</code> (optional): the timestamp when the user was created
diff --git a/src/shared/users/schema.ts b/src/shared/users/schema.ts
index 9572460..d525db0 100644
--- a/src/shared/users/schema.ts
+++ b/src/shared/users/schema.ts
@@ -2,8 +2,7 @@ import { ZodType, z } from "zod";
 import { MODEL_FAMILIES, ModelFamily } from "../models";
 import { makeOptionalPropsNullable } from "../utils";
 
-// This just dynamically creates a Zod object type with a key for each model
-// family and an optional number value for input and output tokens.
+// Schema for token counts - keeps track of input/output usage
 export const tokenCountsSchema: ZodType<UserTokenCounts> = z.object(
   MODEL_FAMILIES.reduce(
     (acc, family) => ({
@@ -21,6 +20,17 @@ export const tokenCountsSchema: ZodType<UserTokenCounts> = z.object(
   )
 );
 
+// Schema for token limits and refresh amounts: one number per model family, defaulting to 0 when omitted
+export const tokenLimitsSchema: ZodType<UserTokenLimits> = z.object(
+  MODEL_FAMILIES.reduce(
+    (acc, family) => ({
+      ...acc,
+      [family]: z.number().optional().default(0),
+    }),
+    {} as Record<ModelFamily, ZodType<number>>
+  )
+);
+
 export const UserSchema = z
   .object({
     /** User's personal access token. */
@@ -46,9 +56,9 @@ export const UserSchema = z
     /** Number of input and output tokens the user has consumed, by model family. */
     tokenCounts: tokenCountsSchema,
     /** Maximum number of tokens the user can consume, by model family. */
-    tokenLimits: tokenCountsSchema,
+    tokenLimits: tokenLimitsSchema,
     /** User-specific token refresh amount, by model family. */
-    tokenRefresh: tokenCountsSchema,
+    tokenRefresh: tokenLimitsSchema,
     /** Time at which the user was created. */
     createdAt: z.number(),
     /** Time at which the user last connected. */
@@ -79,5 +89,8 @@ export const UserPartialSchema = makeOptionalPropsNullable(UserSchema)
 export type UserTokenCounts = {
   [K in ModelFamily]: { input: number; output: number; legacy_total?: number } | undefined;
 };
+export type UserTokenLimits = {
+  [K in ModelFamily]: number | undefined;
+};
 export type User = z.infer<typeof UserSchema>;
 export type UserUpdate = z.infer<typeof UserPartialSchema>;
diff --git a/src/shared/users/user-store.ts b/src/shared/users/user-store.ts
index 4f646dd..3856892 100644
--- a/src/shared/users/user-store.ts
+++ b/src/shared/users/user-store.ts
@@ -28,7 +28,7 @@ import {
   ModelFamily,
 } from "../models";
 import { assertNever } from "../utils";
-import { User, UserTokenCounts, UserUpdate } from "./schema";
+import { User, UserTokenCounts, UserTokenLimits, UserUpdate } from "./schema";
 
 const log = logger.child({ module: "users" });
 
@@ -73,6 +73,32 @@ const migrateTokenCountsProperty = (
   return result;
 };
 
+// Migrates a stored tokenLimits/tokenRefresh value to one flat number per
+// model family, using the given defaults when nothing usable is stored.
+const migrateTokenLimitsProperty = (
+  parsedProperty: any, // Data from DB
+  defaultConfigForProperty: Record<ModelFamily, number | undefined> // e.g., config.tokenQuota
+): UserTokenLimits => {
+  const result = {} as UserTokenLimits;
+  for (const family of MODEL_FAMILIES) {
+    const dbValue = parsedProperty?.[family];
+    const configValue = defaultConfigForProperty[family];
+    if (typeof dbValue === 'number') {
+      result[family] = dbValue; // Already in correct format
+    } else if (typeof dbValue === 'object' && dbValue !== null) {
+      // Old format. Limits used to be written as { input: q, output: 0,
+      // legacy_total: q }, so legacy_total is a copy of the quota and is only
+      // used when input/output are unset; adding it on top would double q.
+      const split = (dbValue.input ?? 0) + (dbValue.output ?? 0);
+      const total = split > 0 ? split : (dbValue.legacy_total ?? 0);
+      result[family] = total > 0 ? total : (configValue ?? 0);
+    } else {
+      result[family] = configValue ?? 0; // Missing or invalid - use config default
+    }
+  }
+  return result;
+};
+
 const users: Map<string, User> = new Map();
 const usersToFlush = new Set<string>();
 let quotaRefreshJob: schedule.Job | null = null;
@@ -120,14 +146,15 @@ export function createUser(createOptions?: {
     ip: [],
     type: "normal",
     promptCount: 0,
-    tokenCounts: { ...INITIAL_TOKENS }, // New counts don't have legacy_total
+    tokenCounts: { ...INITIAL_TOKENS },
     tokenLimits: createOptions?.tokenLimits ?? MODEL_FAMILIES.reduce((acc, family) => {
-      const quota = config.tokenQuota[family];
-      // If quota is a number, it's a legacy total limit, store it as such
-      acc[family] = typeof quota === 'number' ? { input: quota, output: 0, legacy_total: quota } : (quota || { input: 0, output: 0 });
+      acc[family] = config.tokenQuota[family] ?? 0;
       return acc;
-    }, {} as UserTokenCounts),
-    tokenRefresh: createOptions?.tokenRefresh ?? { ...INITIAL_TOKENS }, // Refresh amounts typically start fresh
+    }, {} as UserTokenLimits),
+    tokenRefresh: createOptions?.tokenRefresh ?? MODEL_FAMILIES.reduce((acc, family) => {
+      acc[family] = config.tokenQuota[family] ?? 0;
+      return acc;
+    }, {} as UserTokenLimits),
     createdAt: Date.now(),
     meta: {},
   };
@@ -170,14 +197,15 @@ export function upsertUser(user: UserUpdate) {
     ip: [],
     type: "normal",
     promptCount: 0,
-    tokenCounts: { ...INITIAL_TOKENS }, // New counts don't have legacy_total
+    tokenCounts: { ...INITIAL_TOKENS },
     tokenLimits: MODEL_FAMILIES.reduce((acc, family) => {
-      const quota = config.tokenQuota[family];
-      // If quota is a number, it's a legacy total limit, store it as such
-      acc[family] = typeof quota === 'number' ? { input: quota, output: 0, legacy_total: quota } : (quota || { input: 0, output: 0 });
+      acc[family] = config.tokenQuota[family] ?? 0;
       return acc;
-    }, {} as UserTokenCounts),
-    tokenRefresh: { ...INITIAL_TOKENS }, // Refresh amounts typically start fresh
+    }, {} as UserTokenLimits),
+    tokenRefresh: MODEL_FAMILIES.reduce((acc, family) => {
+      acc[family] = config.tokenQuota[family] ?? 0;
+      return acc;
+    }, {} as UserTokenLimits),
     createdAt: Date.now(),
     meta: {},
   };
@@ -207,28 +235,13 @@ export function upsertUser(user: UserUpdate) {
   }
   if (updates.tokenLimits) {
     for (const family of MODEL_FAMILIES) {
-      updates.tokenLimits[family] ??= { input: 0, output: 0 };
-      // The property is now guaranteed to be an object, so the 'number' check is removed.
-      // Defaulting individual fields if they are missing.
-      const limits = updates.tokenLimits[family]!; // Should not be undefined here
-      limits.input ??= 0;
-      limits.output ??= 0;
-      // legacy_total is optional and not defaulted here if missing
+      updates.tokenLimits[family] ??= 0;
     }
   }
-  // tokenRefresh is a special case where we want to merge the existing and
-  // updated values for each model family, ignoring falsy values.
   if (updates.tokenRefresh) {
-    const merged = { ...existing.tokenRefresh } as UserTokenCounts;
     for (const family of MODEL_FAMILIES) {
-      const updateRefresh = updates.tokenRefresh[family];
-      const existingRefresh = existing.tokenRefresh[family];
-      merged[family] = {
-        input: (updateRefresh?.input || existingRefresh?.input) ?? 0,
-        output: (updateRefresh?.output || existingRefresh?.output) ?? 0,
-      };
+      updates.tokenRefresh[family] ??= 0;
     }
-    updates.tokenRefresh = merged;
   }
 
   users.set(user.token, Object.assign(existing, updates));
@@ -322,74 +335,42 @@ export function hasAvailableQuota({
 
   const modelFamily = getModelFamilyForQuotaUsage(model, api);
   const { tokenCounts, tokenLimits } = user;
-  const limitConfig = tokenLimits[modelFamily];
   const currentUsage = tokenCounts[modelFamily] ?? { input: 0, output: 0 };
 
-  // If no specific limit object for the family, or if it's essentially unlimited (e.g. input/output are 0 or not set)
-  // fall back to checking config.tokenQuota which is a number (total limit).
-  if (!limitConfig || (limitConfig.input === 0 && limitConfig.output === 0 && !config.tokenQuota[modelFamily])) {
-    return true; // No effective limit
-  }
+  // Calculate total tokens consumed so far (including legacy)
+  const totalConsumed = (currentUsage.input ?? 0) + (currentUsage.output ?? 0) + (currentUsage.legacy_total ?? 0);
 
-  let effectiveLimit: number;
-  if (limitConfig && (limitConfig.input > 0 || limitConfig.output > 0)) {
-    // If a specific limit object exists and has positive values, sum them.
-    // This assumes the limit is a total limit. If input/output are separate, this logic needs change.
-    effectiveLimit = (limitConfig.input ?? Number.MAX_SAFE_INTEGER) + (limitConfig.output ?? Number.MAX_SAFE_INTEGER);
-  } else {
-    // Fallback to general numeric quota from config if specific limitObj is not effectively set.
-    const generalQuota = config.tokenQuota[modelFamily];
-    if (typeof generalQuota === 'number' && generalQuota > 0) {
-      effectiveLimit = generalQuota;
-    } else {
-      return true; // No limit defined
-    }
-  }
+  // Get the quota limit as a single number
+  const limit = tokenLimits[modelFamily] ?? config.tokenQuota[modelFamily] ?? 0;
+
+  // If no limit (0 or undefined), quota is unlimited
+  if (!limit || limit === 0) return true;
   
-  // Assuming 'requested' is for input tokens. If 'requested' can be input or output,
-  // this needs to be an object {input: number, output: number}.
-  // For now, we sum current input & output and add 'requested' to input for checking.
-  // This is a simplification. A more robust solution would involve 'requested' being an object.
-  const totalConsumed = (currentUsage.input ?? 0) + (currentUsage.output ?? 0) + requested;
-  return totalConsumed < effectiveLimit;
+  // Check if the request would exceed the limit
+  // 'requested' is already the sum of input and output tokens from the middleware
+  return (totalConsumed + requested) <= limit;
 }
 
 /**
- * For the given user, sets token limits for each model family to the sum of the
- * current count and the refresh amount, up to the default limit. If a quota is
- * not specified for a model family, it is not touched.
+ * For the given user, refreshes token limits for each model family. The new limit
+ * is set to the configured quota value (either from user's tokenRefresh or global config).
+ * This replaces the current limits entirely, not adding to them.
  */
 export function refreshQuota(token: string) {
   const user = users.get(token);
   if (!user) return;
-  const { tokenQuota } = config;
-  const { tokenCounts, tokenLimits, tokenRefresh } = user;
+  const { tokenLimits, tokenRefresh } = user;
 
   for (const family of MODEL_FAMILIES) {
-    const currentUsage = tokenCounts[family] ?? { input: 0, output: 0 };
-    const userRefreshConfig = tokenRefresh[family] ?? { input: 0, output: 0 };
-    const globalDefaultQuotaValue = config.tokenQuota[family]; // This is a number or undefined
+    // Get the quota value to set (from user refresh config or global default)
+    const userQuota = tokenRefresh[family] ?? 0;
+    const globalQuota = config.tokenQuota[family] ?? 0;
+    
+    const quotaToSet = userQuota || globalQuota;
 
-    let refreshInputAmount = 0;
-    let refreshOutputAmount = 0;
-
-    // Prioritize user-specific refresh amounts if they are positive
-    if (userRefreshConfig.input > 0 || userRefreshConfig.output > 0) {
-      refreshInputAmount = userRefreshConfig.input;
-      refreshOutputAmount = userRefreshConfig.output;
-    } else if (typeof globalDefaultQuotaValue === 'number' && globalDefaultQuotaValue > 0) {
-      // If no user-specific refresh, use the global quota.
-      // Distribute the global quota. For simplicity, add to input, or define a rule.
-      // Here, let's assume the global quota is a total that primarily refreshes 'input'.
-      refreshInputAmount = globalDefaultQuotaValue;
-      refreshOutputAmount = 0; // Or some portion of globalDefaultQuotaValue
-    }
-
-    if (refreshInputAmount > 0 || refreshOutputAmount > 0) {
-      tokenLimits[family] = {
-        input: (currentUsage.input ?? 0) + refreshInputAmount,
-        output: (currentUsage.output ?? 0) + refreshOutputAmount,
-      };
+    // Only update if we have a valid quota
+    if (quotaToSet > 0) {
+      tokenLimits[family] = quotaToSet;
     }
   }
   usersToFlush.add(token);
@@ -510,8 +491,8 @@ async function initFirebase() {
       createdAt: rawUser.createdAt || Date.now(),
       // Migrate token fields
       tokenCounts: migrateTokenCountsProperty(rawUser.tokenCounts, INITIAL_TOKENS),
-      tokenLimits: migrateTokenCountsProperty(rawUser.tokenLimits, config.tokenQuota),
-      tokenRefresh: migrateTokenCountsProperty(rawUser.tokenRefresh, INITIAL_TOKENS),
+      tokenLimits: migrateTokenLimitsProperty(rawUser.tokenLimits, config.tokenQuota),
+      tokenRefresh: migrateTokenLimitsProperty(rawUser.tokenRefresh, config.tokenQuota),
       meta: rawUser.meta || {},
     };
     // Use the internal map directly to avoid re-triggering upsertUser's default creations
@@ -569,8 +550,8 @@ async function loadUsersFromSQLite() { // Added
       type: row.type,
       promptCount: row.promptCount,
       tokenCounts: migrateTokenCountsProperty(rawTokenCounts, INITIAL_TOKENS),
-      tokenLimits: migrateTokenCountsProperty(rawTokenLimits, config.tokenQuota),
-      tokenRefresh: migrateTokenCountsProperty(rawTokenRefresh, INITIAL_TOKENS),
+      tokenLimits: migrateTokenLimitsProperty(rawTokenLimits, config.tokenQuota),
+      tokenRefresh: migrateTokenLimitsProperty(rawTokenRefresh, config.tokenQuota),
       createdAt: row.createdAt,
       lastUsedAt: row.lastUsedAt,
       disabledAt: row.disabledAt,
@@ -626,8 +607,8 @@ async function flushUsersToSQLite() { // Added
           type: user.type,
           promptCount: user.promptCount,
           tokenCounts: JSON.stringify(user.tokenCounts || INITIAL_TOKENS),
-          tokenLimits: JSON.stringify(user.tokenLimits || migrateTokenCountsProperty(null, config.tokenQuota)),
-          tokenRefresh: JSON.stringify(user.tokenRefresh || INITIAL_TOKENS),
+          tokenLimits: JSON.stringify(user.tokenLimits || migrateTokenLimitsProperty(null, config.tokenQuota)),
+          tokenRefresh: JSON.stringify(user.tokenRefresh || migrateTokenLimitsProperty(null, config.tokenQuota)),
           createdAt: user.createdAt,
           lastUsedAt: user.lastUsedAt ?? null,
           disabledAt: user.disabledAt ?? null,
diff --git a/src/shared/views/partials/shared_quota-info.ejs b/src/shared/views/partials/shared_quota-info.ejs
index c33198e..7294426 100644
--- a/src/shared/views/partials/shared_quota-info.ejs
+++ b/src/shared/views/partials/shared_quota-info.ejs
@@ -25,17 +25,18 @@ const quotaTableId = Math.random().toString(36).slice(2);
     <% Object.entries(quota).forEach(([key, configLimit]) => { %>
     <% 
       const counts = user.tokenCounts[key] || { input: 0, output: 0 };
-      const limits = user.tokenLimits[key] || { input: 0, output: 0 }; // Default if not set
-      const refresh = user.tokenRefresh[key] || { input: 0, output: 0 };
+      const limits = user.tokenLimits[key] ?? 0; // Now a flat number
+      const refresh = user.tokenRefresh[key] ?? 0; // Now a flat number
       
       const usageInput = Number(counts.input) || 0;
       const usageOutput = Number(counts.output) || 0;
       const usageLegacy = Number(counts.legacy_total) || 0;
-      const displayUsage = usageInput + usageOutput || usageLegacy; // This is for total token display, not directly for cost calculation here
+      // Total usage is the sum of all: legacy (historical) + current input + current output
+      const totalUsage = usageInput + usageOutput + usageLegacy;
+      const displayUsage = totalUsage; // This is for total token display, not directly for cost calculation here
 
-      const limitInput = Number(limits.input) || 0;
-      // If limit was from legacy config.tokenQuota (a number), it's in limits.legacy_total or limits.input
-      const displayLimit = limitInput || Number(limits.legacy_total) || 0;
+      // Limits are now flat numbers
+      const displayLimit = Number(limits) || 0;
       
       // Determine tokens to use for cost calculation
       const costInputTokens = (usageInput + usageOutput > 0) ? usageInput : usageLegacy;
@@ -45,16 +46,16 @@ const quotaTableId = Math.random().toString(36).slice(2);
       let remaining = 0;
       let limitIsSet = false;
       if (displayLimit > 0) {
-        remaining = displayLimit - (usageInput + usageOutput);
+        remaining = displayLimit - totalUsage;
         limitIsSet = true;
       } else if (typeof configLimit === 'number' && configLimit > 0) {
         // Fallback to global config limit if user-specific limit is 0 or not set meaningfully
-        remaining = configLimit - (usageInput + usageOutput);
+        remaining = configLimit - totalUsage;
         limitIsSet = true;
       }
 
-
-      const refreshDisplayValue = (Number(refresh.input) || 0) + (Number(refresh.output) || 0) || configLimit || 0;
+      // Refresh is now a flat number
+      const refreshDisplayValue = Number(refresh) || configLimit || 0;
     %>
     <tr>
       <th scope="row"><%- key %></th>