Lets shoot the moon

2025-07-14 21:10:36 +02:00
parent 08400db220
commit bbd2b88503
16 changed files with 724 additions and 19 deletions
@@ -58,6 +58,10 @@ type Config = {
   * Comma-delimited list of Qwen API keys.
   */
  qwenKey?: string;
+  /**
+   * Comma-delimited list of Moonshot API keys.
+   */
+  moonshotKey?: string;

  /**
   * Comma-delimited list of AWS credentials. Each credential item should be a
@@ -464,6 +468,7 @@ export const config: Config = {
  deepseekKey: getEnvWithDefault("DEEPSEEK_KEY", ""),
  xaiKey: getEnvWithDefault("XAI_KEY", ""),
  cohereKey: getEnvWithDefault("COHERE_KEY", ""),
+  moonshotKey: getEnvWithDefault("MOONSHOT_KEY", ""),
  awsCredentials: getEnvWithDefault("AWS_CREDENTIALS", ""),
  gcpCredentials: getEnvWithDefault("GCP_CREDENTIALS", ""),
  azureCredentials: getEnvWithDefault("AZURE_CREDENTIALS", ""),
@@ -765,6 +770,7 @@ export const OMITTED_KEYS = [
  "xaiKey",
  "cohereKey",
  "qwenKey",
+  "moonshotKey",
  "mistralAIKey",
  "awsCredentials",
  "gcpCredentials",
@@ -31,6 +31,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
  cohere: "Cohere",
  deepseek: "Deepseek",
  xai: "Grok",
+  moonshot: "Moonshot",
  turbo: "GPT-4o Mini / 3.5 Turbo",
  gpt4: "GPT-4",
  "gpt4-32k": "GPT-4 32k",
@@ -105,6 +105,9 @@ export const addKey: ProxyReqMutator = (manager) => {
    case "qwen":
      manager.setHeader("Authorization", `Bearer ${assignedKey.key}`);
      break;
+    case "moonshot":
+      manager.setHeader("Authorization", `Bearer ${assignedKey.key}`);
+      break;
    case "aws":
    case "gcp":
    case "google-ai":
@@ -125,23 +125,30 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
    modelMax = 100000;
  } else if (model.match(/^deepseek/)) {
    modelMax = 64000;
+  } else if (model.match(/^kimi-k2/)) {
+    // Kimi K2 models have 131k context window
+    modelMax = 131000;
+  } else if (model.match(/moonshot/)) {
+    // Moonshot models typically have 200k context window
+    modelMax = 200000;
+  } else if (model.match(/command[\w-]*-03-202[0-9]/)) {
+    // Cohere's command-a-03 models have 256k context window
+    modelMax = 256000;
+  } else if (model.match(/command/) || model.match(/cohere/)) {
+    // Default for all other Cohere models
+    modelMax = 128000;
  } else if (model.match(/^grok-4/)) {
    modelMax = 256000;
  } else if (model.match(/^grok/)) {
    modelMax = 128000;
-  } else if (model.match(/command-a-03-202[0-9]/)) {
-    // Cohere's command-a-03 models have 256k context window
-    modelMax = 256000;
-  } else if (model.match(/command[\w-]*-03-202[0-9]/)) {
-    // Other Command models with -03- pattern (including r, r-plus) have 128k context window
-    modelMax = 128000;
-  } else if (model.match(/command/) || model.match(/cohere/)) {
-    // Default for all other Cohere models
-    modelMax = 128000;
  } else if (model.match(/^magistral/)) {
    modelMax = 40000;
  } else if (model.match(/^magistral/)) {
    modelMax = 40000;
+  } else if (model.match(/^moonshot/)) {
+    modelMax = 200000;
+  } else if (model.match(/^kimi-k2/)) {
+    modelMax = 131000;
  } else if (model.match(/tral/)) {
    // catches mistral, mixtral, codestral, mathstral, etc. mistral models have
    // no name convention and wildly different context windows so this is a
@@ -267,6 +267,9 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
      case "qwen":
        // No special handling yet
        break;
+      case "moonshot":
+        errorPayload.proxy_note = `The Moonshot API rejected the request. Check the error message for details.`;
+        break;
      default:
        assertNever(service);
    }
@@ -328,6 +331,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
        return;
      case "mistral-ai":
      case "gcp":
+      case "moonshot":
        keyPool.disable(req.key!, "revoked");
        errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`;
        return;
@@ -366,6 +370,9 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
          // Similar handling to OpenAI for rate limits
          await handleOpenAIRateLimitError(req, errorPayload);
          break;
+        case "moonshot":
+          await handleMoonshotRateLimitError(req, errorPayload);
+          break;
      default:
        assertNever(service as never);
    }
@@ -598,6 +605,39 @@ async function handleCohereRateLimitError(
  errorPayload.proxy_note = "Too many requests to the Cohere API. Please try again later.";
 }

+/**
+ * Reacts to a rate-limit response from the Moonshot API.
+ *
+ * The assigned key is marked as rate limited and the request's retry counter
+ * is bumped. For the first three attempts the request is re-enqueued after a
+ * randomized 2-6 second pause and a RetryableError is thrown to signal that.
+ * Once the attempts are used up, or if re-enqueueing fails, a proxy note is
+ * written to the error payload instead.
+ *
+ * @param req The proxied request; `req.retryCount` is updated in place.
+ * @param errorPayload Error body that will be returned to the client.
+ * @throws RetryableError when the request was re-enqueued.
+ */
+async function handleMoonshotRateLimitError(
+  req: Request,
+  errorPayload: ProxiedErrorPayload
+) {
+  const giveUpNote = "Too many requests to the Moonshot API. Please try again later.";
+
+  // Lock out the key that was just rejected.
+  keyPool.markRateLimited(req.key!);
+
+  // First pass initializes the counter, later passes increment it.
+  req.retryCount = (req.retryCount || 0) + 1;
+
+  // Attempts are exhausted: surface the error to the user.
+  if (req.retryCount > 3) {
+    errorPayload.proxy_note = giveUpNote;
+    return;
+  }
+
+  try {
+    // Pause 2-6 seconds before going back into the queue.
+    const jitterMs = Math.floor(Math.random() * 4000);
+    await new Promise(resolve => setTimeout(resolve, 2000 + jitterMs));
+
+    await reenqueueRequest(req);
+    req.log.info({ attempt: req.retryCount }, "Moonshot rate-limited request re-enqueued");
+  } catch (error) {
+    if (error instanceof RetryableError) {
+      throw error; // Let retry signals propagate untouched
+    }
+    req.log.error({ error }, "Failed to re-enqueue rate-limited Moonshot request");
+    errorPayload.proxy_note = giveUpNote;
+    return;
+  }
+
+  // Re-enqueue succeeded; signal the caller that this attempt is being retried.
+  throw new RetryableError(`Moonshot rate-limited request re-enqueued (attempt ${req.retryCount}/3).`);
+}
+
 async function handleOpenAIRateLimitError(
  req: Request,
  errorPayload: ProxiedErrorPayload
@@ -0,0 +1,219 @@
+import { Request, RequestHandler, Router } from "express";
+import { createPreprocessorMiddleware } from "./middleware/request";
+import { ipLimiter } from "./rate-limit";
+import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory";
+import { addKey, finalizeBody } from "./middleware/request";
+import { ProxyResHandlerWithBody } from "./middleware/response";
+import axios from "axios";
+import { MoonshotKey, keyPool } from "../shared/key-management";
+import { isMoonshotModel, isMoonshotVisionModel, enableMoonshotPartial, hasMoonshotPartialMode } from "../shared/api-schemas/moonshot";
+import { logger } from "../logger";
+
+const log = logger.child({ module: "proxy", service: "moonshot" }); // child logger tagged for the Moonshot proxy routes
+let modelsCache: any = null; // last model list fetched by getModelsResponse; stays null until a fetch succeeds
+let modelsCacheTime = 0; // epoch ms at which modelsCache was stored; 0 means nothing has been cached yet
+
+/**
+ * Blocking (non-streaming) response handler for Moonshot requests: relays the
+ * upstream body to the client as JSON with status 200.
+ *
+ * @throws Error when the body is not a non-null object.
+ */
+const moonshotResponseHandler: ProxyResHandlerWithBody = async (
+  _proxyRes,
+  req,
+  res,
+  body
+) => {
+  // `typeof null` is "object", so null must be rejected explicitly; otherwise
+  // the `body.proxy` access below fails with an unrelated TypeError.
+  if (typeof body !== "object" || body === null) {
+    throw new Error("Expected body to be an object");
+  }
+
+  res.status(200).json({ ...body, proxy: body.proxy });
+};
+
+/**
+ * Returns an OpenAI-style model list for the Moonshot service.
+ *
+ * Resolution order:
+ *  1. the cached list, when it is less than one minute old;
+ *  2. a fresh list fetched from the Moonshot API with a pooled key;
+ *  3. on failure, the last successfully fetched list (even if stale);
+ *  4. a static list of known models when nothing was ever fetched.
+ *
+ * Never rejects; every failure is logged and answered with a fallback.
+ */
+const getModelsResponse = async () => {
+  const CACHE_TTL_MS = 1000 * 60;
+  // Without a timeout a stalled upstream would hang the models endpoint.
+  const REQUEST_TIMEOUT_MS = 10000;
+
+  // Return cache if less than 1 minute old
+  if (modelsCache && Date.now() - modelsCacheTime < CACHE_TTL_MS) {
+    return modelsCache;
+  }
+
+  try {
+    const modelToUse = "moonshot-v1-8k";
+    const moonshotKey = keyPool.get(modelToUse, "moonshot") as MoonshotKey;
+
+    if (!moonshotKey || !moonshotKey.key) {
+      log.warn("No valid Moonshot key available for model listing");
+      throw new Error("No valid Moonshot API key available");
+    }
+
+    // Fetch models from Moonshot API
+    const response = await axios.get("https://api.moonshot.cn/v1/models", {
+      headers: {
+        "Content-Type": "application/json",
+        "Authorization": `Bearer ${moonshotKey.key}`
+      },
+      timeout: REQUEST_TIMEOUT_MS,
+    });
+
+    if (!response.data || !response.data.data) {
+      throw new Error("Unexpected response format from Moonshot API");
+    }
+
+    // Format response to ensure OpenAI compatibility
+    const models = {
+      object: "list",
+      data: response.data.data.map((model: any) => ({
+        id: model.id,
+        object: "model",
+        created: model.created || Math.floor(Date.now() / 1000),
+        owned_by: model.owned_by || "moonshot",
+        permission: model.permission || [],
+        root: model.root || model.id,
+        parent: model.parent || null,
+      })),
+    };
+
+    log.debug({ modelCount: models.data.length }, "Retrieved models from Moonshot API");
+
+    // Cache the response
+    modelsCache = models;
+    modelsCacheTime = Date.now();
+    return models;
+  } catch (error) {
+    if (error instanceof Error) {
+      log.error(
+        { errorMessage: error.message, stack: error.stack },
+        "Error fetching Moonshot models"
+      );
+    } else {
+      log.error({ error }, "Unknown error fetching Moonshot models");
+    }
+
+    // A stale but real list is more accurate than the static fallback below.
+    if (modelsCache) {
+      return modelsCache;
+    }
+
+    // Return a default list of known Moonshot models as fallback
+    return {
+      object: "list",
+      data: [
+        { id: "moonshot-v1-8k", object: "model", created: 1678888000, owned_by: "moonshot" },
+        { id: "moonshot-v1-32k", object: "model", created: 1678888000, owned_by: "moonshot" },
+        { id: "moonshot-v1-128k", object: "model", created: 1678888000, owned_by: "moonshot" },
+      ],
+    };
+  }
+};
+
+/**
+ * Express handler for the models listing route. Replies with the list
+ * produced by getModelsResponse, or with a 500 JSON error if producing or
+ * sending it throws.
+ */
+const handleModelRequest: RequestHandler = async (_req, res) => {
+  try {
+    res.status(200).json(await getModelsResponse());
+  } catch (error) {
+    if (!(error instanceof Error)) {
+      log.error({ error }, "Unknown error handling model request");
+    } else {
+      const { message: errorMessage, stack } = error;
+      log.error({ errorMessage, stack }, "Error handling model request");
+    }
+    res.status(500).json({ error: "Failed to fetch models" });
+  }
+};
+
+/**
+ * Enables Moonshot "partial mode" (assistant prefill) for a chat request.
+ *
+ * When the conversation ends with one or more assistant messages, they are
+ * merged into a single assistant message flagged `partial: true`. The request
+ * body is mutated in place. Setting the NO_MOONSHOT_PARTIAL environment
+ * variable disables this step.
+ *
+ * Only string contents are merged. If any trailing assistant message has
+ * non-string content (a content-part array or null), the messages are left
+ * untouched, because string concatenation would turn such values into
+ * "[object Object]" or "null" inside the prefill.
+ */
+function handlePartialMode(req: Request) {
+  if (process.env.NO_MOONSHOT_PARTIAL) return;
+
+  const msgs = req.body.messages;
+  if (!Array.isArray(msgs)) return;
+  if (msgs.at(-1)?.role !== 'assistant') return;
+
+  // Find where the trailing run of assistant messages begins.
+  let start = msgs.length;
+  while (start > 0 && msgs[start - 1]?.role === 'assistant') {
+    start--;
+  }
+
+  const trailing = msgs.slice(start);
+  if (!trailing.every((m: any) => typeof m.content === 'string')) {
+    log.debug("Skipping Moonshot partial mode: trailing assistant content is not plain text");
+    return;
+  }
+
+  // Replace consecutive assistant messages with single message with partial: true
+  const content = trailing.map((m: any) => m.content).join('');
+  msgs.splice(start, trailing.length, { role: 'assistant', content, partial: true });
+  log.debug("Consolidated assistant messages and enabled partial mode for Moonshot request");
+}
+
+/**
+ * Hook for adapting message content when a Moonshot vision model is targeted.
+ *
+ * No transformation is currently applied: messages are forwarded exactly as
+ * received. The previous implementation mapped every message to itself through
+ * two identical branches; that dead code is removed, and a non-array
+ * `messages` value no longer raises a TypeError.
+ */
+function handleVisionContent(req: Request) {
+  const { model, messages } = req.body;
+
+  if (!isMoonshotVisionModel(model) || !Array.isArray(messages)) {
+    return;
+  }
+
+  // Intentionally empty: add vision-specific content rewriting here if the
+  // upstream format ever diverges from what clients send.
+}
+
+/**
+ * Emits a debug log with the prompt token estimate already stored on the
+ * request. Nothing is computed here; requests for non-Moonshot models or
+ * without a truthy `promptTokens` are ignored.
+ */
+function countMoonshotTokens(req: Request) {
+  const { model } = req.body;
+
+  if (!isMoonshotModel(model) || !req.promptTokens) {
+    return;
+  }
+
+  const tokens = req.promptTokens;
+  log.debug({ tokens, model }, "Estimated token count for Moonshot prompt");
+}
+
+/**
+ * On a 429 error, marks the request's key as rate limited and asks the key
+ * pool for another key.
+ *
+ * @returns true when a different key was assigned to `req.key` (the caller
+ *   may retry), false for non-429 errors or when the pool returned the same key.
+ */
+// NOTE(review): no caller is visible in this file - confirm it is still needed.
+async function handleMoonshotRateLimitError(req: Request, error: any) {
+  if (error.response?.status !== 429) {
+    return false;
+  }
+
+  log.warn({ model: req.body.model }, "Moonshot rate limit hit, rotating key");
+
+  const limitedKey = req.key as MoonshotKey;
+  keyPool.markRateLimited(limitedKey);
+
+  // Ask the pool for a replacement and check whether it really is another key
+  const replacement = keyPool.get(req.body.model, "moonshot") as MoonshotKey;
+  const rotated = replacement.hash !== limitedKey.hash;
+  if (rotated) {
+    req.key = replacement;
+  }
+  return rotated;
+}
+
+// Queued proxy that forwards to the Moonshot API after attaching the key and
+// finalizing the request body.
+const moonshotProxy = createQueuedProxyMiddleware({
+  mutations: [addKey, finalizeBody],
+  target: "https://api.moonshot.cn",
+  blockingResponseHandler: moonshotResponseHandler,
+});
+
+const moonshotRouter = Router();
+
+// Preprocessing for chat completions: partial-mode prefill, vision hook and
+// token logging run after the request has been transformed.
+const chatPreprocessor = createPreprocessorMiddleware(
+  { inApi: "openai", outApi: "openai", service: "moonshot" },
+  { afterTransform: [ handlePartialMode, handleVisionContent, countMoonshotTokens ] }
+);
+
+// Preprocessing for embeddings: token logging only.
+const embeddingsPreprocessor = createPreprocessorMiddleware(
+  { inApi: "openai", outApi: "openai", service: "moonshot" },
+  { afterTransform: [ countMoonshotTokens ] }
+);
+
+// Chat completions endpoint
+moonshotRouter.post("/v1/chat/completions", ipLimiter, chatPreprocessor, moonshotProxy);
+
+// Embeddings endpoint
+moonshotRouter.post("/v1/embeddings", ipLimiter, embeddingsPreprocessor, moonshotProxy);
+
+// Models endpoint
+moonshotRouter.get("/v1/models", handleModelRequest);
+
+export const moonshot = moonshotRouter;
@@ -14,6 +14,7 @@ import { deepseek } from "./deepseek";
 import { xai } from "./xai";
 import { cohere } from "./cohere";
 import { qwen } from "./qwen";
+import { moonshot } from "./moonshot";
 import { sendErrorToClient } from "./middleware/response/error-generator";

 const proxyRouter = express.Router();
@@ -57,6 +58,7 @@ proxyRouter.use("/deepseek", addV1, deepseek);
 proxyRouter.use("/xai", addV1, xai);
 proxyRouter.use("/cohere", addV1, cohere);
 proxyRouter.use("/qwen", addV1, qwen);
+proxyRouter.use("/moonshot", addV1, moonshot);

 // Redirect browser requests to the homepage.
 proxyRouter.get("*", (req, res, next) => {
@@ -9,6 +9,7 @@ import {
  XaiKey,
  CohereKey,
  QwenKey,
+  MoonshotKey,
 } from "./shared/key-management";
 import {
  AnthropicModelFamily,
@@ -27,6 +28,7 @@ import {
  XaiModelFamily,
  CohereModelFamily,
  QwenModelFamily,
+  MoonshotModelFamily,
 } from "./shared/models";
 import { getCostSuffix, getTokenCostUsd, prettyTokens } from "./shared/stats";
 import { getUniqueIps } from "./proxy/rate-limit";
@@ -50,6 +52,8 @@ const keyIsCohereKey = (k: KeyPoolKey): k is CohereKey =>
  k.service === "cohere";
 const keyIsQwenKey = (k: KeyPoolKey): k is QwenKey =>
  k.service === "qwen";
+const keyIsMoonshotKey = (k: KeyPoolKey): k is MoonshotKey =>
+  k.service === "moonshot"; // type guard: narrows a pooled key to MoonshotKey via its service tag

 /** Stats aggregated across all keys for a given service. */
 type ServiceAggregate = "keys" | "uncheckedKeys" | "orgs";
@@ -147,7 +151,8 @@ export type ServiceInfo = {
  & { [f in DeepseekModelFamily]?: BaseFamilyInfo }
  & { [f in XaiModelFamily]?: BaseFamilyInfo }
  & { [f in CohereModelFamily]?: BaseFamilyInfo }
-  & { [f in QwenModelFamily]?: BaseFamilyInfo };
+  & { [f in QwenModelFamily]?: BaseFamilyInfo }
+  & { [f in MoonshotModelFamily]?: BaseFamilyInfo };

 // https://stackoverflow.com/a/66661477
 // type DeepKeyOf<T> = (
@@ -201,6 +206,9 @@ const SERVICE_ENDPOINTS: { [s in LLMService]: Record<string, string> } = {
  qwen: {
    qwen: `%BASE%/qwen`,
  },
+  moonshot: {
+    moonshot: `%BASE%/moonshot`,
+  },
 };

 const familyStats = new Map<ModelAggregateKey, number>();
@@ -358,6 +366,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
  addToService("xai__keys", k.service === "xai" ? 1 : 0);
  addToService("cohere__keys", k.service === "cohere" ? 1 : 0);
  addToService("qwen__keys", k.service === "qwen" ? 1 : 0);
+  addToService("moonshot__keys", k.service === "moonshot" ? 1 : 0);

  let sumInputTokens = 0;
  let sumOutputTokens = 0;
@@ -521,6 +530,9 @@ function addKeyToAggregates(k: KeyPoolKey) {
    case "qwen":
      k.modelFamilies.forEach(incrementGenericFamilyStats);
      break;
+    case "moonshot":
+      k.modelFamilies.forEach(incrementGenericFamilyStats);
+      break;
    default:
      assertNever(k.service);
  }
@@ -640,6 +652,9 @@ function getInfoForFamily(family: ModelFamily): BaseFamilyInfo {
      case "qwen":
        info.overQuotaKeys = familyStats.get(`${family}__overQuota`) || 0;
        break;
+      case "moonshot":
+        info.overQuotaKeys = familyStats.get(`${family}__overQuota`) || 0;
+        break;
    }
  }

@@ -0,0 +1,106 @@
+import { z } from "zod";
+import { OPENAI_OUTPUT_MAX } from "./openai";
+
+/**
+ * Checks whether a model ID belongs to Moonshot.
+ *
+ * Matches both the `moonshot-*` IDs and the `kimi-*` IDs (e.g. `kimi-k2-...`)
+ * that the proxy also serves through the Moonshot service.
+ *
+ * @param model Model ID from the request; non-string values yield false.
+ * @returns true when the ID contains "moonshot" or "kimi".
+ */
+export function isMoonshotModel(model: string): boolean {
+  return typeof model === "string" && /moonshot|kimi/.test(model);
+}
+
+/**
+ * Tells whether a model ID names a Moonshot vision model, i.e. contains both
+ * "moonshot" and "vision".
+ */
+export function isMoonshotVisionModel(model: string): boolean {
+  const requiredTokens = ["moonshot", "vision"];
+  return requiredTokens.every((token) => model.includes(token));
+}
+
+// Text segment of a multi-part message
+const MoonshotTextPartSchema = z.object({
+  type: z.literal("text"),
+  text: z.string(),
+});
+
+// Image segment of a multi-part message
+const MoonshotImagePartSchema = z.object({
+  type: z.literal("image_url"),
+  image_url: z.object({
+    url: z.string(),
+    detail: z.enum(["low", "high", "auto"]).optional(),
+  }),
+});
+
+// Message content: either a plain string or a list of text/image parts
+const MoonshotVisionContentSchema = z.union([
+  z.string(),
+  z.array(z.union([MoonshotTextPartSchema, MoonshotImagePartSchema])),
+]);
+
+// Chat message schema.
+// - `content` uses MoonshotVisionContentSchema directly; it already accepts
+//   plain strings, so wrapping it in another string union was redundant.
+// - The completions schema accepts `tools`, so tool-calling conversations must
+//   validate too: `role: "tool"` results carrying `tool_call_id`, and assistant
+//   messages carrying `tool_calls` (which would otherwise be stripped as
+//   unknown keys).
+const MoonshotChatMessageSchema = z.object({
+  role: z.enum(["user", "assistant", "system", "tool"]),
+  content: MoonshotVisionContentSchema.nullable(),
+  name: z.string().optional(),
+  // Tool calling support
+  tool_calls: z.array(z.any()).optional(),
+  tool_call_id: z.string().optional(),
+  // Support for partial mode
+  partial: z.boolean().optional(),
+});
+
+const MoonshotMessagesSchema = z.array(MoonshotChatMessageSchema);
+
+// Schema for Moonshot chat completions; omitted optional fields are filled with the defaults declared below
+export const MoonshotV1ChatCompletionsSchema = z.object({
+  model: z.string(),
+  messages: MoonshotMessagesSchema,
+  temperature: z.number().optional().default(0.3), // 0.3 when the client omits it
+  top_p: z.number().optional().default(1),
+  max_tokens: z.coerce
+    .number()
+    .int()
+    .nullish()
+    .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)), // null/undefined becomes OPENAI_OUTPUT_MAX; larger values are clamped to it
+  stream: z.boolean().optional().default(false),
+  stop: z
+    .union([z.string(), z.array(z.string()).max(5)]) // one stop string, or up to five
+    .optional()
+    .default([])
+    .transform((v) => (Array.isArray(v) ? v : [v])), // always hand downstream code an array
+  seed: z.number().int().min(0).optional(),
+  response_format: z
+    .object({ 
+      type: z.enum(["text", "json_object"])
+    })
+    .optional(),
+  tools: z.array(z.any()).optional(), // tool definitions are passed through without validation
+  tool_choice: z.any().optional(),
+  frequency_penalty: z.number().min(-2).max(2).optional().default(0),
+  presence_penalty: z.number().min(-2).max(2).optional().default(0),
+  n: z.number().int().min(1).max(5).optional().default(1), // between 1 and 5 choices, 1 by default
+});
+
+// Schema for Moonshot embeddings
+export const MoonshotV1EmbeddingsSchema = z.object({
+  model: z.string(),
+  input: z.union([z.string(), z.array(z.string())]), // a single string or a list of strings
+  encoding_format: z.enum(["float", "base64"]).optional() // optional; only "float" or "base64" pass validation
+});
+
+// Turns on partial mode for a conversation that ends with an assistant message.
+// Returns a new array whose last message is a copy flagged `partial: true`;
+// the input array and its messages are not modified. When the list is empty,
+// does not end with an assistant message, or the flag is already truthy, the
+// original array is returned as is.
+export function enableMoonshotPartial(messages: any[]): any[] {
+  if (messages.length === 0) {
+    return messages;
+  }
+
+  const tail = messages[messages.length - 1];
+  if (tail.role !== 'assistant' || tail.partial) {
+    return messages;
+  }
+
+  const updated = messages.slice(0, -1);
+  updated.push({ ...tail, partial: true });
+  return updated;
+}
+
+// Reports whether the conversation already ends with an assistant message
+// whose `partial` flag is strictly `true`.
+export function hasMoonshotPartialMode(messages: any[]): boolean {
+  if (messages.length === 0) {
+    return false;
+  }
+
+  const tail = messages[messages.length - 1];
+  return tail.role === 'assistant' && tail.partial === true;
+}
@@ -105,3 +105,4 @@ export { DeepseekKey } from "./deepseek/provider";
 export { XaiKey } from "./xai/provider";
 export { CohereKey } from "./cohere/provider";
 export { QwenKey } from "./qwen/provider";
+export { MoonshotKey } from "./moonshot/provider";
@@ -17,6 +17,7 @@ import { DeepseekKeyProvider } from "./deepseek/provider";
 import { XaiKeyProvider } from "./xai/provider";
 import { CohereKeyProvider } from "./cohere/provider";
 import { QwenKeyProvider } from "./qwen/provider";
+import { MoonshotKeyProvider } from "./moonshot/provider";

 type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate | Partial<GcpKey>;

@@ -38,6 +39,7 @@ export class KeyPool {
    this.keyProviders.push(new XaiKeyProvider());
    this.keyProviders.push(new CohereKeyProvider());
    this.keyProviders.push(new QwenKeyProvider());
+    this.keyProviders.push(new MoonshotKeyProvider());
  }

  public init() {
@@ -81,7 +83,8 @@ export class KeyPool {
      service instanceof DeepseekKeyProvider ||
      service instanceof XaiKeyProvider ||
      service instanceof CohereKeyProvider ||
-      service instanceof QwenKeyProvider
+      service instanceof QwenKeyProvider ||
+      service instanceof MoonshotKeyProvider
    ) {
      service.update(key.hash, { isOverQuota: reason === "quota" });
    }
@@ -211,6 +214,8 @@ export class KeyPool {
      return "cohere";
    } else if (model.includes("qwen")) {
      return "qwen";
+    } else if (model.includes("moonshot")) {
+      return "moonshot";
    } else if (model.startsWith("anthropic.claude")) {
      // AWS offers models from a few providers
      // https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids-arns.html
@@ -0,0 +1,127 @@
+import { MoonshotKey } from "./provider";
+import { logger } from "../../../logger";
+import { assertNever } from "../../utils";
+
+/** Maximum time (ms) to wait for the balance endpoint before aborting a check. */
+const CHECK_TIMEOUT = 10000;
+/** Balance endpoint; an authenticated call to it is used as the validity probe. */
+const API_URL = "https://api.moonshot.cn/v1/users/me/balance";
+
+/**
+ * Validates Moonshot API keys by querying the account balance endpoint and
+ * reports the outcome through the `update` callback supplied at construction.
+ */
+export class MoonshotKeyChecker {
+  private log = logger.child({ module: "key-checker", service: "moonshot" });
+
+  /**
+   * @param update Callback invoked with a key hash and the fields to change
+   *   once a check has produced a verdict.
+   */
+  constructor(private readonly update: (hash: string, key: Partial<MoonshotKey>) => void) {
+    this.log.info("MoonshotKeyChecker initialized");
+  }
+
+  /**
+   * Checks a single key and applies the result. Never rejects: failures such
+   * as network errors, timeouts or upstream 5xx responses are logged and the
+   * key is left unchanged.
+   */
+  public async checkKey(key: MoonshotKey): Promise<void> {
+    this.log.info({ hash: key.hash }, "Starting key validation check");
+    try {
+      const result = await this.validateKey(key);
+      this.handleCheckResult(key, result);
+    } catch (error) {
+      if (error instanceof Error) {
+        this.log.warn(
+          { error: error.message, stack: error.stack, hash: key.hash },
+          "Failed to check key status"
+        );
+      } else {
+        this.log.warn(
+          { error, hash: key.hash },
+          "Failed to check key status with unknown error"
+        );
+      }
+    }
+  }
+
+  /**
+   * Calls the balance endpoint and classifies the key.
+   *
+   * @returns "valid" for a usable key (including when the check itself was
+   *   rate limited), "quota" when the reported balance is zero or negative,
+   *   "invalid" for rejected keys and unrecognized responses.
+   * @throws Error on network failure, timeout, or an upstream 5xx status, so
+   *   that a temporary outage does not get a working key marked as revoked.
+   */
+  private async validateKey(key: MoonshotKey): Promise<"valid" | "invalid" | "quota"> {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => {
+      controller.abort();
+      this.log.warn({ hash: key.hash }, "Key validation timed out after " + CHECK_TIMEOUT + "ms");
+    }, CHECK_TIMEOUT);
+
+    try {
+      // Check balance endpoint to verify key validity
+      const headers = {
+        "Content-Type": "application/json",
+        "Authorization": `Bearer ${key.key}`
+      };
+
+      const response = await fetch(API_URL, {
+        method: "GET",
+        headers,
+        signal: controller.signal,
+      });
+
+      if (response.status === 200) {
+        const data = await response.json();
+        // Check if response has the expected Moonshot API structure
+        if (data && data.status === true && data.code === 0 && data.data) {
+          const balance = data.data.available_balance;
+          // Consider the quota exceeded if the balance is 0 or negative
+          if (typeof balance === 'number' && balance <= 0) {
+            return "quota";
+          }
+          return "valid";
+        }
+        this.log.warn(
+          { response: data, hash: key.hash },
+          "Unexpected response format from Moonshot API"
+        );
+        return "invalid";
+      }
+
+      if (response.status === 401) {
+        // Unauthorized - invalid key
+        return "invalid";
+      }
+
+      if (response.status === 429) {
+        // Rate limit - but key is valid
+        return "valid";
+      }
+
+      if (response.status >= 500) {
+        // Upstream outage: says nothing about the key, so do not revoke it.
+        throw new Error(
+          `Moonshot API returned HTTP ${response.status} while checking key`
+        );
+      }
+
+      this.log.warn(
+        { status: response.status, hash: key.hash },
+        "Unexpected status code while testing key validity"
+      );
+      return "invalid";
+    } catch (error) {
+      if (error instanceof Error && error.name === 'AbortError') {
+        this.log.warn({ hash: key.hash }, "Key validation aborted");
+      }
+      throw error;
+    } finally {
+      // Always cancel the timer so it cannot fire after the check settled.
+      clearTimeout(timeout);
+    }
+  }
+
+  /** Translates a verdict into key field updates and logs the decision. */
+  private handleCheckResult(
+    key: MoonshotKey,
+    result: "valid" | "invalid" | "quota"
+  ): void {
+    switch (result) {
+      case "valid":
+        this.log.info({ hash: key.hash }, "Key is valid and enabled");
+        this.update(key.hash, {
+          isDisabled: false,
+          lastChecked: Date.now(),
+        });
+        break;
+      case "invalid":
+        this.log.warn({ hash: key.hash }, "Key is invalid, marking as revoked");
+        this.update(key.hash, {
+          isDisabled: true,
+          isRevoked: true,
+          lastChecked: Date.now(),
+        });
+        break;
+      case "quota":
+        this.log.warn({ hash: key.hash }, "Key has exceeded its quota, disabling");
+        this.update(key.hash, {
+          isDisabled: true,
+          isOverQuota: true,
+          lastChecked: Date.now(),
+        });
+        break;
+      default:
+        assertNever(result);
+    }
+  }
+}
@@ -0,0 +1,2 @@
+export { MoonshotKey, MoonshotKeyProvider } from "./provider";
+export { MoonshotKeyChecker } from "./checker";
@@ -0,0 +1,166 @@
+import { Key, KeyProvider, createGenericGetLockoutPeriod } from "..";
+import { MoonshotKeyChecker } from "./checker";
+import { config } from "../../../config";
+import { logger } from "../../../logger";
+import { MoonshotModelFamily, ModelFamily } from "../../models";
+
+export interface MoonshotKey extends Key { // key record managed by MoonshotKeyProvider
+  readonly service: "moonshot"; // fixed service tag for these keys
+  readonly modelFamilies: MoonshotModelFamily[]; // the provider constructor always sets this to ["moonshot"]
+  isOverQuota: boolean; // starts false; set to true once the key is flagged as out of quota
+}
+
+/**
+ * Key provider for the Moonshot service. Loads keys from the comma-delimited
+ * `config.moonshotKey` setting, hands them out to requests, and tracks their
+ * usage, rate-limit and quota state.
+ */
+export class MoonshotKeyProvider implements KeyProvider<MoonshotKey> {
+  readonly service = "moonshot";
+
+  /**
+   * Upon being rate limited, a key will be locked out for this many milliseconds
+   * while we wait for other concurrent requests to finish.
+   */
+  private static readonly RATE_LIMIT_LOCKOUT = 2000;
+  /**
+   * Upon assigning a key, we will wait this many milliseconds before allowing it
+   * to be used again. This is to prevent the queue from flooding a key with too
+   * many requests while we wait to learn whether previous ones succeeded.
+   */
+  private static readonly KEY_REUSE_DELAY = 500;
+
+  private keys: MoonshotKey[] = [];
+  private checker?: MoonshotKeyChecker;
+  private log = logger.child({ module: "key-provider", service: this.service });
+
+  /** Parses the configured key list; blank entries are ignored. */
+  constructor() {
+    const keyConfig = config.moonshotKey?.trim();
+    if (!keyConfig) {
+      return;
+    }
+
+    const keys = keyConfig.split(",").map((k) => k.trim());
+    for (const key of keys) {
+      if (!key) continue;
+      this.keys.push({
+        key,
+        service: this.service,
+        modelFamilies: ["moonshot"],
+        isDisabled: false,
+        isRevoked: false,
+        promptCount: 0,
+        lastUsed: 0,
+        lastChecked: 0,
+        hash: this.hashKey(key),
+        rateLimitedAt: 0,
+        rateLimitedUntil: 0,
+        tokenUsage: {},
+        isOverQuota: false,
+      });
+    }
+  }
+
+  /** Returns the hex SHA-256 digest used to identify a key without exposing it. */
+  private hashKey(key: string): string {
+    return require("crypto").createHash("sha256").update(key).digest("hex");
+  }
+
+  /** Starts the key checker and checks every key, unless checking is disabled. */
+  public init() {
+    if (this.keys.length === 0) return;
+    if (!config.checkKeys) {
+      this.log.warn(
+        "Key checking is disabled. Keys will not be verified."
+      );
+      return;
+    }
+    this.checker = new MoonshotKeyChecker(this.update.bind(this));
+    for (const key of this.keys) {
+      void this.checker.checkKey(key);
+    }
+  }
+
+  /**
+   * Picks a random enabled key, preferring keys that are not currently locked
+   * out by a rate limit or the reuse delay. Locked-out keys are used only when
+   * no other enabled key exists. The chosen key is throttled and a copy of it
+   * is returned.
+   *
+   * @param model Requested model; not used for selection.
+   * @throws Error when every key is disabled.
+   */
+  public get(model: string): MoonshotKey {
+    const availableKeys = this.keys.filter((k) => !k.isDisabled);
+    if (availableKeys.length === 0) {
+      throw new Error("No Moonshot keys available");
+    }
+
+    // Honor lockouts: without this a key that was just rate limited could be
+    // handed straight back out while free keys sit idle.
+    const now = Date.now();
+    const readyKeys = availableKeys.filter((k) => k.rateLimitedUntil <= now);
+    const candidates = readyKeys.length > 0 ? readyKeys : availableKeys;
+
+    const key = candidates[Math.floor(Math.random() * candidates.length)];
+    key.lastUsed = now;
+    this.throttle(key.hash);
+    return { ...key };
+  }
+
+  /** Lists all keys with the secret `key` field removed. */
+  public list(): Omit<MoonshotKey, "key">[] {
+    return this.keys.map(({ key, ...rest }) => rest);
+  }
+
+  /** Marks the stored key with the same hash as disabled, if it exists. */
+  public disable(key: MoonshotKey): void {
+    const found = this.keys.find((k) => k.hash === key.hash);
+    if (found) {
+      found.isDisabled = true;
+    }
+  }
+
+  /** Applies a partial update to the key with the given hash, if it exists. */
+  public update(hash: string, update: Partial<MoonshotKey>): void {
+    const key = this.keys.find((k) => k.hash === hash);
+    if (key) {
+      Object.assign(key, update);
+    }
+  }
+
+  /** Number of keys that are not disabled. */
+  public available(): number {
+    return this.keys.filter((k) => !k.isDisabled).length;
+  }
+
+  /**
+   * Records one prompt and its token usage against a key. Unknown hashes are
+   * ignored.
+   */
+  public incrementUsage(keyHash: string, modelFamily: MoonshotModelFamily, usage: { input: number; output: number }) {
+    const key = this.keys.find((k) => k.hash === keyHash);
+    if (!key) return;
+
+    key.promptCount++;
+
+    if (!key.tokenUsage) {
+      key.tokenUsage = {};
+    }
+    // Moonshot only has one model family "moonshot"
+    if (!key.tokenUsage[modelFamily]) {
+      key.tokenUsage[modelFamily] = { input: 0, output: 0 };
+    }
+
+    const currentFamilyUsage = key.tokenUsage[modelFamily]!;
+    currentFamilyUsage.input += usage.input;
+    currentFamilyUsage.output += usage.output;
+  }
+
+  getLockoutPeriod = createGenericGetLockoutPeriod(() => this.keys);
+
+  /**
+   * Locks a key out for RATE_LIMIT_LOCKOUT milliseconds. An unknown hash is
+   * logged and ignored instead of crashing the caller.
+   */
+  public markRateLimited(keyHash: string) {
+    this.log.debug({ key: keyHash }, "Key rate limited");
+    const key = this.keys.find((k) => k.hash === keyHash);
+    if (!key) {
+      this.log.warn({ key: keyHash }, "Cannot mark unknown key as rate limited");
+      return;
+    }
+    const now = Date.now();
+    key.rateLimitedAt = now;
+    key.rateLimitedUntil = now + MoonshotKeyProvider.RATE_LIMIT_LOCKOUT;
+  }
+
+  /**
+   * Clears the quota/disabled flags of every key and re-runs the checker on
+   * it. Does nothing when the checker was never started or checking is off.
+   */
+  public recheck(): void {
+    if (!this.checker || !config.checkKeys) return;
+    for (const key of this.keys) {
+      this.update(key.hash, {
+        isOverQuota: false,
+        isDisabled: false,
+        lastChecked: 0
+      });
+      void this.checker.checkKey(key);
+    }
+  }
+
+  /**
+   * Applies a short artificial delay to the key upon dequeueing, in order to
+   * prevent it from being immediately assigned to another request before the
+   * current one can be dispatched.
+   **/
+  private throttle(hash: string) {
+    const key = this.keys.find((k) => k.hash === hash);
+    if (!key) return;
+
+    const now = Date.now();
+    const currentRateLimit = key.rateLimitedUntil;
+    const nextRateLimit = now + MoonshotKeyProvider.KEY_REUSE_DELAY;
+
+    key.rateLimitedAt = now;
+    // Never shorten a lockout that is already longer than the reuse delay.
+    key.rateLimitedUntil = Math.max(currentRateLimit, nextRateLimit);
+  }
+}
@@ -18,7 +18,8 @@ export type LLMService =
  | "deepseek"
  | "xai"
  | "cohere"
-  | "qwen";
+  | "qwen"
+  | "moonshot";

 export type OpenAIModelFamily =
  | "turbo"
@@ -58,6 +59,7 @@ export type DeepseekModelFamily = "deepseek";
 export type XaiModelFamily = "xai";
 export type CohereModelFamily = "cohere";
 export type QwenModelFamily = "qwen";
+export type MoonshotModelFamily = "moonshot";

 export type ModelFamily =
  | OpenAIModelFamily
@@ -70,11 +72,13 @@ export type ModelFamily =
  | DeepseekModelFamily
  | XaiModelFamily
  | CohereModelFamily
-  | QwenModelFamily;
+  | QwenModelFamily
+  | MoonshotModelFamily;

 export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
  arr: A & ([ModelFamily] extends [A[number]] ? unknown : never)
 ) => arr)([
+  "moonshot",
  "qwen",
  "cohere",
  "xai",
@@ -149,12 +153,14 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
  "deepseek",
  "xai",
  "cohere",
-  "qwen"
+  "qwen",
+  "moonshot"
 ] as const);

 export const MODEL_FAMILY_SERVICE: {
  [f in ModelFamily]: LLMService;
 } = {
+  moonshot: "moonshot",
  qwen: "qwen",
  cohere: "cohere",
  xai: "xai",
@@ -404,12 +410,10 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
      case "openai-image":
        if (req.service === "deepseek") {
          modelFamily = "deepseek";
-        } else {
-          modelFamily = getOpenAIModelFamily(model);
-        }
-        break;
-        if (req.service === "xai") {
+        } else if (req.service === "xai") {
          modelFamily = "xai";
+        } else if (req.service === "moonshot") {
+          modelFamily = "moonshot";
        } else {
          modelFamily = getOpenAIModelFamily(model);
        }
@@ -64,6 +64,7 @@ const MODEL_PRICING: Record<ModelFamily, { input: number; output: number } | und
  // Adding placeholders for families in models.ts but not yet priced here.
  "cohere": { input: 0.15, output: 0.60 }, // Updated to Command R
  "qwen": { input: 1.40, output: 2.80 }, // Qwen-plus, as an example
+  "moonshot": { input: 0.6, output: 2.5 }, // Moonshot kimi k2
 };

 export function getTokenCostDetailsUsd(model: ModelFamily, inputTokens: number, outputTokens?: number): { inputCost: number, outputCost: number, totalCost: number } {