Let's shoot the moon

This commit is contained in:
reanon
2025-07-14 21:10:36 +02:00
parent 08400db220
commit bbd2b88503
16 changed files with 724 additions and 19 deletions
+106
View File
@@ -0,0 +1,106 @@
import { z } from "zod";
import { OPENAI_OUTPUT_MAX } from "./openai";
/**
 * Reports whether a model identifier refers to a Moonshot model.
 *
 * The check is a case-sensitive substring match on "moonshot".
 *
 * @param model Model identifier as sent by the client.
 * @returns `true` when the identifier contains "moonshot".
 */
export function isMoonshotModel(model: string): boolean {
  const markerPosition = model.indexOf("moonshot");
  return markerPosition !== -1;
}
/**
 * Reports whether a model identifier refers to a Moonshot vision model.
 *
 * Both "moonshot" and "vision" must appear in the identifier; the match is
 * case-sensitive and the two markers may appear in any order.
 *
 * @param model Model identifier as sent by the client.
 * @returns `true` when every required marker is present.
 */
export function isMoonshotVisionModel(model: string): boolean {
  const requiredMarkers = ["moonshot", "vision"];
  return requiredMarkers.every((marker) => model.indexOf(marker) !== -1);
}
// One text segment of a multi-part message body.
const MoonshotTextPartSchema = z.object({
  type: z.literal("text"),
  text: z.string(),
});

// One image segment of a multi-part message body. `detail` may be omitted.
const MoonshotImagePartSchema = z.object({
  type: z.literal("image_url"),
  image_url: z.object({
    url: z.string(),
    detail: z.enum(["low", "high", "auto"]).optional(),
  }),
});

// Message content accepted for vision models: either a plain string or a list
// of text / image parts.
const MoonshotVisionContentSchema = z.union([
  z.string(),
  z.array(z.union([MoonshotTextPartSchema, MoonshotImagePartSchema])),
]);
// Roles a chat message may carry.
// NOTE(review): "tool" is not accepted here even though the completions schema
// accepts `tools`; confirm whether tool-result messages need to pass through.
const MoonshotChatRoleSchema = z.enum(["user", "assistant", "system"]);

// A single chat message. `content` may be a string, multi-part vision content,
// or null. `partial` is the flag used for partial mode.
const MoonshotChatMessageSchema = z.object({
  role: MoonshotChatRoleSchema,
  content: z.string().or(MoonshotVisionContentSchema).nullable(),
  name: z.string().optional(),
  partial: z.boolean().optional(),
});
// The ordered list of chat messages that makes up a conversation.
const MoonshotMessagesSchema = MoonshotChatMessageSchema.array();
// Caps the requested completion length at OPENAI_OUTPUT_MAX; a missing or null
// value falls back to OPENAI_OUTPUT_MAX itself.
const clampMoonshotMaxTokens = (
  requested: number | null | undefined
): number => {
  const wanted = requested ?? OPENAI_OUTPUT_MAX;
  return Math.min(wanted, OPENAI_OUTPUT_MAX);
};

// Normalizes `stop` so the parsed value is always an array of strings.
const toMoonshotStopList = (stop: string | string[]): string[] =>
  Array.isArray(stop) ? stop : [stop];

// Output format selector for a completion request.
const MoonshotResponseFormatSchema = z.object({
  type: z.enum(["text", "json_object"]),
});

// Request body schema for Moonshot chat completions. Omitted sampling fields
// are filled with the defaults declared below.
export const MoonshotV1ChatCompletionsSchema = z.object({
  model: z.string(),
  messages: MoonshotMessagesSchema,
  temperature: z.number().optional().default(0.3),
  top_p: z.number().optional().default(1),
  max_tokens: z.coerce
    .number()
    .int()
    .nullish()
    .transform(clampMoonshotMaxTokens),
  stream: z.boolean().optional().default(false),
  stop: z
    .union([z.string(), z.array(z.string()).max(5)])
    .optional()
    .default([])
    .transform(toMoonshotStopList),
  seed: z.number().int().min(0).optional(),
  response_format: MoonshotResponseFormatSchema.optional(),
  tools: z.array(z.any()).optional(),
  tool_choice: z.any().optional(),
  frequency_penalty: z.number().min(-2).max(2).optional().default(0),
  presence_penalty: z.number().min(-2).max(2).optional().default(0),
  n: z.number().int().min(1).max(5).optional().default(1),
});
// Request body schema for Moonshot embeddings: `input` is one string or a list
// of strings, and `encoding_format` may be omitted.
export const MoonshotV1EmbeddingsSchema = z.object({
  model: z.string(),
  input: z.string().or(z.string().array()),
  encoding_format: z.enum(["float", "base64"]).optional(),
});
/**
 * Turns on partial mode for a conversation that ends with an assistant
 * message (similar to Deepseek's prefill).
 *
 * @param messages Chat messages in conversation order.
 * @returns The same array instance when nothing needs to change (empty list,
 *   last message is not from the assistant, or `partial` is already truthy);
 *   otherwise a new array whose last element is a copy of the original last
 *   message with `partial: true`. The input is never mutated.
 */
export function enableMoonshotPartial(messages: any[]): any[] {
  const tailIndex = messages.length - 1;
  if (tailIndex < 0) {
    return messages;
  }

  const tail = messages[tailIndex];
  if (tail.role !== "assistant") {
    return messages;
  }
  if (tail.partial) {
    return messages;
  }

  const flaggedTail = { ...tail, partial: true };
  return [...messages.slice(0, tailIndex), flaggedTail];
}
/**
 * Reports whether a conversation is using partial mode.
 *
 * @param messages Chat messages in conversation order.
 * @returns `true` only when the list is non-empty and its last message has
 *   role "assistant" with `partial` strictly equal to `true`.
 */
export function hasMoonshotPartialMode(messages: any[]): boolean {
  if (messages.length === 0) {
    return false;
  }
  const tail = messages[messages.length - 1];
  if (tail.role !== "assistant") {
    return false;
  }
  return tail.partial === true;
}
+1
View File
@@ -105,3 +105,4 @@ export { DeepseekKey } from "./deepseek/provider";
export { XaiKey } from "./xai/provider";
export { CohereKey } from "./cohere/provider";
export { QwenKey } from "./qwen/provider";
export { MoonshotKey } from "./moonshot/provider";
+6 -1
View File
@@ -17,6 +17,7 @@ import { DeepseekKeyProvider } from "./deepseek/provider";
import { XaiKeyProvider } from "./xai/provider";
import { CohereKeyProvider } from "./cohere/provider";
import { QwenKeyProvider } from "./qwen/provider";
import { MoonshotKeyProvider } from "./moonshot/provider";
type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate | Partial<GcpKey>;
@@ -38,6 +39,7 @@ export class KeyPool {
this.keyProviders.push(new XaiKeyProvider());
this.keyProviders.push(new CohereKeyProvider());
this.keyProviders.push(new QwenKeyProvider());
this.keyProviders.push(new MoonshotKeyProvider());
}
public init() {
@@ -81,7 +83,8 @@ export class KeyPool {
service instanceof DeepseekKeyProvider ||
service instanceof XaiKeyProvider ||
service instanceof CohereKeyProvider ||
service instanceof QwenKeyProvider
service instanceof QwenKeyProvider ||
service instanceof MoonshotKeyProvider
) {
service.update(key.hash, { isOverQuota: reason === "quota" });
}
@@ -211,6 +214,8 @@ export class KeyPool {
return "cohere";
} else if (model.includes("qwen")) {
return "qwen";
} else if (model.includes("moonshot")) {
return "moonshot";
} else if (model.startsWith("anthropic.claude")) {
// AWS offers models from a few providers
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids-arns.html
@@ -0,0 +1,127 @@
import { MoonshotKey } from "./provider";
import { logger } from "../../../logger";
import { assertNever } from "../../utils";
/** Milliseconds to wait for the balance request before aborting it. */
const CHECK_TIMEOUT = 10_000;

/** Balance endpoint queried to decide whether a key is usable. */
const API_URL = "https://api.moonshot.cn/v1/users/me/balance";
/**
 * Validates Moonshot API keys by querying the balance endpoint and reports the
 * outcome through the `update` callback supplied at construction time.
 */
export class MoonshotKeyChecker {
  private log = logger.child({ module: "key-checker", service: "moonshot" });

  /**
   * @param update Callback invoked with a key hash and the fields to change
   *   once a check has produced a definite result.
   */
  constructor(
    private readonly update: (hash: string, key: Partial<MoonshotKey>) => void
  ) {
    this.log.info("MoonshotKeyChecker initialized");
  }

  /**
   * Checks a single key and applies the result via the `update` callback.
   *
   * Never rejects: if the check cannot be completed (network failure, timeout,
   * server-side error, unparsable body) the failure is logged and the key's
   * state is left unchanged.
   *
   * @param key The key to validate.
   */
  public async checkKey(key: MoonshotKey): Promise<void> {
    this.log.info({ hash: key.hash }, "Starting key validation check");
    try {
      const result = await this.validateKey(key);
      this.handleCheckResult(key, result);
    } catch (error) {
      if (error instanceof Error) {
        this.log.warn(
          { error: error.message, stack: error.stack, hash: key.hash },
          "Failed to check key status"
        );
      } else {
        this.log.warn(
          { error, hash: key.hash },
          "Failed to check key status with unknown error"
        );
      }
    }
  }

  /**
   * Queries the balance endpoint and classifies the key.
   *
   * @param key The key to validate.
   * @returns "valid", "invalid", or "quota" (balance is zero or negative).
   * @throws When the request fails, times out, or the server answers with a
   *   5xx status. Those outcomes say nothing about the key itself, so they
   *   must not be reported as "invalid".
   */
  private async validateKey(
    key: MoonshotKey
  ): Promise<"valid" | "invalid" | "quota"> {
    const controller = new AbortController();
    const timeout = setTimeout(() => {
      controller.abort();
      this.log.warn(
        { hash: key.hash },
        "Key validation timed out after " + CHECK_TIMEOUT + "ms"
      );
    }, CHECK_TIMEOUT);

    try {
      // Check balance endpoint to verify key validity
      const headers = {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${key.key}`,
      };

      const response = await fetch(API_URL, {
        method: "GET",
        headers,
        signal: controller.signal,
      });

      if (response.status === 200) {
        const data = await response.json();

        // Check if response has the expected Moonshot API structure
        if (data && data.status === true && data.code === 0 && data.data) {
          const balance = data.data.available_balance;
          // A zero or negative balance is treated as quota exhaustion.
          if (typeof balance === "number" && balance <= 0) {
            return "quota";
          }
          return "valid";
        }

        this.log.warn(
          { response: data, hash: key.hash },
          "Unexpected response format from Moonshot API"
        );
        return "invalid";
      }

      if (response.status === 401) {
        // Unauthorized - invalid key
        return "invalid";
      }

      if (response.status === 429) {
        // Rate limit - but key is valid
        return "valid";
      }

      if (response.status >= 500) {
        // Server-side failure: the key was never evaluated. Throwing leaves
        // the key untouched instead of marking it revoked on an outage.
        throw new Error(
          `Moonshot API returned server error ${response.status} while testing key validity`
        );
      }

      this.log.warn(
        { status: response.status, hash: key.hash },
        "Unexpected status code while testing key validity"
      );
      return "invalid";
    } catch (error) {
      if (error instanceof Error && error.name === "AbortError") {
        this.log.warn({ hash: key.hash }, "Key validation aborted");
      }
      throw error;
    } finally {
      // Always cancel the timer so it cannot fire after the check has settled.
      clearTimeout(timeout);
    }
  }

  /**
   * Translates a check result into key state and pushes it through `update`.
   *
   * @param key The key that was checked.
   * @param result Classification returned by `validateKey`.
   */
  private handleCheckResult(
    key: MoonshotKey,
    result: "valid" | "invalid" | "quota"
  ): void {
    switch (result) {
      case "valid":
        this.log.info({ hash: key.hash }, "Key is valid and enabled");
        this.update(key.hash, {
          isDisabled: false,
          lastChecked: Date.now(),
        });
        break;
      case "invalid":
        this.log.warn({ hash: key.hash }, "Key is invalid, marking as revoked");
        this.update(key.hash, {
          isDisabled: true,
          isRevoked: true,
          lastChecked: Date.now(),
        });
        break;
      case "quota":
        this.log.warn({ hash: key.hash }, "Key has exceeded its quota, disabling");
        this.update(key.hash, {
          isDisabled: true,
          isOverQuota: true,
          lastChecked: Date.now(),
        });
        break;
      default:
        // Compile-time exhaustiveness check over the result union.
        assertNever(result);
    }
  }
}
@@ -0,0 +1,2 @@
export { MoonshotKey, MoonshotKeyProvider } from "./provider";
export { MoonshotKeyChecker } from "./checker";
@@ -0,0 +1,166 @@
import { createHash } from "crypto";
import { Key, KeyProvider, createGenericGetLockoutPeriod } from "..";
import { MoonshotKeyChecker } from "./checker";
import { config } from "../../../config";
import { logger } from "../../../logger";
import { MoonshotModelFamily, ModelFamily } from "../../models";
/**
 * A Moonshot API key together with the bookkeeping fields inherited from `Key`.
 */
export interface MoonshotKey extends Key {
/** Discriminator identifying the owning service; always "moonshot". */
readonly service: "moonshot";
/** Model families this key may serve. */
readonly modelFamilies: MoonshotModelFamily[];
/** Whether the key is currently considered to have exhausted its quota. */
isOverQuota: boolean;
}
/**
 * Holds the configured Moonshot API keys, hands them out to requests, and
 * tracks their usage, rate-limit lockouts and enabled/disabled state.
 */
export class MoonshotKeyProvider implements KeyProvider<MoonshotKey> {
  readonly service = "moonshot";

  private keys: MoonshotKey[] = [];
  private checker?: MoonshotKeyChecker;
  private log = logger.child({ module: "key-provider", service: this.service });

  /**
   * Loads keys from `config.moonshotKey`, a comma-separated list. Blank
   * entries are skipped and repeated keys are loaded once, because entries are
   * looked up by hash and a duplicate could never be updated.
   */
  constructor() {
    const keyConfig = config.moonshotKey?.trim();
    if (!keyConfig) {
      return;
    }

    const uniqueKeys = new Set(
      keyConfig
        .split(",")
        .map((k) => k.trim())
        .filter((k) => k.length > 0)
    );
    for (const key of uniqueKeys) {
      this.keys.push({
        key,
        service: this.service,
        modelFamilies: ["moonshot"],
        isDisabled: false,
        isRevoked: false,
        promptCount: 0,
        lastUsed: 0,
        lastChecked: 0,
        hash: this.hashKey(key),
        rateLimitedAt: 0,
        rateLimitedUntil: 0,
        tokenUsage: {},
        isOverQuota: false,
      });
    }
  }

  /** Returns the hex-encoded SHA-256 digest used to identify a key. */
  private hashKey(key: string): string {
    return createHash("sha256").update(key).digest("hex");
  }

  /**
   * Starts an initial validity check for every key. Does nothing when no keys
   * are configured; logs a warning and skips checks when `config.checkKeys`
   * is off.
   */
  public init() {
    if (this.keys.length === 0) return;
    if (!config.checkKeys) {
      this.log.warn(
        "Key checking is disabled. Keys will not be verified."
      );
      return;
    }
    this.checker = new MoonshotKeyChecker(this.update.bind(this));
    for (const key of this.keys) {
      // Fire-and-forget: checkKey handles and logs its own failures.
      void this.checker.checkKey(key);
    }
  }

  /**
   * Selects an enabled key for a request.
   *
   * Keys that are not currently locked out are preferred, chosen at random
   * among themselves. When every enabled key is locked out, the one whose
   * lockout ends soonest is returned.
   *
   * @param model Requested model; not used for selection.
   * @returns A shallow copy of the selected key.
   * @throws Error when no enabled key exists.
   */
  public get(model: string): MoonshotKey {
    const availableKeys = this.keys.filter((k) => !k.isDisabled);
    if (availableKeys.length === 0) {
      throw new Error("No Moonshot keys available");
    }

    const now = Date.now();
    const readyKeys = availableKeys.filter((k) => k.rateLimitedUntil <= now);
    const key =
      readyKeys.length > 0
        ? readyKeys[Math.floor(Math.random() * readyKeys.length)]
        : availableKeys.reduce((soonest, k) =>
            k.rateLimitedUntil < soonest.rateLimitedUntil ? k : soonest
          );

    key.lastUsed = now;
    this.throttle(key);
    return { ...key };
  }

  /** Lists all keys with the secret `key` field removed. */
  public list(): Omit<MoonshotKey, "key">[] {
    return this.keys.map(({ key, ...rest }) => rest);
  }

  /** Marks the stored key with the same hash as disabled, if it exists. */
  public disable(key: MoonshotKey): void {
    const found = this.keys.find((k) => k.hash === key.hash);
    if (found) {
      found.isDisabled = true;
    }
  }

  /** Merges `update` into the stored key with the given hash, if it exists. */
  public update(hash: string, update: Partial<MoonshotKey>): void {
    const key = this.keys.find((k) => k.hash === hash);
    if (key) {
      Object.assign(key, update);
    }
  }

  /** Number of keys that are not disabled. */
  public available(): number {
    return this.keys.filter((k) => !k.isDisabled).length;
  }

  /**
   * Records one prompt and its token counts against a key. Unknown hashes are
   * ignored.
   *
   * @param keyHash Hash of the key that served the request.
   * @param modelFamily Family under which the usage is accumulated.
   * @param usage Input and output token counts to add.
   */
  public incrementUsage(keyHash: string, modelFamily: MoonshotModelFamily, usage: { input: number; output: number }) {
    const key = this.keys.find((k) => k.hash === keyHash);
    if (!key) return;

    key.promptCount++;

    if (!key.tokenUsage) {
      key.tokenUsage = {};
    }
    // Moonshot only has one model family "moonshot"
    if (!key.tokenUsage[modelFamily]) {
      key.tokenUsage[modelFamily] = { input: 0, output: 0 };
    }

    const currentFamilyUsage = key.tokenUsage[modelFamily]!;
    currentFamilyUsage.input += usage.input;
    currentFamilyUsage.output += usage.output;
  }

  /**
   * Upon being rate limited, a key will be locked out for this many milliseconds
   * while we wait for other concurrent requests to finish.
   */
  private static readonly RATE_LIMIT_LOCKOUT = 2000;

  /**
   * Upon assigning a key, we will wait this many milliseconds before allowing it
   * to be used again. This is to prevent the queue from flooding a key with too
   * many requests while we wait to learn whether previous ones succeeded.
   */
  private static readonly KEY_REUSE_DELAY = 500;

  getLockoutPeriod = createGenericGetLockoutPeriod(() => this.keys);

  /**
   * Locks a key out for RATE_LIMIT_LOCKOUT milliseconds. An unknown hash is
   * logged and ignored rather than crashing the caller.
   */
  public markRateLimited(keyHash: string) {
    this.log.debug({ key: keyHash }, "Key rate limited");
    const key = this.keys.find((k) => k.hash === keyHash);
    if (!key) {
      this.log.warn({ key: keyHash }, "Cannot mark unknown key as rate limited");
      return;
    }
    const now = Date.now();
    key.rateLimitedAt = now;
    key.rateLimitedUntil = now + MoonshotKeyProvider.RATE_LIMIT_LOCKOUT;
  }

  /**
   * Re-enables every key, clears its over-quota flag, and starts a fresh
   * validity check. Does nothing unless key checking is on and a checker
   * exists.
   */
  public recheck(): void {
    if (!this.checker || !config.checkKeys) return;

    for (const key of this.keys) {
      this.update(key.hash, {
        isOverQuota: false,
        isDisabled: false,
        lastChecked: 0,
      });
      void this.checker.checkKey(key);
    }
  }

  /**
   * Applies a short artificial delay to the key upon dequeueing, in order to
   * prevent it from being immediately assigned to another request before the
   * current one can be dispatched. An existing longer lockout is never
   * shortened.
   **/
  private throttle(key: MoonshotKey) {
    const now = Date.now();
    const currentRateLimit = key.rateLimitedUntil;
    const nextRateLimit = now + MoonshotKeyProvider.KEY_REUSE_DELAY;

    key.rateLimitedAt = now;
    key.rateLimitedUntil = Math.max(currentRateLimit, nextRateLimit);
  }
}
+12 -8
View File
@@ -18,7 +18,8 @@ export type LLMService =
| "deepseek"
| "xai"
| "cohere"
| "qwen";
| "qwen"
| "moonshot";
export type OpenAIModelFamily =
| "turbo"
@@ -58,6 +59,7 @@ export type DeepseekModelFamily = "deepseek";
export type XaiModelFamily = "xai";
export type CohereModelFamily = "cohere";
export type QwenModelFamily = "qwen";
export type MoonshotModelFamily = "moonshot";
export type ModelFamily =
| OpenAIModelFamily
@@ -70,11 +72,13 @@ export type ModelFamily =
| DeepseekModelFamily
| XaiModelFamily
| CohereModelFamily
| QwenModelFamily;
| QwenModelFamily
| MoonshotModelFamily;
export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
arr: A & ([ModelFamily] extends [A[number]] ? unknown : never)
) => arr)([
"moonshot",
"qwen",
"cohere",
"xai",
@@ -149,12 +153,14 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
"deepseek",
"xai",
"cohere",
"qwen"
"qwen",
"moonshot"
] as const);
export const MODEL_FAMILY_SERVICE: {
[f in ModelFamily]: LLMService;
} = {
moonshot: "moonshot",
qwen: "qwen",
cohere: "cohere",
xai: "xai",
@@ -404,12 +410,10 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
case "openai-image":
if (req.service === "deepseek") {
modelFamily = "deepseek";
} else {
modelFamily = getOpenAIModelFamily(model);
}
break;
if (req.service === "xai") {
} else if (req.service === "xai") {
modelFamily = "xai";
} else if (req.service === "moonshot") {
modelFamily = "moonshot";
} else {
modelFamily = getOpenAIModelFamily(model);
}
+1
View File
@@ -64,6 +64,7 @@ const MODEL_PRICING: Record<ModelFamily, { input: number; output: number } | und
// Adding placeholders for families in models.ts but not yet priced here.
"cohere": { input: 0.15, output: 0.60 }, // Updated to Command R
"qwen": { input: 1.40, output: 2.80 }, // Qwen-plus, as an example
"moonshot": { input: 0.6, output: 2.5 }, // Moonshot kimi k2
};
export function getTokenCostDetailsUsd(model: ModelFamily, inputTokens: number, outputTokens?: number): { inputCost: number, outputCost: number, totalCost: number } {