diff --git a/src/proxy/google-ai.ts b/src/proxy/google-ai.ts index 5ac7d9d..95365f3 100644 --- a/src/proxy/google-ai.ts +++ b/src/proxy/google-ai.ts @@ -178,27 +178,23 @@ const googleAIRouter = Router(); googleAIRouter.get("/v1/models", handleModelRequest); googleAIRouter.get("/:apiVersion(v1alpha|v1beta)/models", handleNativeModelRequest); -// Native Google AI chat completion endpoint -googleAIRouter.post( - "/:apiVersion(v1alpha|v1beta)/models/:modelId:(generateContent|streamGenerateContent)", - ipLimiter, - createPreprocessorMiddleware( - { inApi: "google-ai", outApi: "google-ai", service: "google-ai" }, - { beforeTransform: [maybeReassignModel], afterTransform: [setStreamFlag] } - ), - googleAIProxy -); - -// OpenAI-to-Google AI compatibility endpoint. -googleAIRouter.post( - "/v1/chat/completions", - ipLimiter, - createPreprocessorMiddleware( - { inApi: "openai", outApi: "google-ai", service: "google-ai" }, - { afterTransform: [maybeReassignModel] } - ), - googleAIProxy -); +/** + * Clamps generationConfig.thinkingConfig.thinkingBudget to its supported range. + * Numeric budgets are limited to 0-24576 tokens (the Gemini 2.5 Flash maximum); non-numeric values such as "auto" pass through unchanged.
+ */ +function processThinkingBudget(req: Request) { + if (req.body.generationConfig?.thinkingConfig?.thinkingBudget !== undefined) { + // Ensure thinkingBudget is within allowed range (0-24576) + const budget = req.body.generationConfig.thinkingConfig.thinkingBudget; + + // If it's a number, validate range + if (typeof budget === 'number') { + req.body.generationConfig.thinkingConfig.thinkingBudget = + Math.max(0, Math.min(budget, 24576)); + } + // If it's "auto", leave as is + } +} function setStreamFlag(req: Request) { const isStreaming = req.url.includes("streamGenerateContent"); @@ -236,4 +232,26 @@ function maybeReassignModel(req: Request) { req.body.model = "gemini-1.5-pro-latest"; } +// Native Google AI chat completion endpoint +googleAIRouter.post( + "/:apiVersion(v1alpha|v1beta)/models/:modelId:(generateContent|streamGenerateContent)", + ipLimiter, + createPreprocessorMiddleware( + { inApi: "google-ai", outApi: "google-ai", service: "google-ai" }, + { beforeTransform: [maybeReassignModel], afterTransform: [setStreamFlag, processThinkingBudget] } + ), + googleAIProxy +); + +// OpenAI-to-Google AI compatibility endpoint. 
+googleAIRouter.post( + "/v1/chat/completions", + ipLimiter, + createPreprocessorMiddleware( + { inApi: "openai", outApi: "google-ai", service: "google-ai" }, + { afterTransform: [maybeReassignModel, processThinkingBudget] } + ), + googleAIProxy +); + export const googleAI = googleAIRouter; diff --git a/src/shared/api-schemas/google-ai.ts b/src/shared/api-schemas/google-ai.ts index 4fb92f0..c4b7cf6 100644 --- a/src/shared/api-schemas/google-ai.ts +++ b/src/shared/api-schemas/google-ai.ts @@ -155,6 +155,12 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer< stops.push(...Array.from(foundNames).map((name) => `\n${name}:`)); stops = [...new Set(stops)].slice(0, 5); + // Handle thinking budget from the original request + let thinkingConfig = undefined; + if (body.generationConfig?.thinkingConfig || body.thinkingConfig) { + thinkingConfig = body.generationConfig?.thinkingConfig || body.thinkingConfig; + } + return { model: req.body.model, stream: rest.stream, @@ -166,6 +172,7 @@ export const transformOpenAIToGoogleAI: APIFormatTransformer< topP: rest.top_p, topK: 40, // openai schema doesn't have this, google ai defaults to 40 temperature: rest.temperature, + ...(thinkingConfig ? { thinkingConfig } : {}) }, safetySettings: [ { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },