diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts index 602b439..c786b06 100644 --- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts +++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts @@ -81,7 +81,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => { modelMax = 200000; } else if (model.match(/^claude-3/)) { modelMax = 200000; - } else if (model.match(/^gemini-$/)) { + } else if (model.match(/^gemini-/)) { modelMax = 1024000; } else if (model.match(/^anthropic\.claude-3/)) { modelMax = 200000; diff --git a/src/shared/key-management/google-ai/provider.ts b/src/shared/key-management/google-ai/provider.ts index 704967e..47695f5 100644 --- a/src/shared/key-management/google-ai/provider.ts +++ b/src/shared/key-management/google-ai/provider.ts @@ -6,7 +6,7 @@ import { getGoogleAIModelFamily, type GoogleAIModelFamily } from "../../models"; import { PaymentRequiredError } from "../../errors"; import { GoogleAIKeyChecker } from "./checker"; -// Note that Google AI is not the same as Vertex AI, both are provided by +// Note that Google AI is not the same as Vertex AI, both are provided by // Google but Vertex is the GCP product for enterprise, while Google API is a // development/hobbyist product. They use completely different APIs and keys. // https://ai.google.dev/docs/migrate_to_cloud @@ -103,13 +103,15 @@ export class GoogleAIKeyProvider implements KeyProvider { return this.keys.map((k) => Object.freeze({ ...k, key: undefined })); } - public get(_model: string) { - const availableKeys = this.keys.filter((k) => !k.isDisabled); + public get(model: string) { + const neededFamily = getGoogleAIModelFamily(model); + const availableKeys = this.keys.filter( + (k) => !k.isDisabled && k.modelFamilies.includes(neededFamily) + ); if (availableKeys.length === 0) { throw new PaymentRequiredError("No Google AI keys available"); } - // (largely copied from the OpenAI provider, without trial key support) // Select a key, from highest priority to lowest priority: // 1. Keys which are not rate limited // a. If all keys were rate limited recently, select the least-recently