diff --git a/src/modules/llms/server/gemini/gemini.models.ts b/src/modules/llms/server/gemini/gemini.models.ts index 60f3f2a02..1ed56feb4 100644 --- a/src/modules/llms/server/gemini/gemini.models.ts +++ b/src/modules/llms/server/gemini/gemini.models.ts @@ -2,7 +2,7 @@ import type { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/w import type { ModelDescriptionSchema } from '../llm.server.types'; -import { LLM_IF_GEM_CodeExecution, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image } from '~/common/stores/llms/llms.types'; +import { LLM_IF_GEM_CodeExecution, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types'; // dev options @@ -38,7 +38,7 @@ const filterLyingModelNames: GeminiWire_API_Models_List.Model['name'][] = [ - Latest stable version gemini-1.0-pro -- - Stable versions gemini-1.0-pro-001 --- - Gemini capabilities chart (updated 2025-04-17): + Gemini capabilities chart (updated 2025-05-27): - [table stakes] System instructions - JSON Mode, with optional JSON Schema - Adjustable Safety Settings @@ -49,6 +49,8 @@ const filterLyingModelNames: GeminiWire_API_Models_List.Model['name'][] = [ - Thinking / Reasoning with thinking budget - Audio generation - Live API + - Native Audio (dialog models) + - Text-to-Speech models */ // Experimental Gemini models are Free of charge @@ -57,7 +59,7 @@ const geminiExpFree: ModelDescriptionSchema['chatPrice'] = { }; -// Pricing based on https://ai.google.dev/pricing (May 20, 2025) +// Pricing based on https://ai.google.dev/pricing (May 27, 2025) const gemini25ProPreviewPricing: ModelDescriptionSchema['chatPrice'] = { input: [{ upTo: 200000, price: 1.25 }, { upTo: null, price: 2.50 }], @@ -74,6 +76,21 @@ const gemini25FlashPreviewThinkingPricing: ModelDescriptionSchema['chatPrice'] = output: 3.50, // thinking }; +const gemini25FlashNativeAudioPricing: ModelDescriptionSchema['chatPrice'] = { + input: 0.50, // text; audio/video is $3.00 but we don't differentiate yet + output: 2.00, // text; audio is $12.00 but we don't differentiate yet +}; + +const gemini25FlashPreviewTTSPricing: ModelDescriptionSchema['chatPrice'] = { + input: 0.50, // text input + // output: 10.00, // not ready for audio output yet, as of 2025-05-27 +}; + +const gemini25ProPreviewTTSPricing: ModelDescriptionSchema['chatPrice'] = { + input: 1.00, // text input + // output: 20.00, // not ready for audio output yet, as of 2025-05-27 +}; + const gemini20FlashPricing: ModelDescriptionSchema['chatPrice'] = { input: 0.10, // text/image/video; audio is $0.70 but we don't differentiate yet output: 0.40, @@ -149,7 +166,19 @@ const _knownGeminiModels: ({ interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning, LLM_IF_GEM_CodeExecution], // parameterSpecs: [{ paramId: 'llmVndGeminiShowThoughts' }], // Gemini doesn't show thoughts anymore benchmark: { cbaElo: 1437 /* +1 because free */ }, - hidden: true, // + hidden: true, // seems to be discouraged - still available, but cannot rely on it + }, + + // 2.5 Pro Preview TTS + { + id: 'models/gemini-2.5-pro-preview-tts', + labelOverride: 'Gemini 2.5 Pro Preview TTS (unsupported)', + isPreview: true, + chatPrice: gemini25ProPreviewTTSPricing, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_Outputs_Audio, LLM_IF_Outputs_NoText], + benchmark: undefined, // TTS models are not benchmarkable + hidden: true, // audio outputs are unavailable as of 2025-05-27 + _delete: true, // we are receiving API errors from Gemini - disabled for now as of 2025-05-27 }, // 2.5 Flash Preview @@ -180,6 +209,36 @@ const _knownGeminiModels: ({ hidden: true, }, + // 2.5 Flash Preview TTS + { + id: 'models/gemini-2.5-flash-preview-tts', + labelOverride: 'Gemini 2.5 Flash Preview TTS (unsupported)', + isPreview: true, + chatPrice: gemini25FlashPreviewTTSPricing, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_Outputs_Audio, LLM_IF_Outputs_NoText], + benchmark: undefined, // TTS models are not benchmarkable + hidden: true, // audio outputs are unavailable as of 2025-05-27 + _delete: true, // we are receiving API errors from Gemini - disabled for now as of 2025-05-27 + }, + + // 2.5 Flash Native Audio (Dialog and Thinking variants) + { + id: 'models/gemini-2.5-flash-preview-native-audio-dialog', + isPreview: true, + chatPrice: gemini25FlashNativeAudioPricing, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_Outputs_Audio], + benchmark: undefined, // Native audio models are not benchmarkable + _delete: true, // dialog models unsupported as of 2025-05-27, but keeping the model for now + }, + { + id: 'models/gemini-2.5-flash-exp-native-audio-thinking-dialog', + isPreview: true, + chatPrice: gemini25FlashNativeAudioPricing, + interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning, LLM_IF_Outputs_Audio], + benchmark: undefined, // Native audio models are not benchmarkable + _delete: true, // dialog models unsupported as of 2025-05-27, but keeping the model for now + }, + /// Generation 2.0