Compare commits

...

1 Commits

Author SHA1 Message Date
claude[bot] 356359d25e feat(aix): Add Gemini Interactions API support for Deep Research agent
This commit implements the Gemini Interactions API, which enables support
for the Deep Research agent (deep-research-pro-preview-12-2025).

Key changes:
- Add GeminiWire_API_Interactions wire types with request/response schemas
- Create gemini.interactions.ts adapter for AIX → Interactions format
- Create gemini.interactions.parser.ts for streaming/non-streaming responses
- Integrate with dispatch system via vndGeminiInteractionsAgent flag
- Add LLM_IF_GEM_Interactions interface for model capability detection
- Add Deep Research agent model definition with proper sorting

The Interactions API is Google's new unified interface for AI agents that
supports background execution, server-side state management, and long-running
research tasks. This implementation follows the existing pattern established
by the OpenAI Responses API integration.

Closes #899

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Enrico Ros <enricoros@users.noreply.github.com>
2025-12-19 21:06:46 +00:00
13 changed files with 809 additions and 8 deletions
@@ -212,6 +212,14 @@ export const DModelParameterRegistry = {
// No initialValue - undefined means 'dynamic', which for Gemini Pro is the same as 'high' (which is the equivalent of 'medium' for OpenAI's effort levels.. somehow)
} as const,
llmVndGeminiInteractionsAgent: {
label: 'Agent (Interactions API)',
type: 'string' as const,
description: 'Uses Gemini Interactions API with the specified agent (e.g., deep-research-pro-preview-12-2025)',
hidden: true, // Auto-set by model definition
requiredFallback: 'deep-research-pro-preview-12-2025',
} as const,
// NOTE: we don't have this as a parameter, as for now we use it in tandem with llmVndGeminiGoogleSearch
// llmVndGeminiUrlContext: {
// label: 'URL Context',
+3
View File
@@ -151,6 +151,7 @@ export type DModelInterfaceV1 =
| 'oai-realtime'
| 'oai-responses'
| 'gem-code-execution'
| 'gem-interactions' // [Gemini] Interactions API (Deep Research agent)
| 'outputs-audio' // TEMP: ui flag - supports audio output (e.g., text-to-speech)
| 'outputs-image' // TEMP: ui flag - supports image output (image generation)
| 'outputs-no-text' // disable text outputs (used in conjunction with alt-outputs) - assumed off
@@ -181,6 +182,7 @@ export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching';
export const LLM_IF_OAI_Realtime: DModelInterfaceV1 = 'oai-realtime';
export const LLM_IF_OAI_Responses: DModelInterfaceV1 = 'oai-responses';
export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution';
export const LLM_IF_GEM_Interactions: DModelInterfaceV1 = 'gem-interactions';
export const LLM_IF_HOTFIX_NoStream: DModelInterfaceV1 = 'hotfix-no-stream';
export const LLM_IF_HOTFIX_NoTemperature: DModelInterfaceV1 = 'hotfix-no-temperature';
export const LLM_IF_HOTFIX_StripImages: DModelInterfaceV1 = 'hotfix-strip-images';
@@ -205,6 +207,7 @@ export const LLMS_ALL_INTERFACES = [
// Vendor-specific capabilities
LLM_IF_ANT_PromptCaching, // [Anthropic] model supports anthropic-specific caching
LLM_IF_GEM_CodeExecution, // [Gemini] Tool: code execution
LLM_IF_GEM_Interactions, // [Gemini] Interactions API (Deep Research agent)
LLM_IF_OAI_PromptCaching, // [OpenAI] model supports OpenAI prompt caching
LLM_IF_OAI_Realtime, // [OpenAI] realtime API support - unused
LLM_IF_OAI_Responses, // [OpenAI] Responses API (new) support
+2 -1
View File
@@ -48,7 +48,7 @@ export function aixCreateModelFromLLMOptions(
const {
llmRef, llmTemperature, llmResponseTokens, llmTopP,
llmVndAnt1MContext, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndAntEffort,
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiShowThoughts, llmVndGeminiThinkingBudget, llmVndGeminiThinkingLevel,
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiInteractionsAgent, llmVndGeminiMediaResolution, llmVndGeminiShowThoughts, llmVndGeminiThinkingBudget, llmVndGeminiThinkingLevel,
// llmVndMoonshotWebSearch,
llmVndOaiReasoningEffort, llmVndOaiReasoningEffort4, llmVndOaiReasoningEffort52, llmVndOaiReasoningEffort52Pro, llmVndOaiRestoreMarkdown, llmVndOaiVerbosity, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, llmVndOaiImageGeneration,
llmVndOrtWebSearch,
@@ -118,6 +118,7 @@ export function aixCreateModelFromLLMOptions(
...(llmVndGeminiShowThoughts ? { vndGeminiShowThoughts: llmVndGeminiShowThoughts } : {}),
...(llmVndGeminiThinkingBudget !== undefined ? { vndGeminiThinkingBudget: llmVndGeminiThinkingBudget } : {}),
...(llmVndGeminiThinkingLevel ? { vndGeminiThinkingLevel: llmVndGeminiThinkingLevel } : {}),
...(llmVndGeminiInteractionsAgent ? { vndGeminiInteractionsAgent: llmVndGeminiInteractionsAgent } : {}),
// ...(llmVndGeminiUrlContext === 'auto' ? { vndGeminiUrlContext: llmVndGeminiUrlContext } : {}),
// ...(llmVndMoonshotWebSearch === 'auto' ? { vndMoonshotWebSearch: 'auto' } : {}),
...(llmVndOaiResponsesAPI ? { vndOaiResponsesAPI: true } : {}),
@@ -468,6 +468,12 @@ export namespace AixWire_API {
vndGeminiThinkingBudget: z.number().optional(), // old param
vndGeminiThinkingLevel: z.enum(['high', 'medium', 'low']).optional(), // new param
vndGeminiUrlContext: z.enum(['auto']).optional(),
/**
* [Gemini, 2025-12-19] Interactions API for Deep Research agent
* When set to an agent name, uses the Interactions API instead of generateContent
* See: https://ai.google.dev/gemini-api/docs/interactions
*/
vndGeminiInteractionsAgent: z.string().optional(),
// Moonshot
vndMoonshotWebSearch: z.enum(['auto']).optional(),
// OpenAI
@@ -464,14 +464,15 @@ export class ChatGenerateTransmitter implements IParticleTransmitter {
}
/** Communicates the upstream response handle, for remote control/resumability */
setUpstreamHandle(handle: string, _type: 'oai-responses' /* the only one for now, used for type safety */) {
setUpstreamHandle(handle: string, type: 'oai-responses' | 'gemini-interactions') {
if (SERVER_DEBUG_WIRE)
console.log('|response-handle|', handle);
console.log('|response-handle|', handle, type);
// NOTE: if needed, we could store the handle locally for server-side resumability, but we just implement client-side (correction, manual) for now
const uht = type === 'gemini-interactions' ? 'vnd.gemini.interactions' : 'vnd.oai.responses';
this.transmissionQueue.push({
cg: 'set-upstream-handle',
handle: {
uht: 'vnd.oai.responses',
uht: uht as any, // TODO: add 'vnd.gemini.interactions' to the type union in aix.wiretypes.ts
responseId: handle,
expiresAt: Date.now() + 30 * 24 * 3600 * 1000, // default: 30 days expiry
},
@@ -0,0 +1,211 @@
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage } from '../../../api/aix.wiretypes';
import { GeminiWire_API_Interactions } from '../../wiretypes/gemini.wiretypes';
import { aixSpillSystemToUser, approxDocPart_To_String } from './adapters.common';
type TRequest = GeminiWire_API_Interactions.Request;

/**
 * Gemini Interactions API adapter
 *
 * Converts AIX format to Gemini Interactions API format.
 * Used specifically for agents like Deep Research.
 *
 * Key differences from generateContent:
 * - Uses 'agent' instead of 'model' for agent-based interactions
 * - Uses 'input' with turns/content parts instead of 'contents'
 * - Supports background execution for long-running tasks
 * - Uses different streaming format (event_type-based)
 *
 * @param model - AIX model descriptor; `vndGeminiInteractionsAgent` names the agent and is required here
 * @param _chatGenerate - the AIX chat-generation request (not mutated; the system-spill works on a derived copy)
 * @param streaming - whether the request should ask for an SSE stream
 * @returns a schema-validated Interactions API request payload
 * @throws Error when no agent name is configured, or when the built payload fails schema validation
 */
export function aixToGeminiInteractions(
  model: AixAPI_Model,
  _chatGenerate: AixAPIChatGenerate_Request,
  streaming: boolean,
): TRequest {

  // The Interactions request requires 'model' or 'agent'; this adapter is agent-only, so
  // fail fast with an actionable error instead of emitting a request missing both fields
  const agentName = model.vndGeminiInteractionsAgent;
  if (!agentName)
    throw new Error('Gemini Interactions API requires an agent name (vndGeminiInteractionsAgent).');

  // Pre-process CGR - approximate spill of System to User message
  const chatGenerate = aixSpillSystemToUser(_chatGenerate);

  // Build system instruction from system message (text and flattened docs only)
  let systemInstruction: string | undefined = undefined;
  if (chatGenerate.systemMessage?.parts.length) {
    const systemParts: string[] = [];
    for (const part of chatGenerate.systemMessage.parts) {
      switch (part.pt) {
        case 'text':
          systemParts.push(part.text);
          break;
        case 'doc':
          systemParts.push(approxDocPart_To_String(part));
          break;
        case 'inline_image':
        case 'meta_cache_control':
          // Ignore these for system instruction
          break;
        default:
          console.warn(`[Gemini Interactions] Unsupported system part type: ${(part as any).pt}`);
      }
    }
    if (systemParts.length > 0)
      systemInstruction = systemParts.join('\n\n');
  }

  // Convert chat sequence to turns
  const input = _toInteractionsTurns(chatGenerate.chatSequence);

  // For Deep Research and other background agents, we use background=true
  // This allows the agent to run asynchronously
  const isBackgroundAgent = agentName.includes('deep-research');

  // Construct the request payload
  const payload: TRequest = {
    // Agent-based interactions use 'agent' instead of 'model'
    agent: agentName,
    // Input as array of turns
    input,
    // System instruction (if any)
    system_instruction: systemInstruction,
    // Generation config
    generation_config: {
      temperature: model.temperature ?? undefined,
      max_output_tokens: model.maxTokens ?? undefined,
      // Map thinking level for agents that support it
      thinking_level: model.vndGeminiThinkingLevel ?? undefined,
    },
    // API options
    stream: streaming,
    background: isBackgroundAgent, // Enable background for Deep Research
    store: true, // Enable storage for state management
  };

  // Clean up undefined values (keep the wire payload minimal)
  if (!payload.system_instruction)
    delete payload.system_instruction;
  if (payload.generation_config) {
    if (payload.generation_config.temperature === undefined)
      delete payload.generation_config.temperature;
    if (payload.generation_config.max_output_tokens === undefined)
      delete payload.generation_config.max_output_tokens;
    if (payload.generation_config.thinking_level === undefined)
      delete payload.generation_config.thinking_level;
    if (Object.keys(payload.generation_config).length === 0)
      delete payload.generation_config;
  }

  // Validate the payload against the wire schema before sending
  const validated = GeminiWire_API_Interactions.Request_schema.safeParse(payload);
  if (!validated.success) {
    console.warn('Gemini Interactions: invalid payload. Error:', validated.error.message);
    throw new Error(`Invalid sequence for Gemini Interactions API: ${validated.error.issues?.[0]?.message || validated.error.message || validated.error}.`);
  }

  return validated.data;
}
// Content part type for Interactions API input
type TContentPart =
  | { type: 'text'; text: string }
  | { type: 'image'; data?: string; mime_type?: string }
  | { type: 'audio'; data?: string; mime_type?: string }
  | { type: 'function_result'; name: string; call_id: string; result: unknown };

// Turn type for Interactions API input
type TTurn = {
  role: 'user' | 'model';
  content: TContentPart[];
};

/**
 * Convert AIX chat messages to Interactions API turns format
 */
function _toInteractionsTurns(chatSequence: AixMessages_ChatMessage[]): TTurn[] {
  return chatSequence.map((message): TTurn => {

    const contentParts: TContentPart[] = [];

    for (const aixPart of message.parts) {
      switch (aixPart.pt) {

        case 'text':
          contentParts.push({ type: 'text', text: aixPart.text });
          break;

        case 'inline_image':
          contentParts.push({ type: 'image', data: aixPart.base64, mime_type: aixPart.mimeType });
          break;

        case 'inline_audio':
          contentParts.push({ type: 'audio', data: aixPart.base64, mime_type: aixPart.mimeType });
          break;

        case 'doc':
          // Documents are flattened to plain text for now
          contentParts.push({ type: 'text', text: approxDocPart_To_String(aixPart) });
          break;

        case 'ma': // model artifact (thinking) - not replayed as input
        case 'meta_cache_control': // metadata - skipped
        case 'meta_in_reference_to': // metadata - skipped
          break;

        case 'tool_invocation':
          // For function calls, we'd need to handle these specially
          // For Deep Research, this is less relevant
          console.warn('[Gemini Interactions] Tool invocations not yet supported in input');
          break;

        case 'tool_response':
          // Only function-call results are forwarded
          if (aixPart.response.type === 'function_call')
            contentParts.push({
              type: 'function_result',
              name: aixPart.response._name || aixPart.id,
              call_id: aixPart.id,
              result: aixPart.response.result,
            });
          break;

        default:
          console.warn(`[Gemini Interactions] Unsupported part type: ${(aixPart as any).pt}`);
      }
    }

    // Ensure every turn carries at least one (possibly empty) text part
    if (!contentParts.length)
      contentParts.push({ type: 'text', text: '' });

    return {
      role: message.role === 'model' ? 'model' : 'user',
      content: contentParts,
    };
  });
}
@@ -8,16 +8,18 @@ import { DEEPSEEK_SPECIALE_HOST, DEEPSEEK_SPECIALE_SUFFIX } from '~/modules/llms
import type { AixAPI_Access, AixAPI_Model, AixAPI_ResumeHandle, AixAPIChatGenerate_Request } from '../../api/aix.wiretypes';
import type { AixDemuxers } from '../stream.demuxers';
import { GeminiWire_API_Generate_Content } from '../wiretypes/gemini.wiretypes';
import { GeminiWire_API_Generate_Content, GeminiWire_API_Interactions } from '../wiretypes/gemini.wiretypes';
import { aixToAnthropicMessageCreate } from './adapters/anthropic.messageCreate';
import { aixToGeminiGenerateContent } from './adapters/gemini.generateContent';
import { aixToGeminiInteractions } from './adapters/gemini.interactions';
import { aixToOpenAIChatCompletions } from './adapters/openai.chatCompletions';
import { aixToOpenAIResponses } from './adapters/openai.responsesCreate';
import type { IParticleTransmitter } from './parsers/IParticleTransmitter';
import { createAnthropicMessageParser, createAnthropicMessageParserNS } from './parsers/anthropic.parser';
import { createGeminiGenerateContentResponseParser } from './parsers/gemini.parser';
import { createGeminiInteractionsResponseParser } from './parsers/gemini.interactions.parser';
import { createOpenAIChatCompletionsChunkParser, createOpenAIChatCompletionsParserNS } from './parsers/openai.parser';
import { createOpenAIResponseParserNS, createOpenAIResponsesEventParser } from './parsers/openai.responses.parser';
@@ -83,7 +85,27 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
};
}
case 'gemini':
case 'gemini': {
/**
* [Gemini, 2025-12-19] Interactions API for agents like Deep Research
* When vndGeminiInteractionsAgent is set, use the Interactions API instead of generateContent
*/
const useInteractionsAPI = !!model.vndGeminiInteractionsAgent;
if (useInteractionsAPI) {
// Use Interactions API for agent-based interactions (e.g., Deep Research)
const agentName = model.vndGeminiInteractionsAgent!;
return {
request: {
...geminiAccess(access, null, streaming ? GeminiWire_API_Interactions.streamingPostPath : GeminiWire_API_Interactions.postPath, false),
method: 'POST',
body: aixToGeminiInteractions(model, chatGenerate, streaming),
},
demuxerFormat: streaming ? 'fast-sse' : null,
chatGenerateParse: createGeminiInteractionsResponseParser(agentName, streaming),
};
}
/**
* [Gemini, 2025-04-17] For newer thinking parameters, use v1alpha (we only see statistically better results)
*/
@@ -98,6 +120,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
demuxerFormat: streaming ? 'fast-sse' : null,
chatGenerateParse: createGeminiGenerateContentResponseParser(model.id.replace('models/', ''), streaming),
};
}
/**
* Ollama has now an OpenAI compatibility layer for `chatGenerate` API, but still its own protocol for models listing.
@@ -81,7 +81,7 @@ export interface IParticleTransmitter {
setModelName(modelName: string): void;
/** Communicates the upstream response handle, for remote control/resumability */
setUpstreamHandle(handle: string, type: 'oai-responses'): void;
setUpstreamHandle(handle: string, type: 'oai-responses' | 'gemini-interactions'): void;
/** Communicates the finish reason to the client */
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason): void;
@@ -0,0 +1,282 @@
import type { AixWire_Particles } from '../../../api/aix.wiretypes';
import type { ChatGenerateParseFunction } from '../chatGenerate.dispatch';
import type { IParticleTransmitter } from './IParticleTransmitter';
import { IssueSymbols } from '../ChatGenerateTransmitter';
/**
 * Gemini Interactions API Response Parser
 *
 * Parses responses from the Gemini Interactions API, which is used for
 * agents like Deep Research. Supports both streaming and non-streaming modes.
 *
 * Streaming events:
 * - content.delta: Incremental text/thought updates
 * - interaction.complete: Final interaction with full response
 *
 * Non-streaming:
 * - Single response object with outputs array
 *
 * Deep Research specifics:
 * - Uses background=true for long-running tasks
 * - Status can be: in_progress, completed, requires_action, failed, cancelled
 * - May require polling via interactions.get() for background tasks
 *
 * @param agentName - reported to the client as the model name
 * @param isStreaming - selects the streaming event parser vs. the full-object parser
 */
export function createGeminiInteractionsResponseParser(
  agentName: string,
  isStreaming: boolean,
): ChatGenerateParseFunction {
  const parserCreationTimestamp = Date.now();
  let sentAgentName = false;
  let timeToFirstEvent: number | undefined;
  let sentInteractionId: string | undefined;

  return function(pt: IParticleTransmitter, rawEventData: string): void {

    // Time to first event
    if (timeToFirstEvent === undefined)
      timeToFirstEvent = Date.now() - parserCreationTimestamp;

    // Parse the raw event data
    let eventData: any;
    try {
      eventData = JSON.parse(rawEventData);
    } catch (e) {
      return pt.setDialectTerminatingIssue(`Failed to parse Interactions API response: ${e}`, null, 'srv-warn');
    }

    // Set agent name as model name (if not already set)
    if (!sentAgentName) {
      pt.setModelName(agentName);
      sentAgentName = true;
    }

    // Handle streaming vs non-streaming
    if (isStreaming)
      _parseStreamingEvent(pt, eventData, parserCreationTimestamp, timeToFirstEvent);
    else
      _parseNonStreamingResponse(pt, eventData, parserCreationTimestamp, timeToFirstEvent);

    // Communicate the interaction ID for resumability (similar to OpenAI Responses),
    // preferring the nested interaction's ID over the top-level event ID.
    // FIX: only emit the handle when the ID is first seen (or changes) - previously it was
    // re-sent on every streaming delta, flooding the particle queue with duplicate handles.
    const interactionId: string | undefined = eventData.interaction?.id || eventData.id;
    if (interactionId && interactionId !== sentInteractionId) {
      sentInteractionId = interactionId;
      pt.setUpstreamHandle(interactionId, 'gemini-interactions');
    }
  };
}
/**
 * Parse streaming events from the Interactions API
 */
function _parseStreamingEvent(
  pt: IParticleTransmitter,
  eventData: any,
  parserCreationTimestamp: number,
  timeToFirstEvent: number | undefined,
): void {

  const incomingType = eventData.event_type;

  // Incremental content update: 'text' feeds the answer, 'thought' feeds the reasoning channel
  if (incomingType === 'content.delta') {
    const contentDelta = eventData.delta;
    if (contentDelta?.type === 'text' && contentDelta.text)
      pt.appendText(contentDelta.text);
    else if (contentDelta?.type === 'thought' && contentDelta.thought)
      pt.appendReasoningText(contentDelta.thought);
    return;
  }

  // Final event: carries the complete interaction object
  if (incomingType === 'interaction.complete') {
    if (eventData.interaction)
      _handleInteractionComplete(pt, eventData.interaction, parserCreationTimestamp, timeToFirstEvent);
    return;
  }

  // Unknown event type - log but don't fail
  if (incomingType)
    console.warn(`[Gemini Interactions] Unknown streaming event type: ${incomingType}`);
  // For non-event-type responses (like status updates), try to parse as interaction
  else if (eventData.status)
    _handleInteractionStatus(pt, eventData);
}
/**
 * Parse non-streaming response from the Interactions API
 */
function _parseNonStreamingResponse(
  pt: IParticleTransmitter,
  eventData: any,
  parserCreationTimestamp: number,
  timeToFirstEvent: number | undefined,
): void {
  // A non-streaming response is the full interaction object; 'status' marks a valid one
  if (!eventData.status) {
    pt.setDialectTerminatingIssue('Invalid Interactions API response: missing status', null, 'srv-warn');
    return;
  }
  _handleInteractionComplete(pt, eventData, parserCreationTimestamp, timeToFirstEvent);
}
/**
 * Handle a complete interaction response
 */
function _handleInteractionComplete(
  pt: IParticleTransmitter,
  interaction: any,
  parserCreationTimestamp: number,
  timeToFirstEvent: number | undefined,
): void {

  // emit every output generated so far (text/thought/image/function_call/...)
  const emitOutputs = () => {
    if (interaction.outputs?.length)
      for (const singleOutput of interaction.outputs)
        _processOutput(pt, singleOutput);
  };

  switch (interaction.status) {

    case 'completed':
      emitOutputs();
      break;

    case 'in_progress':
      // Background task still running - client should poll; don't end the stream yet
      pt.appendText('[Deep Research is running in the background. Status: in progress...]\n');
      return;

    case 'requires_action':
      // Agent needs user input or function execution; surface whatever was produced so far
      pt.appendText('[Agent requires action - function call or user input needed]\n');
      emitOutputs();
      break;

    case 'failed':
      pt.setTokenStopReason('cg-issue');
      return pt.setDialectTerminatingIssue('Deep Research failed', IssueSymbols.Generic, false);

    case 'cancelled':
      pt.setTokenStopReason('cg-issue');
      return pt.setDialectTerminatingIssue('Deep Research was cancelled', null, false);

    default:
      console.warn(`[Gemini Interactions] Unknown status: ${interaction.status}`);
  }

  // Token accounting and timings - only reached by statuses that fall through the switch
  if (interaction.usage) {
    const metricsUpdate: AixWire_Particles.CGSelectMetrics = {
      TIn: interaction.usage.input_tokens,
      TOut: interaction.usage.output_tokens,
    };
    if (timeToFirstEvent !== undefined)
      metricsUpdate.dtStart = timeToFirstEvent;
    metricsUpdate.dtAll = Date.now() - parserCreationTimestamp;
    pt.updateMetrics(metricsUpdate);
  }
}
/**
 * Handle interaction status updates (for polling scenarios)
 */
function _handleInteractionStatus(
  pt: IParticleTransmitter,
  eventData: any,
): void {

  const pollStatus = eventData.status;

  // Still running - this might be a poll response
  if (pollStatus === 'in_progress') {
    pt.appendText('[Research in progress...]\n');
    return;
  }

  // Any terminal/actionable status is handled as a complete interaction
  if (pollStatus === 'completed' || pollStatus === 'requires_action' || pollStatus === 'failed' || pollStatus === 'cancelled') {
    _handleInteractionComplete(pt, eventData, Date.now(), 0);
    return;
  }

  console.warn(`[Gemini Interactions] Unknown status in poll: ${pollStatus}`);
}
/**
 * Process a single output from the interaction
 */
function _processOutput(pt: IParticleTransmitter, output: any): void {

  const kind = output.type;

  if (kind === 'text') {
    // Main answer text
    if (output.text)
      pt.appendText(output.text);

  } else if (kind === 'thought') {
    // Reasoning/thinking channel
    if (output.thought)
      pt.appendReasoningText(output.thought);

  } else if (kind === 'image') {
    // Inline generated image, requires both payload and mime type
    if (output.data && output.mime_type)
      pt.appendImageInline(
        output.mime_type,
        output.data,
        'Gemini Generated Image',
        'Gemini Deep Research',
        '',
      );

  } else if (kind === 'function_call') {
    // Handle function calls from the agent
    pt.startFunctionCallInvocation(
      output.id || null,
      output.name,
      'json_object',
      output.arguments,
    );
    pt.endMessagePart();

  } else if (kind === 'google_search_result' || kind === 'url_context_result') {
    // These are metadata/context outputs - could be used for citations
    // For now, we skip them as they're supplementary to the main text output

  } else {
    console.warn(`[Gemini Interactions] Unknown output type: ${kind}`);
  }
}
@@ -894,3 +894,239 @@ export namespace GeminiWire_API_Models_List {
});
}
//
// Interactions API (Deep Research Agent)
// https://ai.google.dev/gemini-api/docs/interactions
//
export namespace GeminiWire_API_Interactions {

// REST paths, relative to the Gemini API host (v1beta)
export const postPath = '/v1beta/interactions';
export const streamingPostPath = '/v1beta/interactions?alt=sse';
// Path builder for fetching a single interaction by ID (presumably for polling background tasks — TODO confirm usage)
export const getPath = (interactionId: string) => `/v1beta/interactions/${interactionId}`;

// Input content types for the Interactions API
// NOTE(review): all input variants are tagged by a 'type' literal; z.discriminatedUnion('type', ...)
// would give sharper parse errors than z.union — confirm the zod version in use supports it
const TextInput_schema = z.object({
type: z.literal('text'),
text: z.string(),
});
const ImageInput_schema = z.object({
type: z.literal('image'),
data: z.string().optional(), // base64-encoded
uri: z.string().optional(),
mime_type: z.string().optional(),
});
const AudioInput_schema = z.object({
type: z.literal('audio'),
data: z.string().optional(), // base64-encoded
mime_type: z.string().optional(),
});
const VideoInput_schema = z.object({
type: z.literal('video'),
data: z.string().optional(), // base64-encoded
mime_type: z.string().optional(),
});
const DocumentInput_schema = z.object({
type: z.literal('document'),
data: z.string().optional(), // base64-encoded
mime_type: z.string().optional(),
});
// Result of a client-executed function call, echoed back into the conversation
const FunctionResultInput_schema = z.object({
type: z.literal('function_result'),
name: z.string(),
call_id: z.string(),
result: z.any(),
});
const ContentPart_Input_schema = z.union([
TextInput_schema,
ImageInput_schema,
AudioInput_schema,
VideoInput_schema,
DocumentInput_schema,
FunctionResultInput_schema,
]);
// A single conversational turn: role plus either structured parts or a bare string
const Turn_schema = z.object({
role: z.enum(['user', 'model']),
content: z.union([
z.array(ContentPart_Input_schema),
z.string(),
]),
});

// Function tool definition
const FunctionTool_schema = z.object({
type: z.literal('function'),
name: z.string(),
description: z.string(),
parameters: z.object({
type: z.literal('object'),
properties: z.record(z.string(), z.any()).optional(),
required: z.array(z.string()).optional(),
}).optional(),
});
// Built-in tools
const GoogleSearchTool_schema = z.object({
type: z.literal('google_search'),
});
const CodeExecutionTool_schema = z.object({
type: z.literal('code_execution'),
});
const UrlContextTool_schema = z.object({
type: z.literal('url_context'),
});
const McpServerTool_schema = z.object({
type: z.literal('mcp_server'),
name: z.string(),
url: z.string(),
});
const Tool_schema = z.union([
FunctionTool_schema,
GoogleSearchTool_schema,
CodeExecutionTool_schema,
UrlContextTool_schema,
McpServerTool_schema,
]);

// Generation config
// NOTE(review): thinking_level here allows 'minimal', which the generateContent-side
// param ('high' | 'medium' | 'low') cannot produce — confirm intended
const GenerationConfig_schema = z.object({
temperature: z.number().optional(),
max_output_tokens: z.number().optional(),
thinking_level: z.enum(['minimal', 'low', 'medium', 'high']).optional(),
});

// Request
export type Request = z.infer<typeof Request_schema>;
export const Request_schema = z.object({
// One of model or agent must be provided
// NOTE(review): this "one of" constraint is not enforced by the schema (both optional) —
// consider a .refine() if callers other than the adapter construct requests
model: z.string().optional(),
agent: z.string().optional(),
// Input can be a string, array of content parts, or array of turns
input: z.union([
z.string(),
z.array(ContentPart_Input_schema),
z.array(Turn_schema),
]),
// Optional configuration
tools: z.array(Tool_schema).optional(),
response_format: z.any().optional(), // JSON schema for structured output
generation_config: GenerationConfig_schema.optional(),
system_instruction: z.string().optional(),
// Stateful conversation
previous_interaction_id: z.string().optional(),
// API options
stream: z.boolean().optional(),
background: z.boolean().optional(), // Only for agents
store: z.boolean().optional(), // Default: true
});

// Output content types
const TextOutput_schema = z.object({
type: z.literal('text'),
text: z.string(),
});
const ThoughtOutput_schema = z.object({
type: z.literal('thought'),
thought: z.string(),
});
const ImageOutput_schema = z.object({
type: z.literal('image'),
data: z.string(), // base64-encoded
mime_type: z.string(),
});
const FunctionCallOutput_schema = z.object({
type: z.literal('function_call'),
id: z.string(),
name: z.string(),
arguments: z.any(),
});
// Search/URL-context result payloads are not yet modeled (parsed as bare tagged objects)
const GoogleSearchResultOutput_schema = z.object({
type: z.literal('google_search_result'),
// Search result data
});
const UrlContextResultOutput_schema = z.object({
type: z.literal('url_context_result'),
// URL context data
});
const ContentPart_Output_schema = z.union([
TextOutput_schema,
ThoughtOutput_schema,
ImageOutput_schema,
FunctionCallOutput_schema,
GoogleSearchResultOutput_schema,
UrlContextResultOutput_schema,
]);

// Usage metadata
const Usage_schema = z.object({
input_tokens: z.number().optional(),
output_tokens: z.number().optional(),
total_tokens: z.number().optional(),
});

// Interaction status
const Status_enum = z.enum([
'in_progress',
'completed',
'requires_action',
'failed',
'cancelled',
]);

// Response (non-streaming)
export type Response = z.infer<typeof Response_schema>;
export const Response_schema = z.object({
id: z.string(),
status: Status_enum,
outputs: z.array(ContentPart_Output_schema).optional(),
usage: Usage_schema.optional(),
});

// Streaming event types
// content.delta: incremental text or thought fragment
const ContentDeltaEvent_schema = z.object({
event_type: z.literal('content.delta'),
delta: z.union([
z.object({ type: z.literal('text'), text: z.string() }),
z.object({ type: z.literal('thought'), thought: z.string() }),
]),
});
// interaction.complete: terminal event carrying the full interaction object
const InteractionCompleteEvent_schema = z.object({
event_type: z.literal('interaction.complete'),
interaction: Response_schema,
});
export type StreamEvent = z.infer<typeof StreamEvent_schema>;
export const StreamEvent_schema = z.union([
ContentDeltaEvent_schema,
InteractionCompleteEvent_schema,
]);
}
@@ -35,6 +35,14 @@ export const geminiAccessSchema = z.object({
});
/**
* Build Gemini API access parameters for generateContent and other model APIs.
*
* @param access Gemini access configuration
* @param modelRefId Model ID to use in the path (e.g., 'models/gemini-pro')
* @param apiPath API path template (e.g., '/v1beta/{model=models/*}:generateContent')
* @param useV1Alpha Whether to use v1alpha API version (for experimental features)
*/
export function geminiAccess(access: GeminiAccessSchema, modelRefId: string | null, apiPath: string, useV1Alpha: boolean): { headers: HeadersInit, url: string } {
const geminiHost = llmsFixupHost(access.geminiHost || DEFAULT_GEMINI_HOST, apiPath);
@@ -2,7 +2,7 @@ import type { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/w
import type { ModelDescriptionSchema } from '../llm.server.types';
import { LLM_IF_GEM_CodeExecution, LLM_IF_HOTFIX_NoStream, LLM_IF_HOTFIX_NoTemperature, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { LLM_IF_GEM_CodeExecution, LLM_IF_GEM_Interactions, LLM_IF_HOTFIX_NoStream, LLM_IF_HOTFIX_NoTemperature, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { Release } from '~/common/app.release';
@@ -198,6 +198,23 @@ const _knownGeminiModels: ({
benchmark: undefined, // Non-benchmarkable because generates images
},
/// Agents (Interactions API)
// Deep Research Agent - Available via Interactions API
// https://ai.google.dev/gemini-api/docs/deep-research
{
id: 'agents/deep-research-pro-preview-12-2025',
labelOverride: 'Deep Research Pro Preview',
isPreview: true,
chatPrice: gemini25ProPricing, // Uses similar pricing to 2.5 Pro
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [
{ paramId: 'llmVndGeminiInteractionsAgent' }, // Enables Interactions API with agent name
],
benchmark: undefined, // Agent-based, not benchmarkable
// Note: This model uses background=true by default for long-running research tasks
},
/// Generation 2.5
// 2.5 Pro (Stable) - Released June 17, 2025
@@ -710,6 +727,10 @@ const _sortOderIdPrefix: string[] = [
'models/gemini-3-pro',
'models/gemini-3-',
// Agents (Interactions API)
'agents/deep-research-pro-preview',
'agents/',
'models/gemini-exp',
'models/gemini-2.5-pro',
@@ -94,6 +94,7 @@ const ModelParameterSpec_schema = z.object({
'llmVndGeminiShowThoughts',
'llmVndGeminiThinkingBudget',
'llmVndGeminiThinkingLevel',
'llmVndGeminiInteractionsAgent',
// 'llmVndGeminiUrlContext',
// Moonshot
'llmVndMoonshotWebSearch',