mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 356359d25e |
@@ -212,6 +212,14 @@ export const DModelParameterRegistry = {
|
||||
// No initialValue - undefined means 'dynamic', which for Gemini Pro is the same as 'high' (which is the equivalent of 'medium' for OpenAI's effort levels.. somehow)
|
||||
} as const,
|
||||
|
||||
llmVndGeminiInteractionsAgent: {
|
||||
label: 'Agent (Interactions API)',
|
||||
type: 'string' as const,
|
||||
description: 'Uses Gemini Interactions API with the specified agent (e.g., deep-research-pro-preview-12-2025)',
|
||||
hidden: true, // Auto-set by model definition
|
||||
requiredFallback: 'deep-research-pro-preview-12-2025',
|
||||
} as const,
|
||||
|
||||
// NOTE: we don't have this as a parameter, as for now we use it in tandem with llmVndGeminiGoogleSearch
|
||||
// llmVndGeminiUrlContext: {
|
||||
// label: 'URL Context',
|
||||
|
||||
@@ -151,6 +151,7 @@ export type DModelInterfaceV1 =
|
||||
| 'oai-realtime'
|
||||
| 'oai-responses'
|
||||
| 'gem-code-execution'
|
||||
| 'gem-interactions' // [Gemini] Interactions API (Deep Research agent)
|
||||
| 'outputs-audio' // TEMP: ui flag - supports audio output (e.g., text-to-speech)
|
||||
| 'outputs-image' // TEMP: ui flag - supports image output (image generation)
|
||||
| 'outputs-no-text' // disable text outputs (used in conjunction with alt-outputs) - assumed off
|
||||
@@ -181,6 +182,7 @@ export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching';
|
||||
export const LLM_IF_OAI_Realtime: DModelInterfaceV1 = 'oai-realtime';
|
||||
export const LLM_IF_OAI_Responses: DModelInterfaceV1 = 'oai-responses';
|
||||
export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution';
|
||||
export const LLM_IF_GEM_Interactions: DModelInterfaceV1 = 'gem-interactions';
|
||||
export const LLM_IF_HOTFIX_NoStream: DModelInterfaceV1 = 'hotfix-no-stream';
|
||||
export const LLM_IF_HOTFIX_NoTemperature: DModelInterfaceV1 = 'hotfix-no-temperature';
|
||||
export const LLM_IF_HOTFIX_StripImages: DModelInterfaceV1 = 'hotfix-strip-images';
|
||||
@@ -205,6 +207,7 @@ export const LLMS_ALL_INTERFACES = [
|
||||
// Vendor-specific capabilities
|
||||
LLM_IF_ANT_PromptCaching, // [Anthropic] model supports anthropic-specific caching
|
||||
LLM_IF_GEM_CodeExecution, // [Gemini] Tool: code execution
|
||||
LLM_IF_GEM_Interactions, // [Gemini] Interactions API (Deep Research agent)
|
||||
LLM_IF_OAI_PromptCaching, // [OpenAI] model supports OpenAI prompt caching
|
||||
LLM_IF_OAI_Realtime, // [OpenAI] realtime API support - unused
|
||||
LLM_IF_OAI_Responses, // [OpenAI] Responses API (new) support
|
||||
|
||||
@@ -48,7 +48,7 @@ export function aixCreateModelFromLLMOptions(
|
||||
const {
|
||||
llmRef, llmTemperature, llmResponseTokens, llmTopP,
|
||||
llmVndAnt1MContext, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndAntEffort,
|
||||
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiShowThoughts, llmVndGeminiThinkingBudget, llmVndGeminiThinkingLevel,
|
||||
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiInteractionsAgent, llmVndGeminiMediaResolution, llmVndGeminiShowThoughts, llmVndGeminiThinkingBudget, llmVndGeminiThinkingLevel,
|
||||
// llmVndMoonshotWebSearch,
|
||||
llmVndOaiReasoningEffort, llmVndOaiReasoningEffort4, llmVndOaiReasoningEffort52, llmVndOaiReasoningEffort52Pro, llmVndOaiRestoreMarkdown, llmVndOaiVerbosity, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, llmVndOaiImageGeneration,
|
||||
llmVndOrtWebSearch,
|
||||
@@ -118,6 +118,7 @@ export function aixCreateModelFromLLMOptions(
|
||||
...(llmVndGeminiShowThoughts ? { vndGeminiShowThoughts: llmVndGeminiShowThoughts } : {}),
|
||||
...(llmVndGeminiThinkingBudget !== undefined ? { vndGeminiThinkingBudget: llmVndGeminiThinkingBudget } : {}),
|
||||
...(llmVndGeminiThinkingLevel ? { vndGeminiThinkingLevel: llmVndGeminiThinkingLevel } : {}),
|
||||
...(llmVndGeminiInteractionsAgent ? { vndGeminiInteractionsAgent: llmVndGeminiInteractionsAgent } : {}),
|
||||
// ...(llmVndGeminiUrlContext === 'auto' ? { vndGeminiUrlContext: llmVndGeminiUrlContext } : {}),
|
||||
// ...(llmVndMoonshotWebSearch === 'auto' ? { vndMoonshotWebSearch: 'auto' } : {}),
|
||||
...(llmVndOaiResponsesAPI ? { vndOaiResponsesAPI: true } : {}),
|
||||
|
||||
@@ -468,6 +468,12 @@ export namespace AixWire_API {
|
||||
vndGeminiThinkingBudget: z.number().optional(), // old param
|
||||
vndGeminiThinkingLevel: z.enum(['high', 'medium', 'low']).optional(), // new param
|
||||
vndGeminiUrlContext: z.enum(['auto']).optional(),
|
||||
/**
|
||||
* [Gemini, 2025-12-19] Interactions API for Deep Research agent
|
||||
* When set to an agent name, uses the Interactions API instead of generateContent
|
||||
* See: https://ai.google.dev/gemini-api/docs/interactions
|
||||
*/
|
||||
vndGeminiInteractionsAgent: z.string().optional(),
|
||||
// Moonshot
|
||||
vndMoonshotWebSearch: z.enum(['auto']).optional(),
|
||||
// OpenAI
|
||||
|
||||
@@ -464,14 +464,15 @@ export class ChatGenerateTransmitter implements IParticleTransmitter {
|
||||
}
|
||||
|
||||
/** Communicates the upstream response handle, for remote control/resumability */
|
||||
setUpstreamHandle(handle: string, _type: 'oai-responses' /* the only one for now, used for type safety */) {
|
||||
setUpstreamHandle(handle: string, type: 'oai-responses' | 'gemini-interactions') {
|
||||
if (SERVER_DEBUG_WIRE)
|
||||
console.log('|response-handle|', handle);
|
||||
console.log('|response-handle|', handle, type);
|
||||
// NOTE: if needed, we could store the handle locally for server-side resumability, but we just implement client-side (correction, manual) for now
|
||||
const uht = type === 'gemini-interactions' ? 'vnd.gemini.interactions' : 'vnd.oai.responses';
|
||||
this.transmissionQueue.push({
|
||||
cg: 'set-upstream-handle',
|
||||
handle: {
|
||||
uht: 'vnd.oai.responses',
|
||||
uht: uht as any, // TODO: add 'vnd.gemini.interactions' to the type union in aix.wiretypes.ts
|
||||
responseId: handle,
|
||||
expiresAt: Date.now() + 30 * 24 * 3600 * 1000, // default: 30 days expiry
|
||||
},
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage } from '../../../api/aix.wiretypes';
|
||||
import { GeminiWire_API_Interactions } from '../../wiretypes/gemini.wiretypes';
|
||||
|
||||
import { aixSpillSystemToUser, approxDocPart_To_String } from './adapters.common';
|
||||
|
||||
|
||||
type TRequest = GeminiWire_API_Interactions.Request;
|
||||
|
||||
|
||||
/**
|
||||
* Gemini Interactions API adapter
|
||||
*
|
||||
* Converts AIX format to Gemini Interactions API format.
|
||||
* Used specifically for agents like Deep Research.
|
||||
*
|
||||
* Key differences from generateContent:
|
||||
* - Uses 'agent' instead of 'model' for agent-based interactions
|
||||
* - Uses 'input' with turns/content parts instead of 'contents'
|
||||
* - Supports background execution for long-running tasks
|
||||
* - Uses different streaming format (event_type-based)
|
||||
*/
|
||||
export function aixToGeminiInteractions(
|
||||
model: AixAPI_Model,
|
||||
_chatGenerate: AixAPIChatGenerate_Request,
|
||||
streaming: boolean,
|
||||
): TRequest {
|
||||
|
||||
// Pre-process CGR - approximate spill of System to User message
|
||||
const chatGenerate = aixSpillSystemToUser(_chatGenerate);
|
||||
|
||||
// Build system instruction from system message
|
||||
let systemInstruction: string | undefined = undefined;
|
||||
if (chatGenerate.systemMessage?.parts.length) {
|
||||
const systemParts: string[] = [];
|
||||
for (const part of chatGenerate.systemMessage.parts) {
|
||||
switch (part.pt) {
|
||||
case 'text':
|
||||
systemParts.push(part.text);
|
||||
break;
|
||||
case 'doc':
|
||||
systemParts.push(approxDocPart_To_String(part));
|
||||
break;
|
||||
case 'inline_image':
|
||||
case 'meta_cache_control':
|
||||
// Ignore these for system instruction
|
||||
break;
|
||||
default:
|
||||
console.warn(`[Gemini Interactions] Unsupported system part type: ${(part as any).pt}`);
|
||||
}
|
||||
}
|
||||
if (systemParts.length > 0)
|
||||
systemInstruction = systemParts.join('\n\n');
|
||||
}
|
||||
|
||||
// Convert chat sequence to turns
|
||||
const input = _toInteractionsTurns(chatGenerate.chatSequence);
|
||||
|
||||
// Get the agent name from the model's vndGeminiInteractionsAgent property
|
||||
const agentName = model.vndGeminiInteractionsAgent;
|
||||
|
||||
// For Deep Research and other background agents, we use background=true
|
||||
// This allows the agent to run asynchronously
|
||||
const isBackgroundAgent = agentName?.includes('deep-research');
|
||||
|
||||
// Construct the request payload
|
||||
const payload: TRequest = {
|
||||
// Agent-based interactions use 'agent' instead of 'model'
|
||||
agent: agentName,
|
||||
|
||||
// Input as array of turns
|
||||
input,
|
||||
|
||||
// System instruction (if any)
|
||||
system_instruction: systemInstruction,
|
||||
|
||||
// Generation config
|
||||
generation_config: {
|
||||
temperature: model.temperature ?? undefined,
|
||||
max_output_tokens: model.maxTokens ?? undefined,
|
||||
// Map thinking level for agents that support it
|
||||
thinking_level: model.vndGeminiThinkingLevel ?? undefined,
|
||||
},
|
||||
|
||||
// API options
|
||||
stream: streaming,
|
||||
background: isBackgroundAgent, // Enable background for Deep Research
|
||||
store: true, // Enable storage for state management
|
||||
};
|
||||
|
||||
// Clean up undefined values
|
||||
if (!payload.system_instruction)
|
||||
delete payload.system_instruction;
|
||||
if (payload.generation_config) {
|
||||
if (payload.generation_config.temperature === undefined)
|
||||
delete payload.generation_config.temperature;
|
||||
if (payload.generation_config.max_output_tokens === undefined)
|
||||
delete payload.generation_config.max_output_tokens;
|
||||
if (payload.generation_config.thinking_level === undefined)
|
||||
delete payload.generation_config.thinking_level;
|
||||
if (Object.keys(payload.generation_config).length === 0)
|
||||
delete payload.generation_config;
|
||||
}
|
||||
|
||||
// Validate the payload
|
||||
const validated = GeminiWire_API_Interactions.Request_schema.safeParse(payload);
|
||||
if (!validated.success) {
|
||||
console.warn('Gemini Interactions: invalid payload. Error:', validated.error.message);
|
||||
throw new Error(`Invalid sequence for Gemini Interactions API: ${validated.error.issues?.[0]?.message || validated.error.message || validated.error}.`);
|
||||
}
|
||||
|
||||
return validated.data;
|
||||
}
|
||||
|
||||
|
||||
// Content part type for Interactions API input
|
||||
type TContentPart =
|
||||
| { type: 'text'; text: string }
|
||||
| { type: 'image'; data?: string; mime_type?: string }
|
||||
| { type: 'audio'; data?: string; mime_type?: string }
|
||||
| { type: 'function_result'; name: string; call_id: string; result: unknown };
|
||||
|
||||
// Turn type for Interactions API input
|
||||
type TTurn = {
|
||||
role: 'user' | 'model';
|
||||
content: TContentPart[];
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Convert AIX chat messages to Interactions API turns format
|
||||
*/
|
||||
function _toInteractionsTurns(chatSequence: AixMessages_ChatMessage[]): TTurn[] {
|
||||
return chatSequence.map(message => {
|
||||
const content: TContentPart[] = [];
|
||||
|
||||
for (const part of message.parts) {
|
||||
switch (part.pt) {
|
||||
|
||||
case 'text':
|
||||
content.push({
|
||||
type: 'text',
|
||||
text: part.text,
|
||||
});
|
||||
break;
|
||||
|
||||
case 'inline_image':
|
||||
content.push({
|
||||
type: 'image',
|
||||
data: part.base64,
|
||||
mime_type: part.mimeType,
|
||||
});
|
||||
break;
|
||||
|
||||
case 'inline_audio':
|
||||
content.push({
|
||||
type: 'audio',
|
||||
data: part.base64,
|
||||
mime_type: part.mimeType,
|
||||
});
|
||||
break;
|
||||
|
||||
case 'doc':
|
||||
// Convert doc to text for now
|
||||
content.push({
|
||||
type: 'text',
|
||||
text: approxDocPart_To_String(part),
|
||||
});
|
||||
break;
|
||||
|
||||
case 'ma':
|
||||
// Model artifact (thinking) - skip for input
|
||||
break;
|
||||
|
||||
case 'meta_cache_control':
|
||||
case 'meta_in_reference_to':
|
||||
// Skip metadata parts
|
||||
break;
|
||||
|
||||
case 'tool_invocation':
|
||||
// For function calls, we'd need to handle these specially
|
||||
// For Deep Research, this is less relevant
|
||||
console.warn('[Gemini Interactions] Tool invocations not yet supported in input');
|
||||
break;
|
||||
|
||||
case 'tool_response':
|
||||
// Function results
|
||||
if (part.response.type === 'function_call') {
|
||||
content.push({
|
||||
type: 'function_result',
|
||||
name: part.response._name || part.id,
|
||||
call_id: part.id,
|
||||
result: part.response.result,
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
console.warn(`[Gemini Interactions] Unsupported part type: ${(part as any).pt}`);
|
||||
}
|
||||
}
|
||||
|
||||
// If no content, add empty text
|
||||
if (content.length === 0)
|
||||
content.push({ type: 'text', text: '' });
|
||||
|
||||
return {
|
||||
role: message.role === 'model' ? 'model' : 'user',
|
||||
content,
|
||||
};
|
||||
});
|
||||
}
|
||||
@@ -8,16 +8,18 @@ import { DEEPSEEK_SPECIALE_HOST, DEEPSEEK_SPECIALE_SUFFIX } from '~/modules/llms
|
||||
import type { AixAPI_Access, AixAPI_Model, AixAPI_ResumeHandle, AixAPIChatGenerate_Request } from '../../api/aix.wiretypes';
|
||||
import type { AixDemuxers } from '../stream.demuxers';
|
||||
|
||||
import { GeminiWire_API_Generate_Content } from '../wiretypes/gemini.wiretypes';
|
||||
import { GeminiWire_API_Generate_Content, GeminiWire_API_Interactions } from '../wiretypes/gemini.wiretypes';
|
||||
|
||||
import { aixToAnthropicMessageCreate } from './adapters/anthropic.messageCreate';
|
||||
import { aixToGeminiGenerateContent } from './adapters/gemini.generateContent';
|
||||
import { aixToGeminiInteractions } from './adapters/gemini.interactions';
|
||||
import { aixToOpenAIChatCompletions } from './adapters/openai.chatCompletions';
|
||||
import { aixToOpenAIResponses } from './adapters/openai.responsesCreate';
|
||||
|
||||
import type { IParticleTransmitter } from './parsers/IParticleTransmitter';
|
||||
import { createAnthropicMessageParser, createAnthropicMessageParserNS } from './parsers/anthropic.parser';
|
||||
import { createGeminiGenerateContentResponseParser } from './parsers/gemini.parser';
|
||||
import { createGeminiInteractionsResponseParser } from './parsers/gemini.interactions.parser';
|
||||
import { createOpenAIChatCompletionsChunkParser, createOpenAIChatCompletionsParserNS } from './parsers/openai.parser';
|
||||
import { createOpenAIResponseParserNS, createOpenAIResponsesEventParser } from './parsers/openai.responses.parser';
|
||||
|
||||
@@ -83,7 +85,27 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
|
||||
};
|
||||
}
|
||||
|
||||
case 'gemini':
|
||||
case 'gemini': {
|
||||
/**
|
||||
* [Gemini, 2025-12-19] Interactions API for agents like Deep Research
|
||||
* When vndGeminiInteractionsAgent is set, use the Interactions API instead of generateContent
|
||||
*/
|
||||
const useInteractionsAPI = !!model.vndGeminiInteractionsAgent;
|
||||
|
||||
if (useInteractionsAPI) {
|
||||
// Use Interactions API for agent-based interactions (e.g., Deep Research)
|
||||
const agentName = model.vndGeminiInteractionsAgent!;
|
||||
return {
|
||||
request: {
|
||||
...geminiAccess(access, null, streaming ? GeminiWire_API_Interactions.streamingPostPath : GeminiWire_API_Interactions.postPath, false),
|
||||
method: 'POST',
|
||||
body: aixToGeminiInteractions(model, chatGenerate, streaming),
|
||||
},
|
||||
demuxerFormat: streaming ? 'fast-sse' : null,
|
||||
chatGenerateParse: createGeminiInteractionsResponseParser(agentName, streaming),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* [Gemini, 2025-04-17] For newer thinking parameters, use v1alpha (we only see statistically better results)
|
||||
*/
|
||||
@@ -98,6 +120,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
|
||||
demuxerFormat: streaming ? 'fast-sse' : null,
|
||||
chatGenerateParse: createGeminiGenerateContentResponseParser(model.id.replace('models/', ''), streaming),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Ollama has now an OpenAI compatibility layer for `chatGenerate` API, but still its own protocol for models listing.
|
||||
|
||||
@@ -81,7 +81,7 @@ export interface IParticleTransmitter {
|
||||
setModelName(modelName: string): void;
|
||||
|
||||
/** Communicates the upstream response handle, for remote control/resumability */
|
||||
setUpstreamHandle(handle: string, type: 'oai-responses'): void;
|
||||
setUpstreamHandle(handle: string, type: 'oai-responses' | 'gemini-interactions'): void;
|
||||
|
||||
/** Communicates the finish reason to the client */
|
||||
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason): void;
|
||||
|
||||
@@ -0,0 +1,282 @@
|
||||
import type { AixWire_Particles } from '../../../api/aix.wiretypes';
|
||||
import type { ChatGenerateParseFunction } from '../chatGenerate.dispatch';
|
||||
import type { IParticleTransmitter } from './IParticleTransmitter';
|
||||
import { IssueSymbols } from '../ChatGenerateTransmitter';
|
||||
|
||||
|
||||
/**
|
||||
* Gemini Interactions API Response Parser
|
||||
*
|
||||
* Parses responses from the Gemini Interactions API, which is used for
|
||||
* agents like Deep Research. Supports both streaming and non-streaming modes.
|
||||
*
|
||||
* Streaming events:
|
||||
* - content.delta: Incremental text/thought updates
|
||||
* - interaction.complete: Final interaction with full response
|
||||
*
|
||||
* Non-streaming:
|
||||
* - Single response object with outputs array
|
||||
*
|
||||
* Deep Research specifics:
|
||||
* - Uses background=true for long-running tasks
|
||||
* - Status can be: in_progress, completed, requires_action, failed, cancelled
|
||||
* - May require polling via interactions.get() for background tasks
|
||||
*/
|
||||
export function createGeminiInteractionsResponseParser(
|
||||
agentName: string,
|
||||
isStreaming: boolean,
|
||||
): ChatGenerateParseFunction {
|
||||
const parserCreationTimestamp = Date.now();
|
||||
let sentAgentName = false;
|
||||
let timeToFirstEvent: number | undefined;
|
||||
let interactionId: string | undefined;
|
||||
|
||||
return function(pt: IParticleTransmitter, rawEventData: string): void {
|
||||
|
||||
// Time to first event
|
||||
if (timeToFirstEvent === undefined)
|
||||
timeToFirstEvent = Date.now() - parserCreationTimestamp;
|
||||
|
||||
// Parse the raw event data
|
||||
let eventData: any;
|
||||
try {
|
||||
eventData = JSON.parse(rawEventData);
|
||||
} catch (e) {
|
||||
return pt.setDialectTerminatingIssue(`Failed to parse Interactions API response: ${e}`, null, 'srv-warn');
|
||||
}
|
||||
|
||||
// Set agent name as model name (if not already set)
|
||||
if (!sentAgentName) {
|
||||
pt.setModelName(agentName);
|
||||
sentAgentName = true;
|
||||
}
|
||||
|
||||
// Handle streaming vs non-streaming
|
||||
if (isStreaming) {
|
||||
_parseStreamingEvent(pt, eventData, parserCreationTimestamp, timeToFirstEvent);
|
||||
} else {
|
||||
_parseNonStreamingResponse(pt, eventData, parserCreationTimestamp, timeToFirstEvent);
|
||||
}
|
||||
|
||||
// Store interaction ID for potential polling
|
||||
if (eventData.id)
|
||||
interactionId = eventData.id;
|
||||
if (eventData.interaction?.id)
|
||||
interactionId = eventData.interaction.id;
|
||||
|
||||
// Store interaction ID for resumability (similar to OpenAI Responses)
|
||||
if (interactionId)
|
||||
pt.setUpstreamHandle(interactionId, 'gemini-interactions');
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse streaming events from the Interactions API
|
||||
*/
|
||||
function _parseStreamingEvent(
|
||||
pt: IParticleTransmitter,
|
||||
eventData: any,
|
||||
parserCreationTimestamp: number,
|
||||
timeToFirstEvent: number | undefined,
|
||||
): void {
|
||||
|
||||
const eventType = eventData.event_type;
|
||||
|
||||
switch (eventType) {
|
||||
|
||||
case 'content.delta':
|
||||
// Incremental content update
|
||||
const delta = eventData.delta;
|
||||
if (delta?.type === 'text' && delta.text) {
|
||||
pt.appendText(delta.text);
|
||||
} else if (delta?.type === 'thought' && delta.thought) {
|
||||
pt.appendReasoningText(delta.thought);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'interaction.complete':
|
||||
// Final interaction response
|
||||
const interaction = eventData.interaction;
|
||||
if (interaction) {
|
||||
_handleInteractionComplete(pt, interaction, parserCreationTimestamp, timeToFirstEvent);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
// Unknown event type - log but don't fail
|
||||
if (eventType)
|
||||
console.warn(`[Gemini Interactions] Unknown streaming event type: ${eventType}`);
|
||||
// For non-event-type responses (like status updates), try to parse as interaction
|
||||
else if (eventData.status)
|
||||
_handleInteractionStatus(pt, eventData);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse non-streaming response from the Interactions API
|
||||
*/
|
||||
function _parseNonStreamingResponse(
|
||||
pt: IParticleTransmitter,
|
||||
eventData: any,
|
||||
parserCreationTimestamp: number,
|
||||
timeToFirstEvent: number | undefined,
|
||||
): void {
|
||||
|
||||
// Non-streaming returns the full interaction object
|
||||
if (eventData.status) {
|
||||
_handleInteractionComplete(pt, eventData, parserCreationTimestamp, timeToFirstEvent);
|
||||
} else {
|
||||
pt.setDialectTerminatingIssue('Invalid Interactions API response: missing status', null, 'srv-warn');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Handle a complete interaction response
|
||||
*/
|
||||
function _handleInteractionComplete(
|
||||
pt: IParticleTransmitter,
|
||||
interaction: any,
|
||||
parserCreationTimestamp: number,
|
||||
timeToFirstEvent: number | undefined,
|
||||
): void {
|
||||
|
||||
// Handle status
|
||||
const status = interaction.status;
|
||||
switch (status) {
|
||||
|
||||
case 'completed':
|
||||
// Process all outputs
|
||||
if (interaction.outputs?.length) {
|
||||
for (const output of interaction.outputs) {
|
||||
_processOutput(pt, output);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 'in_progress':
|
||||
// Background task still running - client should poll
|
||||
pt.appendText('[Deep Research is running in the background. Status: in progress...]\n');
|
||||
// Don't end the stream yet for background tasks
|
||||
return;
|
||||
|
||||
case 'requires_action':
|
||||
// Agent needs user input or function execution
|
||||
pt.appendText('[Agent requires action - function call or user input needed]\n');
|
||||
// Process any outputs that have been generated so far
|
||||
if (interaction.outputs?.length) {
|
||||
for (const output of interaction.outputs) {
|
||||
_processOutput(pt, output);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 'failed':
|
||||
pt.setTokenStopReason('cg-issue');
|
||||
return pt.setDialectTerminatingIssue('Deep Research failed', IssueSymbols.Generic, false);
|
||||
|
||||
case 'cancelled':
|
||||
pt.setTokenStopReason('cg-issue');
|
||||
return pt.setDialectTerminatingIssue('Deep Research was cancelled', null, false);
|
||||
|
||||
default:
|
||||
console.warn(`[Gemini Interactions] Unknown status: ${status}`);
|
||||
}
|
||||
|
||||
// Update metrics
|
||||
if (interaction.usage) {
|
||||
const metricsUpdate: AixWire_Particles.CGSelectMetrics = {
|
||||
TIn: interaction.usage.input_tokens,
|
||||
TOut: interaction.usage.output_tokens,
|
||||
};
|
||||
if (timeToFirstEvent !== undefined)
|
||||
metricsUpdate.dtStart = timeToFirstEvent;
|
||||
metricsUpdate.dtAll = Date.now() - parserCreationTimestamp;
|
||||
pt.updateMetrics(metricsUpdate);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Handle interaction status updates (for polling scenarios)
|
||||
*/
|
||||
function _handleInteractionStatus(
|
||||
pt: IParticleTransmitter,
|
||||
eventData: any,
|
||||
): void {
|
||||
const status = eventData.status;
|
||||
|
||||
switch (status) {
|
||||
case 'in_progress':
|
||||
// Still running - this might be a poll response
|
||||
pt.appendText('[Research in progress...]\n');
|
||||
break;
|
||||
|
||||
case 'completed':
|
||||
case 'requires_action':
|
||||
case 'failed':
|
||||
case 'cancelled':
|
||||
// Handle as complete interaction
|
||||
_handleInteractionComplete(pt, eventData, Date.now(), 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
console.warn(`[Gemini Interactions] Unknown status in poll: ${status}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Process a single output from the interaction
|
||||
*/
|
||||
function _processOutput(pt: IParticleTransmitter, output: any): void {
|
||||
const outputType = output.type;
|
||||
|
||||
switch (outputType) {
|
||||
|
||||
case 'text':
|
||||
if (output.text)
|
||||
pt.appendText(output.text);
|
||||
break;
|
||||
|
||||
case 'thought':
|
||||
if (output.thought)
|
||||
pt.appendReasoningText(output.thought);
|
||||
break;
|
||||
|
||||
case 'image':
|
||||
if (output.data && output.mime_type) {
|
||||
pt.appendImageInline(
|
||||
output.mime_type,
|
||||
output.data,
|
||||
'Gemini Generated Image',
|
||||
'Gemini Deep Research',
|
||||
'',
|
||||
);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'function_call':
|
||||
// Handle function calls from the agent
|
||||
pt.startFunctionCallInvocation(
|
||||
output.id || null,
|
||||
output.name,
|
||||
'json_object',
|
||||
output.arguments,
|
||||
);
|
||||
pt.endMessagePart();
|
||||
break;
|
||||
|
||||
case 'google_search_result':
|
||||
case 'url_context_result':
|
||||
// These are metadata/context outputs - could be used for citations
|
||||
// For now, we skip them as they're supplementary to the main text output
|
||||
break;
|
||||
|
||||
default:
|
||||
console.warn(`[Gemini Interactions] Unknown output type: ${outputType}`);
|
||||
}
|
||||
}
|
||||
@@ -894,3 +894,239 @@ export namespace GeminiWire_API_Models_List {
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Interactions API (Deep Research Agent)
|
||||
// https://ai.google.dev/gemini-api/docs/interactions
|
||||
//
|
||||
export namespace GeminiWire_API_Interactions {
|
||||
|
||||
export const postPath = '/v1beta/interactions';
|
||||
export const streamingPostPath = '/v1beta/interactions?alt=sse';
|
||||
export const getPath = (interactionId: string) => `/v1beta/interactions/${interactionId}`;
|
||||
|
||||
// Input content types for the Interactions API
|
||||
|
||||
const TextInput_schema = z.object({
|
||||
type: z.literal('text'),
|
||||
text: z.string(),
|
||||
});
|
||||
|
||||
const ImageInput_schema = z.object({
|
||||
type: z.literal('image'),
|
||||
data: z.string().optional(), // base64-encoded
|
||||
uri: z.string().optional(),
|
||||
mime_type: z.string().optional(),
|
||||
});
|
||||
|
||||
const AudioInput_schema = z.object({
|
||||
type: z.literal('audio'),
|
||||
data: z.string().optional(), // base64-encoded
|
||||
mime_type: z.string().optional(),
|
||||
});
|
||||
|
||||
const VideoInput_schema = z.object({
|
||||
type: z.literal('video'),
|
||||
data: z.string().optional(), // base64-encoded
|
||||
mime_type: z.string().optional(),
|
||||
});
|
||||
|
||||
const DocumentInput_schema = z.object({
|
||||
type: z.literal('document'),
|
||||
data: z.string().optional(), // base64-encoded
|
||||
mime_type: z.string().optional(),
|
||||
});
|
||||
|
||||
const FunctionResultInput_schema = z.object({
|
||||
type: z.literal('function_result'),
|
||||
name: z.string(),
|
||||
call_id: z.string(),
|
||||
result: z.any(),
|
||||
});
|
||||
|
||||
const ContentPart_Input_schema = z.union([
|
||||
TextInput_schema,
|
||||
ImageInput_schema,
|
||||
AudioInput_schema,
|
||||
VideoInput_schema,
|
||||
DocumentInput_schema,
|
||||
FunctionResultInput_schema,
|
||||
]);
|
||||
|
||||
const Turn_schema = z.object({
|
||||
role: z.enum(['user', 'model']),
|
||||
content: z.union([
|
||||
z.array(ContentPart_Input_schema),
|
||||
z.string(),
|
||||
]),
|
||||
});
|
||||
|
||||
// Function tool definition
|
||||
const FunctionTool_schema = z.object({
|
||||
type: z.literal('function'),
|
||||
name: z.string(),
|
||||
description: z.string(),
|
||||
parameters: z.object({
|
||||
type: z.literal('object'),
|
||||
properties: z.record(z.string(), z.any()).optional(),
|
||||
required: z.array(z.string()).optional(),
|
||||
}).optional(),
|
||||
});
|
||||
|
||||
// Built-in tools
|
||||
const GoogleSearchTool_schema = z.object({
|
||||
type: z.literal('google_search'),
|
||||
});
|
||||
|
||||
const CodeExecutionTool_schema = z.object({
|
||||
type: z.literal('code_execution'),
|
||||
});
|
||||
|
||||
const UrlContextTool_schema = z.object({
|
||||
type: z.literal('url_context'),
|
||||
});
|
||||
|
||||
const McpServerTool_schema = z.object({
|
||||
type: z.literal('mcp_server'),
|
||||
name: z.string(),
|
||||
url: z.string(),
|
||||
});
|
||||
|
||||
const Tool_schema = z.union([
|
||||
FunctionTool_schema,
|
||||
GoogleSearchTool_schema,
|
||||
CodeExecutionTool_schema,
|
||||
UrlContextTool_schema,
|
||||
McpServerTool_schema,
|
||||
]);
|
||||
|
||||
// Generation config
|
||||
const GenerationConfig_schema = z.object({
|
||||
temperature: z.number().optional(),
|
||||
max_output_tokens: z.number().optional(),
|
||||
thinking_level: z.enum(['minimal', 'low', 'medium', 'high']).optional(),
|
||||
});
|
||||
|
||||
// Request
|
||||
export type Request = z.infer<typeof Request_schema>;
|
||||
export const Request_schema = z.object({
|
||||
// One of model or agent must be provided
|
||||
model: z.string().optional(),
|
||||
agent: z.string().optional(),
|
||||
|
||||
// Input can be a string, array of content parts, or array of turns
|
||||
input: z.union([
|
||||
z.string(),
|
||||
z.array(ContentPart_Input_schema),
|
||||
z.array(Turn_schema),
|
||||
]),
|
||||
|
||||
// Optional configuration
|
||||
tools: z.array(Tool_schema).optional(),
|
||||
response_format: z.any().optional(), // JSON schema for structured output
|
||||
generation_config: GenerationConfig_schema.optional(),
|
||||
system_instruction: z.string().optional(),
|
||||
|
||||
// Stateful conversation
|
||||
previous_interaction_id: z.string().optional(),
|
||||
|
||||
// API options
|
||||
stream: z.boolean().optional(),
|
||||
background: z.boolean().optional(), // Only for agents
|
||||
store: z.boolean().optional(), // Default: true
|
||||
});
|
||||
|
||||
|
||||
// Output content types
|
||||
|
||||
const TextOutput_schema = z.object({
|
||||
type: z.literal('text'),
|
||||
text: z.string(),
|
||||
});
|
||||
|
||||
const ThoughtOutput_schema = z.object({
|
||||
type: z.literal('thought'),
|
||||
thought: z.string(),
|
||||
});
|
||||
|
||||
const ImageOutput_schema = z.object({
|
||||
type: z.literal('image'),
|
||||
data: z.string(), // base64-encoded
|
||||
mime_type: z.string(),
|
||||
});
|
||||
|
||||
const FunctionCallOutput_schema = z.object({
|
||||
type: z.literal('function_call'),
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
arguments: z.any(),
|
||||
});
|
||||
|
||||
const GoogleSearchResultOutput_schema = z.object({
|
||||
type: z.literal('google_search_result'),
|
||||
// Search result data
|
||||
});
|
||||
|
||||
const UrlContextResultOutput_schema = z.object({
|
||||
type: z.literal('url_context_result'),
|
||||
// URL context data
|
||||
});
|
||||
|
||||
const ContentPart_Output_schema = z.union([
|
||||
TextOutput_schema,
|
||||
ThoughtOutput_schema,
|
||||
ImageOutput_schema,
|
||||
FunctionCallOutput_schema,
|
||||
GoogleSearchResultOutput_schema,
|
||||
UrlContextResultOutput_schema,
|
||||
]);
|
||||
|
||||
// Usage metadata — token accounting; all fields optional as the API may omit them
const Usage_schema = z.object({
  input_tokens: z.number().optional(),
  output_tokens: z.number().optional(),
  total_tokens: z.number().optional(),
});
|
||||
|
||||
// Interaction status — lifecycle states of an interaction
const Status_enum = z.enum([
  'in_progress',
  'completed',
  'requires_action', // e.g. awaiting a function call result
  'failed',
  'cancelled',
]);
|
||||
|
||||
// Response (non-streaming)
export type Response = z.infer<typeof Response_schema>;
export const Response_schema = z.object({
  id: z.string(), // presumably reusable as previous_interaction_id in follow-ups — verify against API docs
  status: Status_enum,
  outputs: z.array(ContentPart_Output_schema).optional(),
  usage: Usage_schema.optional(),
});
|
||||
|
||||
|
||||
// Streaming event types
|
||||
|
||||
const ContentDeltaEvent_schema = z.object({
|
||||
event_type: z.literal('content.delta'),
|
||||
delta: z.union([
|
||||
z.object({ type: z.literal('text'), text: z.string() }),
|
||||
z.object({ type: z.literal('thought'), thought: z.string() }),
|
||||
]),
|
||||
});
|
||||
|
||||
// Streaming event: terminal event carrying the full, final interaction object.
const InteractionCompleteEvent_schema = z.object({
  event_type: z.literal('interaction.complete'),
  interaction: Response_schema,
});
|
||||
|
||||
export type StreamEvent = z.infer<typeof StreamEvent_schema>;
|
||||
export const StreamEvent_schema = z.union([
|
||||
ContentDeltaEvent_schema,
|
||||
InteractionCompleteEvent_schema,
|
||||
]);
|
||||
|
||||
}
|
||||
|
||||
@@ -35,6 +35,14 @@ export const geminiAccessSchema = z.object({
|
||||
});
|
||||
|
||||
|
||||
/**
|
||||
* Build Gemini API access parameters for generateContent and other model APIs.
|
||||
*
|
||||
* @param access Gemini access configuration
|
||||
* @param modelRefId Model ID to use in the path (e.g., 'models/gemini-pro')
|
||||
* @param apiPath API path template (e.g., '/v1beta/{model=models/*}:generateContent')
|
||||
* @param useV1Alpha Whether to use v1alpha API version (for experimental features)
|
||||
*/
|
||||
export function geminiAccess(access: GeminiAccessSchema, modelRefId: string | null, apiPath: string, useV1Alpha: boolean): { headers: HeadersInit, url: string } {
|
||||
|
||||
const geminiHost = llmsFixupHost(access.geminiHost || DEFAULT_GEMINI_HOST, apiPath);
|
||||
|
||||
@@ -2,7 +2,7 @@ import type { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/w
|
||||
|
||||
import type { ModelDescriptionSchema } from '../llm.server.types';
|
||||
|
||||
import { LLM_IF_GEM_CodeExecution, LLM_IF_HOTFIX_NoStream, LLM_IF_HOTFIX_NoTemperature, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
|
||||
import { LLM_IF_GEM_CodeExecution, LLM_IF_GEM_Interactions, LLM_IF_HOTFIX_NoStream, LLM_IF_HOTFIX_NoTemperature, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
|
||||
import { Release } from '~/common/app.release';
|
||||
|
||||
|
||||
@@ -198,6 +198,23 @@ const _knownGeminiModels: ({
|
||||
benchmark: undefined, // Non-benchmarkable because generates images
|
||||
},
|
||||
|
||||
/// Agents (Interactions API)
|
||||
|
||||
// Deep Research Agent - Available via Interactions API
|
||||
// https://ai.google.dev/gemini-api/docs/deep-research
|
||||
{
|
||||
id: 'agents/deep-research-pro-preview-12-2025',
|
||||
labelOverride: 'Deep Research Pro Preview',
|
||||
isPreview: true,
|
||||
chatPrice: gemini25ProPricing, // Uses similar pricing to 2.5 Pro
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmVndGeminiInteractionsAgent' }, // Enables Interactions API with agent name
|
||||
],
|
||||
benchmark: undefined, // Agent-based, not benchmarkable
|
||||
// Note: This model uses background=true by default for long-running research tasks
|
||||
},
|
||||
|
||||
/// Generation 2.5
|
||||
|
||||
// 2.5 Pro (Stable) - Released June 17, 2025
|
||||
@@ -710,6 +727,10 @@ const _sortOderIdPrefix: string[] = [
|
||||
'models/gemini-3-pro',
|
||||
'models/gemini-3-',
|
||||
|
||||
// Agents (Interactions API)
|
||||
'agents/deep-research-pro-preview',
|
||||
'agents/',
|
||||
|
||||
'models/gemini-exp',
|
||||
|
||||
'models/gemini-2.5-pro',
|
||||
|
||||
@@ -94,6 +94,7 @@ const ModelParameterSpec_schema = z.object({
|
||||
'llmVndGeminiShowThoughts',
|
||||
'llmVndGeminiThinkingBudget',
|
||||
'llmVndGeminiThinkingLevel',
|
||||
'llmVndGeminiInteractionsAgent',
|
||||
// 'llmVndGeminiUrlContext',
|
||||
// Moonshot
|
||||
'llmVndMoonshotWebSearch',
|
||||
|
||||
Reference in New Issue
Block a user