DMessages/AIX: broaden upstreamHandle

Enrico Ros
2026-04-21 16:10:56 -07:00
parent d8f8999333
commit 974aa12137
13 changed files with 92 additions and 43 deletions
@@ -1,4 +1,5 @@
import * as React from 'react';
import TimeAgo from 'react-timeago';
import { Box, Button, ButtonGroup, Tooltip, Typography } from '@mui/joy';
import PlayArrowRoundedIcon from '@mui/icons-material/PlayArrowRounded';
@@ -22,6 +23,11 @@ export function BlockOpUpstreamResume(props: {
const [isDeleting, setIsDeleting] = React.useState(false);
const [error, setError] = React.useState<string | null>(null);
// expiration: boolean is evaluated at render (may lag briefly if nothing re-renders past expiry).
// TimeAgo handles its own tick for the label; the button's disabled state is the only consumer of this flag.
const { expiresAt, runId = '' } = props.upstreamHandle;
const isExpired = expiresAt != null && Date.now() > expiresAt;
// handlers
const handleResume = React.useCallback(async () => {
@@ -77,7 +83,7 @@ export function BlockOpUpstreamResume(props: {
{props.onResume && (
<Tooltip title='Resume generation from last checkpoint'>
<Button
disabled={isResuming || isCancelling || isDeleting}
disabled={isResuming || isCancelling || isDeleting || isExpired}
loading={isResuming}
startDecorator={<PlayArrowRoundedIcon sx={{ color: 'success.solidBg' }} />}
onClick={handleResume}
@@ -121,7 +127,8 @@ export function BlockOpUpstreamResume(props: {
)}
<Typography level='body-xs' sx={{ fontSize: '0.65rem', opacity: 0.6 }}>
Response ID: {props.upstreamHandle.responseId.slice(0, 12)}...
Run ID: {runId.slice(0, 12)}...
{!!expiresAt && <> · Expires <TimeAgo date={expiresAt} /></>}
</Typography>
</Box>
);
@@ -162,6 +162,7 @@ export function ChatMessage(props: {
onMessageBeam?: (messageId: string) => Promise<void>,
onMessageBranch?: (messageId: string) => void,
onMessageContinue?: (messageId: string, continueText: null | string) => void,
onMessageUpstreamResume?: (messageId: string) => Promise<void>,
onMessageDelete?: (messageId: string) => void,
onMessageFragmentAppend?: (messageId: DMessageId, fragment: DMessageFragment) => void,
onMessageFragmentDelete?: (messageId: DMessageId, fragmentId: DMessageFragmentId) => void,
@@ -246,7 +247,7 @@ export function ChatMessage(props: {
// const wordsDiff = useWordsDifference(textSubject, props.diffPreviousText, showDiff);
const { onMessageAssistantFrom, onMessageDelete, onMessageFragmentAppend, onMessageFragmentDelete, onMessageFragmentReplace, onMessageContinue } = props;
const { onMessageAssistantFrom, onMessageDelete, onMessageFragmentAppend, onMessageFragmentDelete, onMessageFragmentReplace, onMessageContinue, onMessageUpstreamResume } = props;
const handleFragmentNew = React.useCallback(() => {
onMessageFragmentAppend?.(messageId, createTextContentFragment(''));
@@ -264,6 +265,10 @@ export function ChatMessage(props: {
onMessageContinue?.(messageId, continueText);
}, [messageId, onMessageContinue]);
const handleUpstreamResume = React.useCallback(() => {
return onMessageUpstreamResume?.(messageId);
}, [messageId, onMessageUpstreamResume]);
// Text Editing
@@ -887,13 +892,11 @@ export function ChatMessage(props: {
/>
)}
{/* Upstream Resume... */}
{props.isBottom && fromAssistant && lastFragmentIsError && messageGenerator?.upstreamHandle?.responseId && (
{/* Upstream Resume - shows whenever there's a stored handle (incl. post-reload, where no error fragment is present) */}
{fromAssistant && messageGenerator?.upstreamHandle && (
<BlockOpUpstreamResume
upstreamHandle={messageGenerator.upstreamHandle}
onResume={console.error}
onCancel={console.error}
onDelete={console.error}
onResume={onMessageUpstreamResume ? handleUpstreamResume : undefined}
/>
)}
+4 -5
@@ -130,11 +130,10 @@ export type DMessageGenerator = ({
containerId: string,
expiresAt: string, // ISO 8601 UTC timestamp (e.g., "2026-04-07T05:59:32Z")
},
upstreamHandle?: {
uht: 'vnd.oai.responses',
responseId: string,
expiresAt: number | null, // null = never expires
},
upstreamHandle?:
// unified `runId` across variants - vendor-specific id lives behind it; `uht` is consulted only for dispatch routing
| { uht: 'vnd.oai.responses', runId: string /* OpenAI `response.id` */, expiresAt: number | null /* null = never expires */ }
| { uht: 'vnd.gem.interactions', runId: string /* Gemini `interaction.id` */, expiresAt: number | null },
tokenStopReason?:
| 'client-abort' // if the generator stopped due to a client abort signal
| 'filter' // (inline filter message injected) if the generator stopped due to a filter
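As a rough sketch of the dispatch routing the comment above describes, a consumer can switch on `uht` and read the unified `runId` without caring which vendor issued it. The type alias and function below are invented for illustration; only the endpoint paths echo the Resume Handle comments later in this diff.

// illustration only (names assumed, not part of this commit)
type UpstreamHandle =
  | { uht: 'vnd.oai.responses', runId: string, expiresAt: number | null }
  | { uht: 'vnd.gem.interactions', runId: string, expiresAt: number | null };

function upstreamPathFor(handle: UpstreamHandle): string {
  switch (handle.uht) {
    case 'vnd.oai.responses':
      return `/v1/responses/${handle.runId}`;        // OpenAI Responses: runId is response.id
    case 'vnd.gem.interactions':
      return `/v1beta/interactions/${handle.runId}`; // Gemini Interactions: runId is interaction.id
  }
}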
+3
@@ -149,6 +149,7 @@ export type DModelInterfaceV1 =
| 'oai-chat-reasoning'
| 'ant-prompt-caching'
| 'gem-code-execution'
| 'gem-interactions'
| 'oai-prompt-caching'
| 'oai-realtime'
| 'oai-responses'
@@ -180,6 +181,7 @@ export const LLM_IF_Outputs_NoText: DModelInterfaceV1 = 'outputs-no-text';
export const LLM_IF_Tools_WebSearch: DModelInterfaceV1 = 'tools-web-search';
export const LLM_IF_ANT_PromptCaching: DModelInterfaceV1 = 'ant-prompt-caching';
export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution';
export const LLM_IF_GEM_Interactions: DModelInterfaceV1 = 'gem-interactions';
export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching';
export const LLM_IF_OAI_Responses: DModelInterfaceV1 = 'oai-responses';
export const LLM_IF_HOTFIX_NoStream: DModelInterfaceV1 = 'hotfix-no-stream';
@@ -207,6 +209,7 @@ export const LLMS_ALL_INTERFACES = [
// Vendor-specific capabilities
LLM_IF_ANT_PromptCaching, // [Anthropic] model supports anthropic-specific caching
LLM_IF_GEM_CodeExecution, // [Gemini] Tool: code execution
LLM_IF_GEM_Interactions, // [Gemini] Interactions API (required by Deep Research agents)
LLM_IF_OAI_PromptCaching, // [OpenAI] model supports OpenAI prompt caching
LLM_IF_OAI_Responses, // [OpenAI] Responses API (new) support
// Hotfixes to patch specific model quirks
+2 -1
@@ -1043,7 +1043,8 @@ export class ContentReassembler {
private onResponseHandle({ handle }: Extract<AixWire_Particles.ChatGenerateOp, { cg: 'set-upstream-handle' }>): void {
// validate the handle
if (handle?.uht !== 'vnd.oai.responses' || !handle?.responseId || handle?.expiresAt === undefined) {
const knownUht = handle?.uht === 'vnd.oai.responses' || handle?.uht === 'vnd.gem.interactions';
if (!knownUht || !handle?.runId || handle.expiresAt === undefined) {
this._appendReassemblyDevError(`Invalid response handle received: ${JSON.stringify(handle)}`);
return;
}
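To make the broadened check concrete - an explicit expiresAt of null ("never expires") passes, while an omitted expiresAt does not - here is the same predicate pulled out as a standalone sketch (name and sample values invented):

// sketch only, mirrors the validation above
const isValidUpstreamHandle = (h: any): boolean =>
  (h?.uht === 'vnd.oai.responses' || h?.uht === 'vnd.gem.interactions') && !!h?.runId && h?.expiresAt !== undefined;

// isValidUpstreamHandle({ uht: 'vnd.gem.interactions', runId: 'int_42', expiresAt: null }) === true
// isValidUpstreamHandle({ uht: 'vnd.oai.responses', runId: 'resp_9' })                     === false (expiresAt missing)
// isValidUpstreamHandle({ uht: 'vnd.other', runId: 'x', expiresAt: null })                 === false (unknown uht)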
+3 -1
@@ -3,7 +3,7 @@ import { findServiceAccessOrThrow } from '~/modules/llms/vendors/vendor.helpers'
import type { MaybePromise } from '~/common/types/useful.types';
import { AIVndAntInlineFilesPolicy, getVndAntInlineFiles } from '~/common/stores/store-ai';
import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
import { DLLM, DLLMId, LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Responses, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { DLLM, DLLMId, LLM_IF_GEM_Interactions, LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Responses, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { DMessage, DMessageGenerator, createGeneratorAIX_AutoLabel } from '~/common/stores/chat/chat.message';
import { DMetricsChatGenerate_Lg, DMetricsChatGenerate_Md, metricsChatGenerateLgToMd, metricsComputeChatGenerateCostsMd, } from '~/common/stores/metrics/metrics.chatgenerate';
import { DModelParameterValues, getAllModelParameterValues } from '~/common/stores/llms/llms.parameters';
@@ -83,6 +83,7 @@ export function aixCreateModelFromLLMOptions(
// Output APIs
const llmVndOaiResponsesAPI = llmInterfaces.includes(LLM_IF_OAI_Responses);
const llmVndGeminiInteractions = llmInterfaces.includes(LLM_IF_GEM_Interactions);
// Client-side late stage model HotFixes
const hotfixOmitTemperature = llmInterfaces.includes(LLM_IF_HOTFIX_NoTemperature);
@@ -127,6 +128,7 @@ export function aixCreateModelFromLLMOptions(
...(llmVndBedrockAPI ? { vndBedrockAPI: llmVndBedrockAPI } : {}),
// Gemini
...(llmVndGeminiInteractions ? { vndGeminiAPI: 'interactions-agent' } : {}),
...(llmVndGeminiAspectRatio ? { vndGeminiAspectRatio: llmVndGeminiAspectRatio } : {}),
...(llmVndGeminiCodeExecution === 'auto' ? { vndGeminiCodeExecution: llmVndGeminiCodeExecution } : {}),
...(llmVndGeminiComputerUse ? { vndGeminiComputerUse: llmVndGeminiComputerUse } : {}),
+16 -7
@@ -496,6 +496,7 @@ export namespace AixWire_API {
vndBedrockAPI: z.enum(['converse', 'invoke-anthropic', 'mantle']).optional(),
// Gemini
vndGeminiAPI: z.enum(['interactions-agent']).optional(), // opt-in per-model API dialect; unset = generateContent
vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(),
vndGeminiCodeExecution: z.enum(['auto']).optional(),
vndGeminiComputerUse: z.enum(['browser']).optional(),
@@ -535,13 +536,21 @@ export namespace AixWire_API {
/// Resume Handle
/**
* TEMP - Not well defined yet - OpenAI Responses-only implementation
* [OpenAI Responses API] Allows reconnecting to an in-progress response by its ID.
* Discriminated by upstream handle type:
*  - vnd.oai.responses: OpenAI Responses API - GET /v1/responses/{id}
*  - vnd.gem.interactions: Gemini Interactions API for background agents - GET-poll /v1beta/interactions/{id}
*/
export const ResumeHandle_schema = z.object({
responseId: z.string(),
startingAfter: z.number().optional(), // the sequence number of event after which to start streaming
});
export const ResumeHandle_schema = z.discriminatedUnion('uht', [
z.object({
uht: z.literal('vnd.oai.responses'),
runId: z.string(), // upstream: OpenAI Responses `response.id`
startingAfter: z.number().optional(), // the sequence number of event after which to start streaming
}),
z.object({
uht: z.literal('vnd.gem.interactions'),
runId: z.string(), // upstream: Gemini Interactions `interaction.id`
}),
]);
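A brief usage sketch (input values invented): because the schema is a zod discriminatedUnion on 'uht', an equality check on the parsed value narrows the type, so startingAfter is only reachable on the OpenAI variant.

const handle = ResumeHandle_schema.parse({ uht: 'vnd.oai.responses', runId: 'resp_123', startingAfter: 42 });
if (handle.uht === 'vnd.oai.responses')
  console.log(handle.runId, handle.startingAfter); // OpenAI: may resume streaming after a sequence number
else
  console.log(handle.runId);                       // Gemini: poll-only, no startingAfter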
/// Context
@@ -674,7 +683,7 @@ export namespace AixWire_Particles {
| { cg: 'set-metrics', metrics: CGSelectMetrics }
| { cg: 'set-model', name: string }
| { cg: 'set-provider-infra', label: string }
| { cg: 'set-upstream-handle', handle: { uht: 'vnd.oai.responses', responseId: string, expiresAt: number | null } }
| { cg: 'set-upstream-handle', handle: { uht: 'vnd.oai.responses' | 'vnd.gem.interactions', runId: string, expiresAt: number | null } }
| { cg: '_debugDispatchRequest', security: 'dev-env', dispatchRequest: { url: string, headers: string, body: string, bodySize: number } } // may generalize this in the future
| { cg: '_debugProfiler', measurements: Record<string, number | string>[] };
@@ -518,16 +518,19 @@ export class ChatGenerateTransmitter implements IParticleTransmitter {
}
/** Communicates the upstream response handle, for remote control/resumability */
setUpstreamHandle(handle: string, _type: 'oai-responses' /* the only one for now, used for type safety */) {
setUpstreamHandle(handle: string, type: 'vnd.oai.responses' | 'vnd.gem.interactions') {
if (SERVER_DEBUG_WIRE)
console.log('|response-handle|', handle);
console.log('|response-handle|', type, handle);
// NOTE: if needed, we could store the handle locally for server-side resumability, but for now we only implement client-side (manual) resumption
const expireDays = type === 'vnd.gem.interactions'
? 1 // Gemini Interactions: 1d free / 55d paid - use the conservative lower bound
: 30; // OpenAI Responses: default 30 days
this.transmissionQueue.push({
cg: 'set-upstream-handle',
handle: {
uht: 'vnd.oai.responses',
responseId: handle,
expiresAt: Date.now() + 30 * 24 * 3600 * 1000, // default: 30 days expiry
uht: type,
runId: handle,
expiresAt: Date.now() + expireDays * 24 * 3600 * 1000,
},
});
// send it right away, in case the connection closes soon
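The expiry policy above reduces to a tiny pure function; this standalone sketch (names invented) shows the arithmetic, where one day is 86,400,000 ms:

const DAY_MS = 24 * 3600 * 1000; // 86,400,000
function upstreamExpiryMs(type: 'vnd.oai.responses' | 'vnd.gem.interactions', now = Date.now()): number {
  const days = type === 'vnd.gem.interactions' ? 1 : 30; // Gemini: conservative 1-day lower bound; OpenAI: 30-day default
  return now + days * DAY_MS;
}
// upstreamExpiryMs('vnd.gem.interactions', 1_700_000_000_000) === 1_700_086_400_000
// upstreamExpiryMs('vnd.oai.responses',    1_700_000_000_000) === 1_702_592_000_000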
@@ -277,7 +277,9 @@ export async function createChatGenerateResumeDispatch(access: AixAPI_Access, re
case 'openrouter':
// ASSUME the OpenAI Responses API - https://platform.openai.com/docs/api-reference/responses/get
const { url, headers } = openAIAccess(access, '', `${OPENAI_API_PATHS.responses}/${resumeHandle.responseId}`);
if (resumeHandle.uht !== 'vnd.oai.responses')
throw new Error(`Resume handle mismatch for ${dialect}: expected 'vnd.oai.responses', got '${resumeHandle.uht}'`);
const { url, headers } = openAIAccess(access, '', `${OPENAI_API_PATHS.responses}/${resumeHandle.runId /* OpenAI response.id */}`);
const queryParams = new URLSearchParams({
stream: streaming ? 'true' : 'false',
...(!!resumeHandle.startingAfter && { starting_after: resumeHandle.startingAfter.toString() }),
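For the OpenAI variant the pieces above compose into a plain GET; a self-contained sketch (function name and base URL assumed) of the resulting request:

function buildResumeUrl(baseUrl: string, runId: string, streaming: boolean, startingAfter?: number): string {
  const qs = new URLSearchParams({
    stream: streaming ? 'true' : 'false',
    ...(startingAfter !== undefined && { starting_after: startingAfter.toString() }),
  });
  return `${baseUrl}/v1/responses/${runId}?${qs.toString()}`;
}
// buildResumeUrl('https://api.openai.com', 'resp_abc123', true, 42)
//   -> 'https://api.openai.com/v1/responses/resp_abc123?stream=true&starting_after=42'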
@@ -96,7 +96,7 @@ export interface IParticleTransmitter {
setProviderInfraLabel(label: string): void;
/** Communicates the upstream response handle, for remote control/resumability */
setUpstreamHandle(handle: string, type: 'oai-responses'): void;
setUpstreamHandle(handle: string, type: 'vnd.oai.responses' | 'vnd.gem.interactions'): void;
/** Update the metrics, sent twice (after the first call, and then at the end of the transmission) */
updateMetrics(update: Partial<AixWire_Particles.CGSelectMetrics>): void;
@@ -316,7 +316,7 @@ export function createOpenAIResponsesEventParser(): ChatGenerateParseFunction {
// -> Upstream Handle (for remote control: resume, cancel, delete)
// Implementation NOTE: we don't track sequence numbers for partial resumes - we just re-download the full response
if (event.response.store && event.response.id)
pt.setUpstreamHandle(event.response.id, 'oai-responses' /*, event.sequence_number - commented, unused for now */);
pt.setUpstreamHandle(event.response.id, 'vnd.oai.responses' /*, event.sequence_number - commented, unused for now */);
// -> Hosted tool configs: cache for per-event enrichment (e.g. image generation progress)
R.captureHostedToolConfigs(event.response.tools);
+32 -12
@@ -1,7 +1,7 @@
import type { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import type { DModelParameterId } from '~/common/stores/llms/llms.parameters';
import { LLM_IF_GEM_CodeExecution, LLM_IF_HOTFIX_NoStream, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { LLM_IF_GEM_CodeExecution, LLM_IF_GEM_Interactions, LLM_IF_HOTFIX_NoStream, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { Release } from '~/common/app.release';
import type { ModelDescriptionSchema, OrtVendorLookupResult } from '../llm.server.types';
@@ -42,9 +42,6 @@ const filterLyingModelNames: GeminiWire_API_Models_List.Model['name'][] = [
// 2024-12-10: names of models that are not what they say they are (e.g. 1114 is actually 1121 as of )
'models/gemini-1.5-flash-8b-exp-0924', // replaced by non-free
'models/gemini-1.5-flash-8b-exp-0827', // replaced by non-free
// Interactions API not supported yet - once added, re-enable this model
'models/deep-research-pro-preview-12-2025',
];
@@ -374,18 +371,39 @@ const _knownGeminiModels: ({
// hidden: true, // audio outputs are unavailable as of 2025-05-27
},
// Deep Research Pro Preview - Released December 12, 2025
// Autonomous research agent for complex research task planning
// Deep Research agents - require the Interactions API
// Deep Research Preview - Released April 21, 2026 (latest)
{
hidden: true, // not supported, requires "Interactions API"
id: 'models/deep-research-preview-04-2026',
labelOverride: 'Deep Research Preview (2026-04)',
isPreview: true,
chatPrice: gemini25ProPricing, // pricing not explicitly listed; using 2.5 Pro as baseline
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [],
benchmark: undefined, // Deep research model, not benchmarkable on standard tests
// 128K input, 64K output
},
// Deep Research Max Preview - Released April 21, 2026
{
id: 'models/deep-research-max-preview-04-2026',
labelOverride: 'Deep Research Max Preview (2026-04)',
isPreview: true,
chatPrice: gemini25ProPricing, // baseline estimate (see note above)
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [],
benchmark: undefined, // Deep research model, not benchmarkable on standard tests
},
// Deep Research Pro Preview - Released December 12, 2025
{
hidden: true, // yield to newer 2026-04 models
id: 'models/deep-research-pro-preview-12-2025',
labelOverride: 'Deep Research Pro Preview',
isPreview: true,
chatPrice: gemini25ProPricing, // Pricing not explicitly listed, using 2.5 Pro as baseline
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning],
parameterSpecs: [
{ paramId: 'llmVndGeminiThinkingBudget' },
],
chatPrice: gemini25ProPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [{ paramId: 'llmVndGeminiThinkingBudget' }],
benchmark: undefined, // Deep research model, not benchmarkable on standard tests
// Note: 128K input context, 64K output context
},
@@ -773,6 +791,8 @@ const _sortOderIdPrefix: string[] = [
'models/gemini-2.5-pro-',
'models/gemini-2.5-pro-preview-tts',
'models/deep-research-max-preview',
'models/deep-research-preview',
'models/deep-research-pro-preview',
'models/gemini-2.5-flash-preview-09',
+1 -1
@@ -467,7 +467,7 @@ class SweepCollectorTransmitter implements IParticleTransmitter {
// Non-parts data
setModelName(_modelName: string): void { /* no-op */ }
setProviderInfraLabel(_label: string): void { /* no-op */ }
setUpstreamHandle(_handle: string, _type: 'oai-responses'): void { /* no-op */ }
setUpstreamHandle(_handle: string, _type: string): void { /* no-op */ }
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason): void { this.tokenStopReason = reason; }
updateMetrics(_update: Partial<AixWire_Particles.CGSelectMetrics>): void { /* no-op */ }
}