DMessages/AIX: broaden upstreamHandle

Enrico Ros
2026-04-21 16:10:56 -07:00
parent d8f8999333
commit 974aa12137
13 changed files with 92 additions and 43 deletions
@@ -1,4 +1,5 @@
import * as React from 'react';
import TimeAgo from 'react-timeago';
import { Box, Button, ButtonGroup, Tooltip, Typography } from '@mui/joy';
import PlayArrowRoundedIcon from '@mui/icons-material/PlayArrowRounded';
@@ -22,6 +23,11 @@ export function BlockOpUpstreamResume(props: {
const [isDeleting, setIsDeleting] = React.useState(false);
const [error, setError] = React.useState<string | null>(null);
// expiration: boolean is evaluated at render (may lag briefly if nothing re-renders past expiry).
// TimeAgo handles its own tick for the label; the button's disabled state is the only consumer of this flag.
const { expiresAt, runId = '' } = props.upstreamHandle;
const isExpired = expiresAt != null && Date.now() > expiresAt;
// handlers
const handleResume = React.useCallback(async () => {
@@ -77,7 +83,7 @@ export function BlockOpUpstreamResume(props: {
{props.onResume && (
<Tooltip title='Resume generation from last checkpoint'>
<Button
disabled={isResuming || isCancelling || isDeleting}
disabled={isResuming || isCancelling || isDeleting || isExpired}
loading={isResuming}
startDecorator={<PlayArrowRoundedIcon sx={{ color: 'success.solidBg' }} />}
onClick={handleResume}
@@ -121,7 +127,8 @@ export function BlockOpUpstreamResume(props: {
)}
<Typography level='body-xs' sx={{ fontSize: '0.65rem', opacity: 0.6 }}>
Response ID: {props.upstreamHandle.responseId.slice(0, 12)}...
Run ID: {runId.slice(0, 12)}...
{!!expiresAt && <> · Expires <TimeAgo date={expiresAt} /></>}
</Typography>
</Box>
);
@@ -162,6 +162,7 @@ export function ChatMessage(props: {
onMessageBeam?: (messageId: string) => Promise<void>,
onMessageBranch?: (messageId: string) => void,
onMessageContinue?: (messageId: string, continueText: null | string) => void,
onMessageUpstreamResume?: (messageId: string) => Promise<void>,
onMessageDelete?: (messageId: string) => void,
onMessageFragmentAppend?: (messageId: DMessageId, fragment: DMessageFragment) => void,
onMessageFragmentDelete?: (messageId: DMessageId, fragmentId: DMessageFragmentId) => void,
@@ -246,7 +247,7 @@ export function ChatMessage(props: {
// const wordsDiff = useWordsDifference(textSubject, props.diffPreviousText, showDiff);
const { onMessageAssistantFrom, onMessageDelete, onMessageFragmentAppend, onMessageFragmentDelete, onMessageFragmentReplace, onMessageContinue } = props;
const { onMessageAssistantFrom, onMessageDelete, onMessageFragmentAppend, onMessageFragmentDelete, onMessageFragmentReplace, onMessageContinue, onMessageUpstreamResume } = props;
const handleFragmentNew = React.useCallback(() => {
onMessageFragmentAppend?.(messageId, createTextContentFragment(''));
@@ -264,6 +265,10 @@ export function ChatMessage(props: {
onMessageContinue?.(messageId, continueText);
}, [messageId, onMessageContinue]);
const handleUpstreamResume = React.useCallback(() => {
return onMessageUpstreamResume?.(messageId);
}, [messageId, onMessageUpstreamResume]);
// Text Editing
@@ -887,13 +892,11 @@ export function ChatMessage(props: {
/>
)}
{/* Upstream Resume... */}
{props.isBottom && fromAssistant && lastFragmentIsError && messageGenerator?.upstreamHandle?.responseId && (
{/* Upstream Resume - shows whenever there's a stored handle (incl. post-reload, where no error fragment is present) */}
{fromAssistant && messageGenerator?.upstreamHandle && (
<BlockOpUpstreamResume
upstreamHandle={messageGenerator.upstreamHandle}
onResume={console.error}
onCancel={console.error}
onDelete={console.error}
onResume={onMessageUpstreamResume ? handleUpstreamResume : undefined}
/>
)}
+4 -5
@@ -130,11 +130,10 @@ export type DMessageGenerator = ({
containerId: string,
expiresAt: string, // ISO 8601 UTC timestamp (e.g., "2026-04-07T05:59:32Z")
},
upstreamHandle?: {
uht: 'vnd.oai.responses',
responseId: string,
expiresAt: number | null, // null = never expires
},
upstreamHandle?:
// unified `runId` across variants - vendor-specific id lives behind it; `uht` is consulted only for dispatch routing
| { uht: 'vnd.oai.responses', runId: string /* OpenAI `response.id` */, expiresAt: number | null /* null = never expires */ }
| { uht: 'vnd.gem.interactions', runId: string /* Gemini `interaction.id` */, expiresAt: number | null },
tokenStopReason?:
| 'client-abort' // if the generator stopped due to a client abort signal
| 'filter' // (inline filter message injected) if the generator stopped due to a filter
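As a rough sketch of the dispatch routing the comment above describes, a consumer can switch on `uht` and read the unified `runId` without caring which vendor issued it. The type alias and function below are invented for illustration; only the endpoint paths echo the Resume Handle comments later in this diff.

// illustration only (names assumed, not part of this commit)
type UpstreamHandle =
  | { uht: 'vnd.oai.responses', runId: string, expiresAt: number | null }
  | { uht: 'vnd.gem.interactions', runId: string, expiresAt: number | null };

function upstreamPathFor(handle: UpstreamHandle): string {
  switch (handle.uht) {
    case 'vnd.oai.responses':
      return `/v1/responses/${handle.runId}`;        // OpenAI Responses: runId is response.id
    case 'vnd.gem.interactions':
      return `/v1beta/interactions/${handle.runId}`; // Gemini Interactions: runId is interaction.id
  }
}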
+3
@@ -149,6 +149,7 @@ export type DModelInterfaceV1 =
| 'oai-chat-reasoning'
| 'ant-prompt-caching'
| 'gem-code-execution'
| 'gem-interactions'
| 'oai-prompt-caching'
| 'oai-realtime'
| 'oai-responses'
@@ -180,6 +181,7 @@ export const LLM_IF_Outputs_NoText: DModelInterfaceV1 = 'outputs-no-text';
export const LLM_IF_Tools_WebSearch: DModelInterfaceV1 = 'tools-web-search';
export const LLM_IF_ANT_PromptCaching: DModelInterfaceV1 = 'ant-prompt-caching';
export const LLM_IF_GEM_CodeExecution: DModelInterfaceV1 = 'gem-code-execution';
export const LLM_IF_GEM_Interactions: DModelInterfaceV1 = 'gem-interactions';
export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching';
export const LLM_IF_OAI_Responses: DModelInterfaceV1 = 'oai-responses';
export const LLM_IF_HOTFIX_NoStream: DModelInterfaceV1 = 'hotfix-no-stream';
@@ -207,6 +209,7 @@ export const LLMS_ALL_INTERFACES = [
// Vendor-specific capabilities
LLM_IF_ANT_PromptCaching, // [Anthropic] model supports anthropic-specific caching
LLM_IF_GEM_CodeExecution, // [Gemini] Tool: code execution
LLM_IF_GEM_Interactions, // [Gemini] Interactions API (required by Deep Research agents)
LLM_IF_OAI_PromptCaching, // [OpenAI] model supports OpenAI prompt caching
LLM_IF_OAI_Responses, // [OpenAI] Responses API (new) support
// Hotfixes to patch specific model quirks
+2 -1
@@ -1043,7 +1043,8 @@ export class ContentReassembler {
private onResponseHandle({ handle }: Extract<AixWire_Particles.ChatGenerateOp, { cg: 'set-upstream-handle' }>): void {
// validate the handle
if (handle?.uht !== 'vnd.oai.responses' || !handle?.responseId || handle?.expiresAt === undefined) {
const knownUht = handle?.uht === 'vnd.oai.responses' || handle?.uht === 'vnd.gem.interactions';
if (!knownUht || !handle?.runId || handle.expiresAt === undefined) {
this._appendReassemblyDevError(`Invalid response handle received: ${JSON.stringify(handle)}`);
return;
}
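To make the broadened check concrete - an explicit expiresAt of null ("never expires") passes, while an omitted expiresAt does not - here is the same predicate pulled out as a standalone sketch (name and sample values invented):

// sketch only, mirrors the validation above
const isValidUpstreamHandle = (h: any): boolean =>
  (h?.uht === 'vnd.oai.responses' || h?.uht === 'vnd.gem.interactions') && !!h?.runId && h?.expiresAt !== undefined;

// isValidUpstreamHandle({ uht: 'vnd.gem.interactions', runId: 'int_42', expiresAt: null }) === true
// isValidUpstreamHandle({ uht: 'vnd.oai.responses', runId: 'resp_9' })                     === false (expiresAt missing)
// isValidUpstreamHandle({ uht: 'vnd.other', runId: 'x', expiresAt: null })                 === false (unknown uht)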
+3 -1
@@ -3,7 +3,7 @@ import { findServiceAccessOrThrow } from '~/modules/llms/vendors/vendor.helpers'
import type { MaybePromise } from '~/common/types/useful.types';
import { AIVndAntInlineFilesPolicy, getVndAntInlineFiles } from '~/common/stores/store-ai';
import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
import { DLLM, DLLMId, LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Responses, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { DLLM, DLLMId, LLM_IF_GEM_Interactions, LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Responses, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { DMessage, DMessageGenerator, createGeneratorAIX_AutoLabel } from '~/common/stores/chat/chat.message';
import { DMetricsChatGenerate_Lg, DMetricsChatGenerate_Md, metricsChatGenerateLgToMd, metricsComputeChatGenerateCostsMd, } from '~/common/stores/metrics/metrics.chatgenerate';
import { DModelParameterValues, getAllModelParameterValues } from '~/common/stores/llms/llms.parameters';
@@ -83,6 +83,7 @@ export function aixCreateModelFromLLMOptions(
// Output APIs
const llmVndOaiResponsesAPI = llmInterfaces.includes(LLM_IF_OAI_Responses);
const llmVndGeminiInteractions = llmInterfaces.includes(LLM_IF_GEM_Interactions);
// Client-side late stage model HotFixes
const hotfixOmitTemperature = llmInterfaces.includes(LLM_IF_HOTFIX_NoTemperature);
@@ -127,6 +128,7 @@ export function aixCreateModelFromLLMOptions(
...(llmVndBedrockAPI ? { vndBedrockAPI: llmVndBedrockAPI } : {}),
// Gemini
...(llmVndGeminiInteractions ? { vndGeminiAPI: 'interactions-agent' } : {}),
...(llmVndGeminiAspectRatio ? { vndGeminiAspectRatio: llmVndGeminiAspectRatio } : {}),
...(llmVndGeminiCodeExecution === 'auto' ? { vndGeminiCodeExecution: llmVndGeminiCodeExecution } : {}),
...(llmVndGeminiComputerUse ? { vndGeminiComputerUse: llmVndGeminiComputerUse } : {}),
+16 -7
@@ -496,6 +496,7 @@ export namespace AixWire_API {
vndBedrockAPI: z.enum(['converse', 'invoke-anthropic', 'mantle']).optional(),
// Gemini
vndGeminiAPI: z.enum(['interactions-agent']).optional(), // opt-in per-model API dialect; unset = generateContent
vndGeminiAspectRatio: z.enum(['1:1', '2:3', '3:2', '3:4', '4:3', '9:16', '16:9', '21:9']).optional(),
vndGeminiCodeExecution: z.enum(['auto']).optional(),
vndGeminiComputerUse: z.enum(['browser']).optional(),
@@ -535,13 +536,21 @@ export namespace AixWire_API {
/// Resume Handle
/**
* TEMP - Not well defined yet - OpenAI Responses-only implementation
* [OpenAI Responses API] Allows reconnecting to an in-progress response by its ID.
* Discriminated by upstream handle type:
*  - vnd.oai.responses: OpenAI Responses API - GET /v1/responses/{id}
*  - vnd.gem.interactions: Gemini Interactions API for background agents - GET-poll /v1beta/interactions/{id}
*/
export const ResumeHandle_schema = z.object({
responseId: z.string(),
startingAfter: z.number().optional(), // the sequence number of event after which to start streaming
});
export const ResumeHandle_schema = z.discriminatedUnion('uht', [
z.object({
uht: z.literal('vnd.oai.responses'),
runId: z.string(), // upstream: OpenAI Responses `response.id`
startingAfter: z.number().optional(), // the sequence number of event after which to start streaming
}),
z.object({
uht: z.literal('vnd.gem.interactions'),
runId: z.string(), // upstream: Gemini Interactions `interaction.id`
}),
]);
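A brief usage sketch (input values invented): because the schema is a zod discriminatedUnion on 'uht', an equality check on the parsed value narrows the type, so startingAfter is only reachable on the OpenAI variant.

const handle = ResumeHandle_schema.parse({ uht: 'vnd.oai.responses', runId: 'resp_123', startingAfter: 42 });
if (handle.uht === 'vnd.oai.responses')
  console.log(handle.runId, handle.startingAfter); // OpenAI: may resume streaming after a sequence number
else
  console.log(handle.runId);                       // Gemini: poll-only, no startingAfter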
/// Context
@@ -674,7 +683,7 @@ export namespace AixWire_Particles {
| { cg: 'set-metrics', metrics: CGSelectMetrics }
| { cg: 'set-model', name: string }
| { cg: 'set-provider-infra', label: string }
| { cg: 'set-upstream-handle', handle: { uht: 'vnd.oai.responses', responseId: string, expiresAt: number | null } }
| { cg: 'set-upstream-handle', handle: { uht: 'vnd.oai.responses' | 'vnd.gem.interactions', runId: string, expiresAt: number | null } }
| { cg: '_debugDispatchRequest', security: 'dev-env', dispatchRequest: { url: string, headers: string, body: string, bodySize: number } } // may generalize this in the future
| { cg: '_debugProfiler', measurements: Record<string, number | string>[] };
@@ -518,16 +518,19 @@ export class ChatGenerateTransmitter implements IParticleTransmitter {
}
/** Communicates the upstream response handle, for remote control/resumability */
setUpstreamHandle(handle: string, _type: 'oai-responses' /* the only one for now, used for type safety */) {
setUpstreamHandle(handle: string, type: 'vnd.oai.responses' | 'vnd.gem.interactions') {
if (SERVER_DEBUG_WIRE)
console.log('|response-handle|', handle);
console.log('|response-handle|', type, handle);
// NOTE: if needed, we could store the handle locally for server-side resumability, but for now we only implement client-side (manual) resumption
const expireDays = type === 'vnd.gem.interactions'
? 1 // Gemini Interactions: 1d free / 55d paid - use the conservative lower bound
: 30; // OpenAI Responses: default 30 days
this.transmissionQueue.push({
cg: 'set-upstream-handle',
handle: {
uht: 'vnd.oai.responses',
responseId: handle,
expiresAt: Date.now() + 30 * 24 * 3600 * 1000, // default: 30 days expiry
uht: type,
runId: handle,
expiresAt: Date.now() + expireDays * 24 * 3600 * 1000,
},
});
// send it right away, in case the connection closes soon
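The expiry policy above reduces to a tiny pure function; this standalone sketch (names invented) shows the arithmetic, where one day is 86,400,000 ms:

const DAY_MS = 24 * 3600 * 1000; // 86,400,000
function upstreamExpiryMs(type: 'vnd.oai.responses' | 'vnd.gem.interactions', now = Date.now()): number {
  const days = type === 'vnd.gem.interactions' ? 1 : 30; // Gemini: conservative 1-day lower bound; OpenAI: 30-day default
  return now + days * DAY_MS;
}
// upstreamExpiryMs('vnd.gem.interactions', 1_700_000_000_000) === 1_700_086_400_000
// upstreamExpiryMs('vnd.oai.responses',    1_700_000_000_000) === 1_702_592_000_000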
@@ -277,7 +277,9 @@ export async function createChatGenerateResumeDispatch(access: AixAPI_Access, re
case 'openrouter':
// ASSUME the OpenAI Responses API - https://platform.openai.com/docs/api-reference/responses/get
const { url, headers } = openAIAccess(access, '', `${OPENAI_API_PATHS.responses}/${resumeHandle.responseId}`);
if (resumeHandle.uht !== 'vnd.oai.responses')
throw new Error(`Resume handle mismatch for ${dialect}: expected 'vnd.oai.responses', got '${resumeHandle.uht}'`);
const { url, headers } = openAIAccess(access, '', `${OPENAI_API_PATHS.responses}/${resumeHandle.runId /* OpenAI response.id */}`);
const queryParams = new URLSearchParams({
stream: streaming ? 'true' : 'false',
...(!!resumeHandle.startingAfter && { starting_after: resumeHandle.startingAfter.toString() }),
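For the OpenAI variant the pieces above compose into a plain GET; a self-contained sketch (function name and base URL assumed) of the resulting request:

function buildResumeUrl(baseUrl: string, runId: string, streaming: boolean, startingAfter?: number): string {
  const qs = new URLSearchParams({
    stream: streaming ? 'true' : 'false',
    ...(startingAfter !== undefined && { starting_after: startingAfter.toString() }),
  });
  return `${baseUrl}/v1/responses/${runId}?${qs.toString()}`;
}
// buildResumeUrl('https://api.openai.com', 'resp_abc123', true, 42)
//   -> 'https://api.openai.com/v1/responses/resp_abc123?stream=true&starting_after=42'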
@@ -96,7 +96,7 @@ export interface IParticleTransmitter {
setProviderInfraLabel(label: string): void;
/** Communicates the upstream response handle, for remote control/resumability */
setUpstreamHandle(handle: string, type: 'oai-responses'): void;
setUpstreamHandle(handle: string, type: 'vnd.oai.responses' | 'vnd.gem.interactions'): void;
/** Update the metrics, sent twice (after the first call, and then at the end of the transmission) */
updateMetrics(update: Partial<AixWire_Particles.CGSelectMetrics>): void;
@@ -316,7 +316,7 @@ export function createOpenAIResponsesEventParser(): ChatGenerateParseFunction {
// -> Upstream Handle (for remote control: resume, cancel, delete)
// Implementation NOTE: we don't track sequence numbers for partial resumes - we just re-download the full response
if (event.response.store && event.response.id)
pt.setUpstreamHandle(event.response.id, 'oai-responses' /*, event.sequence_number - commented, unused for now */);
pt.setUpstreamHandle(event.response.id, 'vnd.oai.responses' /*, event.sequence_number - commented, unused for now */);
// -> Hosted tool configs: cache for per-event enrichment (e.g. image generation progress)
R.captureHostedToolConfigs(event.response.tools);
+32 -12
@@ -1,7 +1,7 @@
import type { GeminiWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes';
import type { DModelParameterId } from '~/common/stores/llms/llms.parameters';
import { LLM_IF_GEM_CodeExecution, LLM_IF_HOTFIX_NoStream, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { LLM_IF_GEM_CodeExecution, LLM_IF_GEM_Interactions, LLM_IF_HOTFIX_NoStream, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Outputs_Audio, LLM_IF_Outputs_Image, LLM_IF_Outputs_NoText } from '~/common/stores/llms/llms.types';
import { Release } from '~/common/app.release';
import type { ModelDescriptionSchema, OrtVendorLookupResult } from '../llm.server.types';
@@ -42,9 +42,6 @@ const filterLyingModelNames: GeminiWire_API_Models_List.Model['name'][] = [
// 2024-12-10: names of models that are not what they say they are (e.g. 1114 is actually 1121 as of )
'models/gemini-1.5-flash-8b-exp-0924', // replaced by non-free
'models/gemini-1.5-flash-8b-exp-0827', // replaced by non-free
// Interactions API not supported yet - once added, re-enable this model
'models/deep-research-pro-preview-12-2025',
];
@@ -374,18 +371,39 @@ const _knownGeminiModels: ({
// hidden: true, // audio outputs are unavailable as of 2025-05-27
},
// Deep Research Pro Preview - Released December 12, 2025
// Autonomous research agent for complex research task planning
// Deep Research agents - require the Interactions API
// Deep Research Preview - Released April 21, 2026 (latest)
{
hidden: true, // not supported, requires "Interactions API"
id: 'models/deep-research-preview-04-2026',
labelOverride: 'Deep Research Preview (2026-04)',
isPreview: true,
chatPrice: gemini25ProPricing, // pricing not explicitly listed; using 2.5 Pro as baseline
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [],
benchmark: undefined, // Deep research model, not benchmarkable on standard tests
// 128K input, 64K output
},
// Deep Research Max Preview - Released April 21, 2026
{
id: 'models/deep-research-max-preview-04-2026',
labelOverride: 'Deep Research Max Preview (2026-04)',
isPreview: true,
chatPrice: gemini25ProPricing, // baseline estimate (see note above)
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [],
benchmark: undefined, // Deep research model, not benchmarkable on standard tests
},
// Deep Research Pro Preview - Released December 12, 2025
{
hidden: true, // yield to newer 2026-04 models
id: 'models/deep-research-pro-preview-12-2025',
labelOverride: 'Deep Research Pro Preview',
isPreview: true,
chatPrice: gemini25ProPricing, // Pricing not explicitly listed, using 2.5 Pro as baseline
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning],
parameterSpecs: [
{ paramId: 'llmVndGeminiThinkingBudget' },
],
chatPrice: gemini25ProPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [{ paramId: 'llmVndGeminiThinkingBudget' }],
benchmark: undefined, // Deep research model, not benchmarkable on standard tests
// Note: 128K input context, 64K output context
},
@@ -773,6 +791,8 @@ const _sortOderIdPrefix: string[] = [
'models/gemini-2.5-pro-',
'models/gemini-2.5-pro-preview-tts',
'models/deep-research-max-preview',
'models/deep-research-preview',
'models/deep-research-pro-preview',
'models/gemini-2.5-flash-preview-09',
+1 -1
@@ -467,7 +467,7 @@ class SweepCollectorTransmitter implements IParticleTransmitter {
// Non-parts data
setModelName(_modelName: string): void { /* no-op */ }
setProviderInfraLabel(_label: string): void { /* no-op */ }
setUpstreamHandle(_handle: string, _type: 'oai-responses'): void { /* no-op */ }
setUpstreamHandle(_handle: string, _type: string): void { /* no-op */ }
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason): void { this.tokenStopReason = reason; }
updateMetrics(_update: Partial<AixWire_Particles.CGSelectMetrics>): void { /* no-op */ }
}