Compare commits

...

23 Commits

Author SHA1 Message Date
Enrico Ros cff3d90613 AIX: DeepSeek V4: fix function calling 2026-04-24 05:45:53 -07:00
Enrico Ros 9f89243d7f AIX: DeepSeek V4: fix swallowing of tool parts 2026-04-24 05:45:53 -07:00
Enrico Ros 784ee9a4da AIX: DeepSeek V4: wires and parser NS 2026-04-24 05:45:53 -07:00
Enrico Ros 678e6b8ba1 AIX: Gemini Interactions: terminate on error 2026-04-24 05:45:53 -07:00
Enrico Ros 30e301c496 BlockOpUpstreamResume: Stop/Cancel 2026-04-24 03:59:50 -07:00
Enrico Ros b22904f6bb AIX: Gemini Interactions: Cancel + Delete
Also see: googleapis/python-genai#1971
2026-04-24 03:40:34 -07:00
Enrico Ros 3f0de7ddca CH: Auto-Title beam chats when done. Fixes #1078 2026-04-24 03:32:04 -07:00
Enrico Ros 9a6f0f9202 AppChat: never re-open an opened beam. Fixes #1079 2026-04-24 03:24:56 -07:00
Enrico Ros 4f0bae5657 AppChat: do not re-beam or regenerate while beam is open. Fixes #1079 2026-04-24 03:19:17 -07:00
Enrico Ros 2101f06195 Roll AIX 2026-04-24 03:04:09 -07:00
Enrico Ros 6d54b5594c Autotitle: Use natural capitalization. Fixes #1077 2026-04-24 02:48:28 -07:00
Enrico Ros 36b8e5b1df Chat: show Stop/Cancel on streaming upstream runs 2026-04-24 02:47:17 -07:00
Enrico Ros 8252d671c7 LLMs: Gemini: Deep Research models support images 2026-04-24 02:47:13 -07:00
Enrico Ros 30d97c94aa LLMs: DeepSeek: bits (note: vision is still not available) 2026-04-24 02:47:13 -07:00
Enrico Ros 82654a00d4 AIX: Streaming (hinting) review and Gemini Interactions API fix 2026-04-24 02:47:09 -07:00
Enrico Ros 9595f14ddc LLM: DeepSeek V4 (flash, pro) + thinking/reasoning_effort fix 2026-04-23 23:59:09 -07:00
Enrico Ros 8c496074b2 LLMs: DeepSeek: add V4 models 2026-04-23 23:30:41 -07:00
Enrico Ros 4d097d7136 LLMs: DeepSeek: add V4 support infra 2026-04-23 23:30:34 -07:00
Enrico Ros 178619d275 AI Settings: match the defaults description. Fixes #1076 2026-04-23 23:29:20 -07:00
Enrico Ros 59c8b2538d Merge pull request #1074 from tredondo/patch-1
chore: fix Zod 4 type-strictness issue (#1072)
2026-04-23 22:57:01 -07:00
Enrico Ros 443b72c52a AIX: OpenAI Responses: fix Zod 4 build error in tools .catch()
Bare `return;` produced `void`, which Zod 4 rejects for a
`.catch()` on `z.array(...).optional()` expecting `Tool[] | undefined`.
Return `undefined` explicitly, matching the existing pattern at
line 1204.

Fixes #1072
2026-04-23 22:56:19 -07:00
Enrico Ros ae13abef45 Nobody can tell @fredliubojin what to resume 2026-04-23 22:22:16 -07:00
Ted Robertson 83ae02ef9b chore: fix Zod 4 type-strictness issue (#1072) 2026-04-23 19:51:49 -07:00
27 changed files with 249 additions and 122 deletions
+6 -4
View File
@@ -583,9 +583,11 @@ export function AppChat() {
}, []); }, []);
useGlobalShortcuts('AppChat', React.useMemo(() => [ useGlobalShortcuts('AppChat', React.useMemo(() => [
// focused conversation // focused conversation (excluded when Beam is open so the keystroke passes through to the browser)
{ key: 'z', ctrl: true, shift: true, disabled: isFocusedChatEmpty, action: handleMessageRegenerateLastInFocusedPane, description: 'Retry' }, ...(beamOpenStoreInFocusedPane ? [] : [
{ key: 'b', ctrl: true, shift: true, disabled: isFocusedChatEmpty, action: handleMessageBeamLastInFocusedPane, description: 'Beam Edit' }, { key: 'z', ctrl: true, shift: true, disabled: isFocusedChatEmpty, action: handleMessageRegenerateLastInFocusedPane, description: 'Retry' },
{ key: 'b', ctrl: true, shift: true, disabled: isFocusedChatEmpty, action: handleMessageBeamLastInFocusedPane, description: 'Beam Edit' },
]),
{ key: 'o', ctrl: true, action: handleConversationsImportFormFilePicker }, { key: 'o', ctrl: true, action: handleConversationsImportFormFilePicker },
{ key: 's', ctrl: true, action: () => handleFileSaveConversation(focusedPaneConversationId) }, { key: 's', ctrl: true, action: () => handleFileSaveConversation(focusedPaneConversationId) },
{ key: 'n', ctrl: true, shift: true, action: () => handleConversationNewInFocusedPane(false, false) }, { key: 'n', ctrl: true, shift: true, action: () => handleConversationNewInFocusedPane(false, false) },
@@ -603,7 +605,7 @@ export function AppChat() {
{ key: 'p', ctrl: true, action: () => personaDropdownRef.current?.openListbox() /*, description: 'Open Persona Dropdown'*/ }, { key: 'p', ctrl: true, action: () => personaDropdownRef.current?.openListbox() /*, description: 'Open Persona Dropdown'*/ },
// focused conversation llm // focused conversation llm
{ key: 'o', ctrl: true, shift: true, action: handleOpenChatLlmOptions }, { key: 'o', ctrl: true, shift: true, action: handleOpenChatLlmOptions },
], [focusedPaneConversationId, handleConversationNewInFocusedPane, handleConversationReset, handleConversationsImportFormFilePicker, handleDeleteConversations, handleFileSaveConversation, handleMessageBeamLastInFocusedPane, handleMessageRegenerateLastInFocusedPane, handleMoveFocus, handleNavigateHistoryInFocusedPane, handleOpenChatLlmOptions, isFocusedChatEmpty])); ], [beamOpenStoreInFocusedPane, focusedPaneConversationId, handleConversationNewInFocusedPane, handleConversationReset, handleConversationsImportFormFilePicker, handleDeleteConversations, handleFileSaveConversation, handleMessageBeamLastInFocusedPane, handleMessageRegenerateLastInFocusedPane, handleMoveFocus, handleNavigateHistoryInFocusedPane, handleOpenChatLlmOptions, isFocusedChatEmpty]));
return <> return <>
@@ -16,6 +16,7 @@ const ARM_TIMEOUT_MS = 4000;
*/ */
export function BlockOpUpstreamResume(props: { export function BlockOpUpstreamResume(props: {
upstreamHandle: Exclude<DMessageGenerator['upstreamHandle'], undefined>, upstreamHandle: Exclude<DMessageGenerator['upstreamHandle'], undefined>,
pending?: boolean; // true while the message is actively streaming; labels the Delete button as "Stop"
onResume?: () => void | Promise<void>; onResume?: () => void | Promise<void>;
onCancel?: () => void | Promise<void>; onCancel?: () => void | Promise<void>;
onDelete?: () => void | Promise<void>; onDelete?: () => void | Promise<void>;
@@ -30,8 +31,8 @@ export function BlockOpUpstreamResume(props: {
// expiration: boolean is evaluated at render (may lag briefly if nothing re-renders past expiry). // expiration: boolean is evaluated at render (may lag briefly if nothing re-renders past expiry).
// TimeAgo handles its own tick for the label; the button's disabled state is the only consumer of this flag. // TimeAgo handles its own tick for the label; the button's disabled state is the only consumer of this flag.
const { expiresAt, runId = '' } = props.upstreamHandle; const { expiresAt /*, runId = ''*/ } = props.upstreamHandle;
const isExpired = expiresAt != null && Date.now() > expiresAt; // const isExpired = expiresAt != null && Date.now() > expiresAt;
// handlers // handlers
@@ -102,7 +103,7 @@ export function BlockOpUpstreamResume(props: {
{props.onResume && ( {props.onResume && (
<Tooltip title='Resume generation from last checkpoint'> <Tooltip title='Resume generation from last checkpoint'>
<Button <Button
disabled={isResuming || isCancelling || isDeleting || isExpired} disabled={isResuming || isCancelling || isDeleting}
loading={isResuming} loading={isResuming}
startDecorator={<PlayArrowRoundedIcon color='success' />} startDecorator={<PlayArrowRoundedIcon color='success' />}
onClick={handleResume} onClick={handleResume}
@@ -126,7 +127,7 @@ export function BlockOpUpstreamResume(props: {
)} )}
{props.onDelete && ( {props.onDelete && (
<Tooltip title={deleteArmed ? 'Click again to confirm - cancels the run upstream (no resume after)' : 'Cancel the upstream run'}> <Tooltip title={deleteArmed ? 'Click again to confirm - cancels the run upstream (no resume after)' : (props.pending ? 'Stop this response and cancel the upstream run' : 'Cancel the upstream run')}>
<Button <Button
loading={isDeleting} loading={isDeleting}
color={deleteArmed ? 'danger' : 'neutral'} color={deleteArmed ? 'danger' : 'neutral'}
@@ -135,7 +136,7 @@ export function BlockOpUpstreamResume(props: {
onClick={handleDelete} onClick={handleDelete}
disabled={isResuming || isCancelling || isDeleting} disabled={isResuming || isCancelling || isDeleting}
> >
{deleteArmed ? 'Confirm?' : 'Cancel'} {deleteArmed ? 'Confirm?' : (props.pending ? 'Stop' : 'Cancel')}
</Button> </Button>
</Tooltip> </Tooltip>
)} )}
@@ -898,11 +898,12 @@ export function ChatMessage(props: {
/> />
)} )}
{/* Upstream Resume - shows whenever there's a stored handle (incl. post-reload, where no error fragment is present) */} {/* Upstream Resume - shows whenever there's a stored handle (incl. post-reload, and while streaming so Stop can cancel the upstream run) */}
{!messagePendingIncomplete && props.isBottom && fromAssistant && messageGenerator?.upstreamHandle && (!!onMessageUpstreamResume || !!onMessageUpstreamDelete) && ( {props.isBottom && fromAssistant && messageGenerator?.upstreamHandle && (!!onMessageUpstreamResume || !!onMessageUpstreamDelete) && (
<BlockOpUpstreamResume <BlockOpUpstreamResume
upstreamHandle={messageGenerator.upstreamHandle} upstreamHandle={messageGenerator.upstreamHandle}
onResume={onMessageUpstreamResume ? handleUpstreamResume : undefined} pending={messagePendingIncomplete}
onResume={(!messagePendingIncomplete && onMessageUpstreamResume) ? handleUpstreamResume : undefined}
onDelete={onMessageUpstreamDelete ? handleUpstreamDelete : undefined} onDelete={onMessageUpstreamDelete ? handleUpstreamDelete : undefined}
/> />
)} )}
@@ -166,9 +166,9 @@ export function AppChatSettingsAI() {
tooltip={<> tooltip={<>
When Claude uses tools like code execution, it may produce text and image files stored in Anthropic&apos;s File API. This setting controls whether Big-AGI should automatically download and embed them in the chat. When Claude uses tools like code execution, it may produce text and image files stored in Anthropic&apos;s File API. This setting controls whether Big-AGI should automatically download and embed them in the chat.
<ul> <ul>
<li><b>Off</b>: keep as references (default).</li> <li><b>Show</b>: keep as references.</li>
<li><b>Inline</b>: download and embed text/images.</li> <li><b>Embed</b>: download and embed text/images (default).</li>
<li><b>Inline + Free</b>: embed, then delete from Anthropic to free storage.</li> <li><b>Embed + Free</b>: embed, then delete from Anthropic to free storage.</li>
</ul> </ul>
Only affects Anthropic models. Only affects Anthropic models.
</>} </>}
+1 -1
View File
@@ -23,7 +23,7 @@ export const Release = {
// this is here to trigger revalidation of data, e.g. models refresh // this is here to trigger revalidation of data, e.g. models refresh
Monotonics: { Monotonics: {
Aix: 67, Aix: 68,
NewsVersion: 204, NewsVersion: 204,
}, },
@@ -5,6 +5,7 @@ import { bareBonesPromptMixer } from '~/modules/persona/pmix/pmix';
import { SystemPurposes } from '../../data'; import { SystemPurposes } from '../../data';
import { BeamStore, createBeamVanillaStore } from '~/modules/beam/store-beam_vanilla'; import { BeamStore, createBeamVanillaStore } from '~/modules/beam/store-beam_vanilla';
import { autoConversationTitle } from '~/modules/aifn/autotitle/autoTitle';
import { useModuleBeamStore } from '~/modules/beam/store-module-beam'; import { useModuleBeamStore } from '~/modules/beam/store-module-beam';
import type { DConversationId } from '~/common/stores/chat/chat.conversation'; import type { DConversationId } from '~/common/stores/chat/chat.conversation';
@@ -275,6 +276,10 @@ export class ConversationHandler {
// close beam // close beam
terminateKeepingSettings(); terminateKeepingSettings();
// auto-title the conversation if enabled (parity with chat-persona flow — fixes #1078)
if (getChatAutoAI().autoTitleChat)
void autoConversationTitle(this.conversationId, false);
}; };
beamOpen(viewHistory, getChatLLMId(), !!destReplaceMessageId, onBeamSuccess); beamOpen(viewHistory, getChatLLMId(), !!destReplaceMessageId, onBeamSuccess);
+2 -1
View File
@@ -175,7 +175,8 @@ export const DModelParameterRegistry = {
label: 'Thinking', label: 'Thinking',
type: 'enum', type: 'enum',
description: 'Enable or disable extended thinking mode.', description: 'Enable or disable extended thinking mode.',
values: ['none', 'high'], values: ['none', 'high', 'max'],
// 'max' is for now DeepSeek V4-specific (reasoning_effort=max); other vendors restrict via enumValues
// undefined means vendor default (usually 'high', i.e. thinking enabled) // undefined means vendor default (usually 'high', i.e. thinking enabled)
}), }),
+1 -1
View File
@@ -49,7 +49,7 @@ export async function autoConversationTitle(conversationId: string, forceReplace
autoTitleLlmId, autoTitleLlmId,
'You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.', 'You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.',
`Analyze the given short conversation (every line is truncated) and extract a concise chat title that summarizes the conversation in as little as a couple of words. `Analyze the given short conversation (every line is truncated) and extract a concise chat title that summarizes the conversation in as little as a couple of words.
Only respond with the lowercase short title and nothing else. Only respond with the short title and nothing else.
\`\`\` \`\`\`
${historyLines.join('\n')} ${historyLines.join('\n')}
@@ -409,11 +409,15 @@ export async function aixCGR_ChatSequence_FromDMessagesOrThrow(
break; break;
case 'ma': case 'ma':
// Preserve reasoning continuity across turns. Two channels, any one is sufficient: // Preserve reasoning continuity across turns. Three channels, any one is sufficient:
// - Anthropic: part.textSignature / part.redactedData (bespoke fields, see Anthropic extended thinking docs) // - Anthropic: part.textSignature / part.redactedData (bespoke fields, see Anthropic extended thinking docs)
// - OpenAI/Gemini: _vnd sidecar (reasoningItem.* / thoughtSignature, generic vendor-state mechanism) // - OpenAI Responses / Gemini: _vnd sidecar (reasoningItem.* / thoughtSignature, opaque continuity handle)
// - DeepSeek V4 (OpenAI chat-completions): plain reasoning text in aText is the payload itself
const oaiReasoning = _vnd?.openai?.reasoningItem; const oaiReasoning = _vnd?.openai?.reasoningItem;
const hasReasoningHandle = aPart.textSignature || aPart.redactedData?.length || oaiReasoning?.encryptedContent || oaiReasoning?.id; const hasReasoningHandle =
(aPart.textSignature || aPart.redactedData?.length)
|| (oaiReasoning?.encryptedContent || oaiReasoning?.id)
|| (aPart.aText && aPart.aType === 'reasoning'); // DeepSeek V4 reasoning in plain text - NOTE: will send LOTS of 'ma' parts (e.g. to Gemini, which doesn't even need them)
if (hasReasoningHandle) { if (hasReasoningHandle) {
const aModelAuxPart = aPart as AixParts_ModelAuxPart; // NOTE: this is a forced cast from readonly string[] to string[], but not a big deal here const aModelAuxPart = aPart as AixParts_ModelAuxPart; // NOTE: this is a forced cast from readonly string[] to string[], but not a big deal here
modelMessage.parts.push(_vnd ? { ...aModelAuxPart, _vnd } : aModelAuxPart); modelMessage.parts.push(_vnd ? { ...aModelAuxPart, _vnd } : aModelAuxPart);
@@ -653,7 +657,7 @@ function _clientCreateAixMetaInReferenceToPart(items: DMetaReferenceItem[]): Aix
export async function clientHotFixGenerateRequest_ApplyAll(llmInterfaces: DLLM['interfaces'], aixChatGenerate: AixAPIChatGenerate_Request, modelName: string): Promise<{ export async function clientHotFixGenerateRequest_ApplyAll(llmInterfaces: DLLM['interfaces'], aixChatGenerate: AixAPIChatGenerate_Request, modelName: string): Promise<{
shallDisableStreaming: boolean; hotfixNoStream: boolean;
workaroundsCount: number; workaroundsCount: number;
}> { }> {
@@ -676,12 +680,12 @@ export async function clientHotFixGenerateRequest_ApplyAll(llmInterfaces: DLLM['
workaroundsCount += await clientHotFixGenerateRequest_ConvertWebP(aixChatGenerate, 'image/jpeg'); workaroundsCount += await clientHotFixGenerateRequest_ConvertWebP(aixChatGenerate, 'image/jpeg');
// Disable streaming for select chat models that don't support it (e.g. o1-preview (old) and o1-2024-12-17) // Disable streaming for select chat models that don't support it (e.g. o1-preview (old) and o1-2024-12-17)
const shallDisableStreaming = llmInterfaces.includes(LLM_IF_HOTFIX_NoStream); const hotfixNoStream = llmInterfaces.includes(LLM_IF_HOTFIX_NoStream);
if (workaroundsCount > 0) if (workaroundsCount > 0)
console.warn(`[DEV] Working around '${modelName}' model limitations: client-side applied ${workaroundsCount} workarounds`); console.warn(`[DEV] Working around '${modelName}' model limitations: client-side applied ${workaroundsCount} workarounds`);
return { shallDisableStreaming, workaroundsCount }; return { hotfixNoStream, workaroundsCount };
} }
@@ -37,7 +37,7 @@ export async function* clientSideChatGenerate(
return dispatch; return dispatch;
}); });
yield* executeChatGenerateWithContinuation(dispatchCreator, streaming, abortSignal, _d); yield* executeChatGenerateWithContinuation(dispatchCreator, abortSignal, _d);
} }
/** /**
@@ -48,7 +48,7 @@ export async function* clientSideReattachUpstream(
access: AixAPI_Access, access: AixAPI_Access,
resumeHandle: AixAPI_ResumeHandle, resumeHandle: AixAPI_ResumeHandle,
context: AixAPI_Context_ChatGenerate, context: AixAPI_Context_ChatGenerate,
streaming: true, streaming: boolean,
connectionOptions: AixAPI_ConnectionOptions_ChatGenerate, connectionOptions: AixAPI_ConnectionOptions_ChatGenerate,
abortSignal: AbortSignal, abortSignal: AbortSignal,
): AsyncGenerator<AixWire_Particles.ChatGenerateOp, void> { ): AsyncGenerator<AixWire_Particles.ChatGenerateOp, void> {
@@ -56,7 +56,7 @@ export async function* clientSideReattachUpstream(
const _d: AixDebugObject = _createClientDebugConfig(access, connectionOptions, context.name); const _d: AixDebugObject = _createClientDebugConfig(access, connectionOptions, context.name);
const dispatchCreator = () => createChatGenerateResumeDispatch(access, resumeHandle, streaming); const dispatchCreator = () => createChatGenerateResumeDispatch(access, resumeHandle, streaming);
yield * executeChatGenerateWithContinuation(dispatchCreator, streaming, abortSignal, _d); yield * executeChatGenerateWithContinuation(dispatchCreator, abortSignal, _d);
} }
/** /**
+28 -24
View File
@@ -342,7 +342,7 @@ export async function aixChatGenerateText_Simple(
aixContextRef: AixAPI_Context_ChatGenerate['ref'], aixContextRef: AixAPI_Context_ChatGenerate['ref'],
// optional options // optional options
clientOptions?: Partial<AixClientOptions>, // this makes the abortController optional clientOptions?: Partial<AixClientOptions>, // this makes the abortController optional
// optional callback for streaming // optional callback - if provided, streaming is activated
onTextStreamUpdate?: (text: string, isDone: boolean, generator: DMessageGenerator) => MaybePromise<void>, onTextStreamUpdate?: (text: string, isDone: boolean, generator: DMessageGenerator) => MaybePromise<void>,
): Promise<string> { ): Promise<string> {
@@ -363,14 +363,13 @@ export async function aixChatGenerateText_Simple(
// Aix Context // Aix Context
const aixContext = aixCreateChatGenerateContext(aixContextName, aixContextRef); const aixContext = aixCreateChatGenerateContext(aixContextName, aixContextRef);
// Aix Streaming - implicit if the callback is provided // Caller streaming preference - implicit: stream if a callback is provided
let aixStreaming = !!onTextStreamUpdate; const callerStreaming = !!onTextStreamUpdate;
// Client-side late stage model HotFixes // Client-side late stage model HotFixes
const { shallDisableStreaming } = await clientHotFixGenerateRequest_ApplyAll(llm.interfaces, aixChatGenerate, llmParameters.llmRef || llm.id); const { hotfixNoStream } = await clientHotFixGenerateRequest_ApplyAll(llm.interfaces, aixChatGenerate, llmParameters.llmRef || llm.id);
if (shallDisableStreaming || aixModel.forceNoStream) const wireStreaming = !hotfixNoStream && !aixModel.forceNoStream ? callerStreaming : false;
aixStreaming = false;
// Variable to store the final text // Variable to store the final text
@@ -398,11 +397,11 @@ export async function aixChatGenerateText_Simple(
aixModel, aixModel,
aixChatGenerate, aixChatGenerate,
aixContext, aixContext,
aixStreaming, wireStreaming,
state.generator, state.generator,
abortSignal, abortSignal,
clientOptions?.throttleParallelThreads ?? 0, clientOptions?.throttleParallelThreads ?? 0,
!aixStreaming ? undefined : async (ll: AixChatGenerateContent_LL, _isDone: boolean /* we want to issue this, in case the next action is an exception */) => { !onTextStreamUpdate ? undefined : async (ll: AixChatGenerateContent_LL, _isDone: boolean /* we want to issue this, in case the next action is an exception */) => {
_llToL2Simple(ll, state); _llToL2Simple(ll, state);
if (onTextStreamUpdate && state.text !== null) if (onTextStreamUpdate && state.text !== null)
await onTextStreamUpdate(state.text, false, state.generator); await onTextStreamUpdate(state.text, false, state.generator);
@@ -521,7 +520,7 @@ type _AixChatGenerateContent_DMessageGuts_WithOutcome = AixChatGenerateContent_D
* @param llmId - ID of the Language Model to use * @param llmId - ID of the Language Model to use
* @param aixChatGenerate - Multi-modal chat generation request specifics, including Tools and high-level metadata * @param aixChatGenerate - Multi-modal chat generation request specifics, including Tools and high-level metadata
* @param aixContext - Information about how this chat generation is being used * @param aixContext - Information about how this chat generation is being used
* @param aixStreaming - Whether to use streaming for generation * @param aixStreaming - Caller's wire-streaming preference. Subject to override by model/hotfix constraints, or dispatch constraints
* @param clientOptions - Client options for the operation * @param clientOptions - Client options for the operation
* @param onStreamingUpdate - Optional callback for streaming updates * @param onStreamingUpdate - Optional callback for streaming updates
* *
@@ -551,10 +550,9 @@ export async function aixChatGenerateContent_DMessage_orThrow<TServiceSettings e
vndAntTransformInlineFiles: aixAccess.dialect === 'anthropic' ? getVndAntInlineFiles() : undefined, vndAntTransformInlineFiles: aixAccess.dialect === 'anthropic' ? getVndAntInlineFiles() : undefined,
}); });
// Client-side late stage model HotFixes // Client-side late stage model HotFixes - collapse the caller's requested streaming preference into the effective wire-streaming decision after constraints (hotfix gate, model.forceNoStream)
const { shallDisableStreaming } = await clientHotFixGenerateRequest_ApplyAll(llm.interfaces, aixChatGenerate, llmParameters.llmRef || llm.id); const { hotfixNoStream } = await clientHotFixGenerateRequest_ApplyAll(llm.interfaces, aixChatGenerate, llmParameters.llmRef || llm.id);
if (shallDisableStreaming || aixModel.forceNoStream) const wireStreaming = !hotfixNoStream && !aixModel.forceNoStream ? aixStreaming : false;
aixStreaming = false;
// Legacy Note: awaited OpenAI moderation check was removed (was only on this codepath) // Legacy Note: awaited OpenAI moderation check was removed (was only on this codepath)
@@ -584,7 +582,7 @@ export async function aixChatGenerateContent_DMessage_orThrow<TServiceSettings e
aixModel, aixModel,
aixChatGenerate, aixChatGenerate,
aixContext, aixContext,
aixStreaming, wireStreaming,
dMessage.generator, dMessage.generator,
clientOptions.abortSignal, clientOptions.abortSignal,
clientOptions.throttleParallelThreads ?? 0, clientOptions.throttleParallelThreads ?? 0,
@@ -753,7 +751,7 @@ export type AixChatGenerateTerminal_LL = 'completed' | 'aborted' | 'failed';
* *
* Contract: * Contract:
* - empty fragments means no content yet, and no error * - empty fragments means no content yet, and no error
* - aixStreaming hints the source, but can be respected or not * - wireStreaming hints the wire transport (SSE vs single response), but can be respected or not by the dispatch (e.g. SSE-only APIs ignore a `false` value)
* - onReassemblyUpdate is optional, you can ignore the updates and await the final result * - onReassemblyUpdate is optional, you can ignore the updates and await the final result
* - errors become Error fragments, and they can be dialect-sent, dispatch-excepts, client-read issues or even user aborts * - errors become Error fragments, and they can be dialect-sent, dispatch-excepts, client-read issues or even user aborts
* - DOES NOT THROW, but the final accumulator may contain error fragments * - DOES NOT THROW, but the final accumulator may contain error fragments
@@ -772,7 +770,7 @@ export type AixChatGenerateTerminal_LL = 'completed' | 'aborted' | 'failed';
* - special parts include 'In Reference To' (a decorator of messages) * - special parts include 'In Reference To' (a decorator of messages)
* - other special parts include the Anthropic Caching hints, on select message * - other special parts include the Anthropic Caching hints, on select message
* @param aixContext specifies the scope of the caller, such as what's the high level objective of this call * @param aixContext specifies the scope of the caller, such as what's the high level objective of this call
* @param aixStreaming requests the source to provide incremental updates * @param wireStreaming the effective wire-level streaming decision (already collapsed from caller preference + model/hotfix constraints); drives tRPC `streaming` field and downstream dispatch body shape
* @param initialGenerator generator initial value, which will be updated for every new piece of information received * @param initialGenerator generator initial value, which will be updated for every new piece of information received
* @param abortSignal allows the caller to stop the operation * @param abortSignal allows the caller to stop the operation
* @param throttleParallelThreads allows the caller to limit the number of parallel threads * @param throttleParallelThreads allows the caller to limit the number of parallel threads
@@ -790,7 +788,7 @@ async function _aixChatGenerateContent_LL(
aixModel: AixAPI_Model, aixModel: AixAPI_Model,
aixChatGenerate: AixAPIChatGenerate_Request, aixChatGenerate: AixAPIChatGenerate_Request,
aixContext: AixAPI_Context_ChatGenerate, aixContext: AixAPI_Context_ChatGenerate,
aixStreaming: boolean, wireStreaming: boolean,
// others // others
initialGenerator: DMessageGenerator, initialGenerator: DMessageGenerator,
abortSignal: AbortSignal, abortSignal: AbortSignal,
@@ -804,10 +802,13 @@ async function _aixChatGenerateContent_LL(
const inspectorTransport = !inspectorEnabled ? undefined : aixAccess.clientSideFetch ? 'csf' : 'trpc'; const inspectorTransport = !inspectorEnabled ? undefined : aixAccess.clientSideFetch ? 'csf' : 'trpc';
const inspectorContext = !inspectorEnabled ? undefined : { contextName: aixContext.name, contextRef: aixContext.ref }; const inspectorContext = !inspectorEnabled ? undefined : { contextName: aixContext.name, contextRef: aixContext.ref };
// [DEV] Inspector - request body override // Inspector - override request body
const requestBodyOverrideJson = inspectorEnabled && aixClientDebuggerGetRBO(); const requestBodyOverrideJson = inspectorEnabled && aixClientDebuggerGetRBO();
const debugRequestBodyOverride = !requestBodyOverrideJson ? false : JSON.parse(requestBodyOverrideJson); const debugRequestBodyOverride = !requestBodyOverrideJson ? false : JSON.parse(requestBodyOverrideJson);
// Inspector - force disable streaming (note: dispatches may still override this)
if (getAixDebuggerNoStreaming()) wireStreaming = false;
/** /**
* FIXME: implement client selection of resumability - aixAccess option? * FIXME: implement client selection of resumability - aixAccess option?
* NOTE: for Gemini Deep Research, it's on by default, so both auto-reattach on network breaks (currently disabled) * NOTE: for Gemini Deep Research, it's on by default, so both auto-reattach on network breaks (currently disabled)
@@ -827,8 +828,11 @@ async function _aixChatGenerateContent_LL(
// [CSF] Pre-load client-side executor if needed - type inference works here, no need to type // [CSF] Pre-load client-side executor if needed - type inference works here, no need to type
let clientSideChatGenerate; let clientSideChatGenerate;
let clientSideReattachUpstream; let clientSideReattachUpstream;
if (aixAccess.clientSideFetch) if (aixAccess.clientSideFetch) {
({ clientSideChatGenerate, clientSideReattachUpstream } = await _loadCsfModuleOrThrow()); const csf = await _loadCsfModuleOrThrow();
clientSideChatGenerate = csf.clientSideChatGenerate;
clientSideReattachUpstream = csf.clientSideReattachUpstream;
}
// Client-side particle transforms: // Client-side particle transforms:
@@ -891,7 +895,7 @@ async function _aixChatGenerateContent_LL(
aixModel, aixModel,
aixChatGenerate, aixChatGenerate,
aixContext, aixContext,
getAixDebuggerNoStreaming() ? false : aixStreaming, wireStreaming,
aixConnectionOptions, aixConnectionOptions,
abortSignal, abortSignal,
) : ) :
@@ -901,7 +905,7 @@ async function _aixChatGenerateContent_LL(
model: aixModel, model: aixModel,
chatGenerate: aixChatGenerate, chatGenerate: aixChatGenerate,
context: aixContext, context: aixContext,
streaming: getAixDebuggerNoStreaming() ? false : aixStreaming, // [DEV] disable streaming if set in the UX (testing) streaming: wireStreaming,
connectionOptions: aixConnectionOptions, connectionOptions: aixConnectionOptions,
}, { signal: abortSignal }) }, { signal: abortSignal })
@@ -912,7 +916,7 @@ async function _aixChatGenerateContent_LL(
aixAccess, aixAccess,
accumulator_LL.generator.upstreamHandle, accumulator_LL.generator.upstreamHandle,
aixContext, aixContext,
true, // streaming - reattach is only validated for streaming for now wireStreaming,
aixConnectionOptions, aixConnectionOptions,
abortSignal, abortSignal,
) : ) :
@@ -921,7 +925,7 @@ async function _aixChatGenerateContent_LL(
access: aixAccess, access: aixAccess,
upstreamHandle: accumulator_LL.generator.upstreamHandle, upstreamHandle: accumulator_LL.generator.upstreamHandle,
context: aixContext, context: aixContext,
streaming: true, streaming: wireStreaming,
connectionOptions: aixConnectionOptions, connectionOptions: aixConnectionOptions,
}, { signal: abortSignal }) }, { signal: abortSignal })
+3 -3
View File
@@ -30,7 +30,7 @@ export const aixRouter = createTRPCRouter({
const _d = _createDebugConfig(input.access, input.connectionOptions, input.context.name); const _d = _createDebugConfig(input.access, input.connectionOptions, input.context.name);
const dispatchCreator = () => createChatGenerateDispatch(input.access, input.model, input.chatGenerate, input.streaming, !!input.connectionOptions?.enableResumability); const dispatchCreator = () => createChatGenerateDispatch(input.access, input.model, input.chatGenerate, input.streaming, !!input.connectionOptions?.enableResumability);
yield* executeChatGenerateWithContinuation(dispatchCreator, input.streaming, ctx.reqSignal, _d); yield* executeChatGenerateWithContinuation(dispatchCreator, ctx.reqSignal, _d);
}), }),
/** /**
@@ -42,14 +42,14 @@ export const aixRouter = createTRPCRouter({
access: AixWire_API.Access_schema, access: AixWire_API.Access_schema,
upstreamHandle: AixWire_API.UpstreamHandle_schema, // reattach uses a handle instead of 'model + chatGenerate' upstreamHandle: AixWire_API.UpstreamHandle_schema, // reattach uses a handle instead of 'model + chatGenerate'
context: AixWire_API.ContextChatGenerate_schema, context: AixWire_API.ContextChatGenerate_schema,
streaming: z.literal(true), // reattach is always streaming streaming: z.boolean(),
connectionOptions: AixWire_API.ConnectionOptionsChatGenerate_schema.pick({ debugDispatchRequest: true }).optional(), // debugDispatchRequest connectionOptions: AixWire_API.ConnectionOptionsChatGenerate_schema.pick({ debugDispatchRequest: true }).optional(), // debugDispatchRequest
})) }))
.mutation(async function* ({ input, ctx }) { .mutation(async function* ({ input, ctx }) {
const _d = _createDebugConfig(input.access, input.connectionOptions, input.context.name); const _d = _createDebugConfig(input.access, input.connectionOptions, input.context.name);
const dispatchCreator = () => createChatGenerateResumeDispatch(input.access, input.upstreamHandle, input.streaming); const dispatchCreator = () => createChatGenerateResumeDispatch(input.access, input.upstreamHandle, input.streaming);
yield* executeChatGenerateWithContinuation(dispatchCreator, input.streaming, ctx.reqSignal, _d); yield* executeChatGenerateWithContinuation(dispatchCreator, ctx.reqSignal, _d);
}), }),
/** /**
@@ -37,6 +37,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
const chatGenerate = aixSpillSystemToUser(_chatGenerate); const chatGenerate = aixSpillSystemToUser(_chatGenerate);
// Dialect incompatibilities -> Hotfixes // Dialect incompatibilities -> Hotfixes
// [DeepSeek, 2026-04-24] V4 doesn't require strict alternation but we keep coalescing for cleanliness; the reducer only merges assistant/user, tool messages stay separate (parallel tool_calls).
const hotFixAlternateUserAssistantRoles = openAIDialect === 'deepseek' || openAIDialect === 'perplexity'; const hotFixAlternateUserAssistantRoles = openAIDialect === 'deepseek' || openAIDialect === 'perplexity';
const hotFixRemoveEmptyMessages = openAIDialect === 'moonshot' || openAIDialect === 'perplexity'; // [Moonshot, 2026-02-10] consecutive assistant messages (empty + content) break Moonshot - coalesce to fix const hotFixRemoveEmptyMessages = openAIDialect === 'moonshot' || openAIDialect === 'perplexity'; // [Moonshot, 2026-02-10] consecutive assistant messages (empty + content) break Moonshot - coalesce to fix
const hotFixRemoveStreamOptions = openAIDialect === 'azure' || openAIDialect === 'mistral'; const hotFixRemoveStreamOptions = openAIDialect === 'azure' || openAIDialect === 'mistral';
@@ -59,7 +60,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
throw new Error('This service does not support function calls'); throw new Error('This service does not support function calls');
// Convert the chat messages to the OpenAI 4-Messages format // Convert the chat messages to the OpenAI 4-Messages format
let chatMessages = _toOpenAIMessages(chatGenerate.systemMessage, chatGenerate.chatSequence, hotFixOpenAIOFamily); let chatMessages = _toOpenAIMessages(openAIDialect, chatGenerate.systemMessage, chatGenerate.chatSequence, hotFixOpenAIOFamily);
// Apply hotfixes // Apply hotfixes
@@ -69,6 +70,13 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
if (hotFixAlternateUserAssistantRoles) if (hotFixAlternateUserAssistantRoles)
chatMessages = _fixAlternateUserAssistantRoles(chatMessages); chatMessages = _fixAlternateUserAssistantRoles(chatMessages);
// [DeepSeek, 2026-04-24] When tools are present and thinking isn't disabled, V4 demands reasoning_content on EVERY assistant message in history
// Inject '' placeholder where missing; real reasoning is attached by _toOpenAIMessages
if (openAIDialect === 'deepseek' && chatGenerate.tools?.length)
for (const m of chatMessages)
if (m.role === 'assistant' && m.reasoning_content === undefined)
m.reasoning_content = '';
// constrained output modes - both JSON and tool invocations // constrained output modes - both JSON and tool invocations
// const strictJsonOutput = !!model.strictJsonOutput; // const strictJsonOutput = !!model.strictJsonOutput;
@@ -145,18 +153,23 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
&& openAIDialect !== 'deepseek' && openAIDialect !== 'moonshot' && openAIDialect !== 'zai' // MoonShot maps to none->disabled / high->enabled && openAIDialect !== 'deepseek' && openAIDialect !== 'moonshot' && openAIDialect !== 'zai' // MoonShot maps to none->disabled / high->enabled
&& openAIDialect !== 'perplexity' // Perplexity has its own block below with stricter validation && openAIDialect !== 'perplexity' // Perplexity has its own block below with stricter validation
) { ) {
if (reasoningEffort === 'max') // domain validation // for: 'alibaba' | 'azure' | 'groq' | 'lmstudio' | 'localai' | 'mistral' | 'openai' | 'openpipe' | 'togetherai' | 'xai'
throw new Error(`OpenAI ChatCompletions API does not support '${reasoningEffort}' reasoning effort`);
payload.reasoning_effort = reasoningEffort; payload.reasoning_effort = reasoningEffort;
} }
// [Moonshot] Kimi K2.5 reasoning effort -> thinking mode (only 'none' and 'high' supported for now) // [Moonshot] Kimi K2.5 reasoning effort -> thinking mode (only 'none' and 'high' supported for now)
// [Z.ai] GLM thinking mode: binary enabled/disabled (supports GLM-4.5 series and higher) - https://docs.z.ai/guides/capabilities/thinking-mode // [Z.ai] GLM thinking mode: binary enabled/disabled (supports GLM-4.5 series and higher) - https://docs.z.ai/guides/capabilities/thinking-mode
// [DeepSeek, 2026-04-23] V4 thinking control https://api-docs.deepseek.com/guides/thinking_mode
if (reasoningEffort && (openAIDialect === 'deepseek' || openAIDialect === 'moonshot' || openAIDialect === 'zai')) { if (reasoningEffort && (openAIDialect === 'deepseek' || openAIDialect === 'moonshot' || openAIDialect === 'zai')) {
if (reasoningEffort !== 'none' && reasoningEffort !== 'high') // domain validation const allowedEffort = openAIDialect === 'deepseek' ? ['none', 'high', 'max'] : ['none', 'high'];
throw new Error(`${openAIDialect} only supports reasoning effort 'none' or 'high', got '${reasoningEffort}'`); if (!allowedEffort.includes(reasoningEffort)) // domain validation
throw new Error(`${openAIDialect} only supports reasoning effort ${allowedEffort.join(', ')}, got '${reasoningEffort}'`);
payload.thinking = { type: reasoningEffort === 'none' ? 'disabled' : 'enabled' }; payload.thinking = { type: reasoningEffort !== 'none' ? 'enabled' : 'disabled' };
// [DeepSeek, 2026-04-23] DeepSeek also supports effort control for reasoning-enabled requests - set it here as it was carved from the reasoningEffort setter before
if (openAIDialect === 'deepseek' && reasoningEffort !== 'none')
payload.reasoning_effort = reasoningEffort;
} }
@@ -348,19 +361,23 @@ function _fixAlternateUserAssistantRoles(chatMessages: TRequestMessages): TReque
}; };
} }
// if the current item has the same role as the last item, concatenate their content // If current item has the same role as the last, coalesce ONLY assistant/user.
// Tool/system/developer must stay separate - tool messages each pair with a tool_call_id; merging corrupts the protocol.
if (acc.length > 0) { if (acc.length > 0) {
const lastItem = acc[acc.length - 1]; const lastItem = acc[acc.length - 1];
if (lastItem.role === historyItem.role) { if (lastItem.role === historyItem.role) {
if (lastItem.role === 'assistant') { if (lastItem.role === 'assistant') {
lastItem.content += hotFixSquashTextSeparator + historyItem.content; lastItem.content += hotFixSquashTextSeparator + historyItem.content;
} else if (lastItem.role === 'user') { return acc;
}
if (lastItem.role === 'user') {
lastItem.content = [ lastItem.content = [
...(Array.isArray(lastItem.content) ? lastItem.content : [OpenAIWire_ContentParts.TextContentPart(lastItem.content)]), ...(Array.isArray(lastItem.content) ? lastItem.content : [OpenAIWire_ContentParts.TextContentPart(lastItem.content)]),
...(Array.isArray(historyItem.content) ? historyItem.content : historyItem.content ? [OpenAIWire_ContentParts.TextContentPart(historyItem.content)] : []), ...(Array.isArray(historyItem.content) ? historyItem.content : historyItem.content ? [OpenAIWire_ContentParts.TextContentPart(historyItem.content)] : []),
]; ];
return acc;
} }
return acc; // fall through to push for tool/system/developer - each stays its own message
} }
} }
@@ -442,7 +459,10 @@ function _fixVndOaiRestoreMarkdown_Inline(payload: TRequest) {
}*/ }*/
function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chatSequence: AixMessages_ChatMessage[], hotFixOpenAIo1Family: boolean): TRequestMessages { function _toOpenAIMessages(openAIDialect: OpenAIDialects, systemMessage: AixMessages_SystemMessage | null, chatSequence: AixMessages_ChatMessage[], hotFixOpenAIo1Family: boolean): TRequestMessages {
// [DeepSeek, 2026-04-24] V4 thinking-by-default - reasoning_content must round-trip on tool-call turns; payload is the 'ma' part's aText (unlike Gemini/OpenAI-Responses which carry opaque handles).
const echoDeepseekReasoning = openAIDialect === 'deepseek';
// Transform the chat messages into OpenAI's format (an array of 'system', 'user', 'assistant', and 'tool' messages) // Transform the chat messages into OpenAI's format (an array of 'system', 'user', 'assistant', and 'tool' messages)
const chatMessages: TRequestMessages = []; const chatMessages: TRequestMessages = [];
@@ -555,6 +575,8 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
break; break;
case 'model': case 'model':
// Accumulate 'ma' reasoning text across this turn; echoed below onto the assistant message if it carries tool_calls (DeepSeek only).
let pendingReasoningText = '';
for (const part of parts) { for (const part of parts) {
const currentMessage = chatMessages[chatMessages.length - 1]; const currentMessage = chatMessages[chatMessages.length - 1];
switch (part.pt) { switch (part.pt) {
@@ -630,7 +652,9 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
break; break;
case 'ma': case 'ma':
// ignore this thinking block - Anthropic only // [DeepSeek only] accumulate reasoning text for the echo-back below. Other dialects ignore 'ma' (reasoning continuity flows via _vnd opaque handles, not via this adapter).
if (echoDeepseekReasoning && part.aType === 'reasoning' && part.aText)
pendingReasoningText += part.aText;
break; break;
case 'tool_response': case 'tool_response':
@@ -651,6 +675,18 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
} }
} }
// [DeepSeek] attach accumulated reasoning to this turn's assistant message only if it carries tool_calls; plain-text turns don't need the echo per docs.
if (echoDeepseekReasoning && pendingReasoningText) {
for (let i = chatMessages.length - 1; i >= 0; i--) {
const m = chatMessages[i];
if (m.role !== 'assistant') continue;
if (m.tool_calls?.length)
m.reasoning_content = pendingReasoningText;
break; // stop at the most recent assistant message from this turn
}
}
break; break;
} }
} }
@@ -55,7 +55,6 @@ export class DispatchContinuationSignal extends Error {
*/ */
export async function* executeChatGenerateWithContinuation( export async function* executeChatGenerateWithContinuation(
dispatchCreatorFn: () => Promise<ChatGenerateDispatch>, dispatchCreatorFn: () => Promise<ChatGenerateDispatch>,
streaming: boolean,
abortSignal: AbortSignal, abortSignal: AbortSignal,
_d: AixDebugObject, _d: AixDebugObject,
): AsyncGenerator<AixWire_Particles.ChatGenerateOp, void> { ): AsyncGenerator<AixWire_Particles.ChatGenerateOp, void> {
@@ -65,7 +64,7 @@ export async function* executeChatGenerateWithContinuation(
for (let turn = 0; turn <= MAX_CONTINUATION_TURNS; turn++) { for (let turn = 0; turn <= MAX_CONTINUATION_TURNS; turn++) {
try { try {
yield* executeChatGenerateWithOperationRetry(currentCreator, streaming, abortSignal, _d); yield* executeChatGenerateWithOperationRetry(currentCreator, abortSignal, _d);
return; // normal completion return; // normal completion
} catch (error) { } catch (error) {
@@ -25,7 +25,7 @@ import { createAnthropicFileInlineTransform } from './parsers/anthropic.transfor
import { createAnthropicMessageParser, createAnthropicMessageParserNS } from './parsers/anthropic.parser'; import { createAnthropicMessageParser, createAnthropicMessageParserNS } from './parsers/anthropic.parser';
import { createBedrockConverseParserNS, createBedrockConverseStreamParser } from './parsers/bedrock-converse.parser'; import { createBedrockConverseParserNS, createBedrockConverseStreamParser } from './parsers/bedrock-converse.parser';
import { createGeminiGenerateContentResponseParser } from './parsers/gemini.parser'; import { createGeminiGenerateContentResponseParser } from './parsers/gemini.parser';
import { createGeminiInteractionsParser } from './parsers/gemini.interactions.parser'; import { createGeminiInteractionsParserSSE } from './parsers/gemini.interactions.parser';
import { createOpenAIChatCompletionsChunkParser, createOpenAIChatCompletionsParserNS } from './parsers/openai.parser'; import { createOpenAIChatCompletionsChunkParser, createOpenAIChatCompletionsParserNS } from './parsers/openai.parser';
import { createOpenAIResponseParserNS, createOpenAIResponsesEventParser } from './parsers/openai.responses.parser'; import { createOpenAIResponseParserNS, createOpenAIResponsesEventParser } from './parsers/openai.responses.parser';
@@ -37,7 +37,8 @@ export type ChatGenerateDispatch = {
/** Used by dialects that need multi-step I/O. The returned response is consumed normally via demuxerFormat/chatGenerateParse */ /** Used by dialects that need multi-step I/O. The returned response is consumed normally via demuxerFormat/chatGenerateParse */
customConnect?: (signal: AbortSignal) => Promise<Response>; customConnect?: (signal: AbortSignal) => Promise<Response>;
bodyTransform?: AixDemuxers.StreamBodyTransform; bodyTransform?: AixDemuxers.StreamBodyTransform;
demuxerFormat: AixDemuxers.StreamDemuxerFormat; /** Source of truth for the consumer mode: null = NS */
demuxerFormat: null | AixDemuxers.StreamDemuxerFormat;
chatGenerateParse: ChatGenerateParseFunction; chatGenerateParse: ChatGenerateParseFunction;
particleTransform?: ChatGenerateParticleTransformFunction; particleTransform?: ChatGenerateParticleTransformFunction;
}; };
@@ -173,6 +174,7 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
// [Gemini Interactions API - ALPHA TEST] SSE-native: POST with stream=true, upstream returns event-stream we pipe through the fast-sse demuxer. // [Gemini Interactions API - ALPHA TEST] SSE-native: POST with stream=true, upstream returns event-stream we pipe through the fast-sse demuxer.
if (model.vndGeminiAPI === 'interactions-agent') { if (model.vndGeminiAPI === 'interactions-agent') {
if (!streaming) console.warn(`[DEV] Gemini Interactions API - only supported in SSE mode, ignoring streaming=false for model ${model.id}`);
const request: ChatGenerateDispatchRequest = { const request: ChatGenerateDispatchRequest = {
...geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.postPath, false), ...geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.postPath, false),
method: 'POST', method: 'POST',
@@ -186,8 +188,9 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
if (signal.aborted) throw error; // preserve abort identity for the executor's abort classifier if (signal.aborted) throw error; // preserve abort identity for the executor's abort classifier
throw new Error(`Gemini Interactions POST: ${error?.message || 'upstream error'}`); // rewrapping TRPCFetcherError as plain Error makes the retrier treat it as non-retryable throw new Error(`Gemini Interactions POST: ${error?.message || 'upstream error'}`); // rewrapping TRPCFetcherError as plain Error makes the retrier treat it as non-retryable
}), }),
/** Upstream hardcodes stream=true + background=true (required by deep-research agents) and has no non-streaming alternative. */
demuxerFormat: 'fast-sse', demuxerFormat: 'fast-sse',
chatGenerateParse: createGeminiInteractionsParser(requestedModelName), chatGenerateParse: createGeminiInteractionsParserSSE(requestedModelName),
}; };
} }
@@ -323,11 +326,13 @@ export async function createChatGenerateResumeDispatch(access: AixAPI_Access, re
// [Gemini Interactions] Reattach via SSE stream - GET /interactions/{id}?stream=true replays all events from the start (intentional - client's ContentReassembler replaces message content on reattach; partial resume via last_event_id is deliberately NOT used). // [Gemini Interactions] Reattach via SSE stream - GET /interactions/{id}?stream=true replays all events from the start (intentional - client's ContentReassembler replaces message content on reattach; partial resume via last_event_id is deliberately NOT used).
if (resumeHandle.uht !== 'vnd.gem.interactions') if (resumeHandle.uht !== 'vnd.gem.interactions')
throw new Error(`Resume handle mismatch for gemini: expected 'vnd.gem.interactions', got '${resumeHandle.uht}'`); throw new Error(`Resume handle mismatch for gemini: expected 'vnd.gem.interactions', got '${resumeHandle.uht}'`);
if (!streaming) console.warn(`[DEV] Gemini Interactions API - Resume only supported in SSE mode, ignoring streaming=false for ${resumeHandle.runId}`);
const { url: _baseUrl, headers: _headers } = geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.getPath(resumeHandle.runId /* Gemini interaction.id */), false); const { url: _baseUrl, headers: _headers } = geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.getPath(resumeHandle.runId /* Gemini interaction.id */), false);
return { return {
request: { url: `${_baseUrl}${_baseUrl.includes('?') ? '&' : '?'}stream=true`, method: 'GET', headers: _headers }, request: { url: `${_baseUrl}${_baseUrl.includes('?') ? '&' : '?'}stream=true`, method: 'GET', headers: _headers },
/** Again, only support SSE here, for now (see comment in `createChatGenerateDispatch`) */
demuxerFormat: 'fast-sse', demuxerFormat: 'fast-sse',
chatGenerateParse: createGeminiInteractionsParser(null /* model name unknown at resume time - caller's DMessage already has it */), chatGenerateParse: createGeminiInteractionsParserSSE(null /* model name unknown at resume time - caller's DMessage already has it */),
}; };
} }
@@ -393,6 +398,21 @@ export async function executeChatGenerateDelete(access: AixAPI_Access, handle: A
case 'gemini': case 'gemini':
if (handle.uht !== 'vnd.gem.interactions') if (handle.uht !== 'vnd.gem.interactions')
throw new Error(`Delete handle mismatch for gemini: expected 'vnd.gem.interactions', got '${handle.uht}'`); throw new Error(`Delete handle mismatch for gemini: expected 'vnd.gem.interactions', got '${handle.uht}'`);
// Gemini: cancel the background run first (stops token generation), then DELETE the stored record.
// The DELETE endpoint only removes the resource; it does NOT cancel an in-flight run.
// Cancel may 404 "Method not found" on the Developer API (API-key mode, googleapis/python-genai#1971) -
// we log the outcome and proceed to DELETE so local cleanup still happens.
const { url: cancelUrl, headers: cancelHeaders } = geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.cancelPath(handle.runId), false);
try {
const cancelResp = await fetchResponseOrTRPCThrow({ url: cancelUrl, method: 'POST', body: {}, headers: cancelHeaders, signal: abortSignal, name: 'Aix.Gemini.Interactions.cancel', throwWithoutName: true });
console.log(`[AIX] Gemini.Interactions.cancel: ok=${cancelResp.ok} status=${cancelResp.status}`);
} catch (error: any) {
if (abortSignal.aborted) throw error;
const status = error instanceof TRPCFetcherError ? error.httpStatus : undefined;
console.log(`[AIX] Gemini.Interactions.cancel: failed status=${status ?? '?'} msg=${error?.message ?? 'unknown'}`);
}
({ url, headers } = geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.deletePath(handle.runId), false)); ({ url, headers } = geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.deletePath(handle.runId), false));
name = 'Aix.Gemini.Interactions.delete'; name = 'Aix.Gemini.Interactions.delete';
break; break;
@@ -26,7 +26,6 @@ import { heartbeatsWhileAwaiting } from '../heartbeatsWhileAwaiting';
*/ */
export async function* executeChatGenerateDispatch( export async function* executeChatGenerateDispatch(
dispatchCreatorFn: () => Promise<ChatGenerateDispatch>, dispatchCreatorFn: () => Promise<ChatGenerateDispatch>,
streaming: boolean,
intakeAbortSignal: AbortSignal, intakeAbortSignal: AbortSignal,
_d: AixDebugObject, _d: AixDebugObject,
parseContext?: { retriesAvailable: boolean }, parseContext?: { retriesAvailable: boolean },
@@ -59,7 +58,7 @@ export async function* executeChatGenerateDispatch(
const innerStream = (async function* () { const innerStream = (async function* () {
// Consume dispatch response // Consume dispatch response
if (!streaming) if (dispatch.demuxerFormat === null /* NS */)
yield* _consumeDispatchUnified(dispatchResponse, dispatch.chatGenerateParse, chatGenerateTx, _d, parseContext); yield* _consumeDispatchUnified(dispatchResponse, dispatch.chatGenerateParse, chatGenerateTx, _d, parseContext);
else else
yield* _consumeDispatchStream(dispatchResponse, dispatch.bodyTransform ?? null, dispatch.demuxerFormat, dispatch.chatGenerateParse, chatGenerateTx, _d, parseContext); yield* _consumeDispatchStream(dispatchResponse, dispatch.bodyTransform ?? null, dispatch.demuxerFormat, dispatch.chatGenerateParse, chatGenerateTx, _d, parseContext);
@@ -44,7 +44,6 @@ export class OperationRetrySignal extends Error {
*/ */
export async function* executeChatGenerateWithOperationRetry( export async function* executeChatGenerateWithOperationRetry(
dispatchCreatorFn: () => Promise<ChatGenerateDispatch>, dispatchCreatorFn: () => Promise<ChatGenerateDispatch>,
streaming: boolean,
abortSignal: AbortSignal, abortSignal: AbortSignal,
_d: AixDebugObject, _d: AixDebugObject,
): AsyncGenerator<AixWire_Particles.ChatGenerateOp, void> { ): AsyncGenerator<AixWire_Particles.ChatGenerateOp, void> {
@@ -55,7 +54,7 @@ export async function* executeChatGenerateWithOperationRetry(
while (true) { while (true) {
try { try {
yield* executeChatGenerateDispatch(dispatchCreatorFn, streaming, abortSignal, _d, { yield* executeChatGenerateDispatch(dispatchCreatorFn, abortSignal, _d, {
retriesAvailable: attemptNumber < maxAttempts, retriesAvailable: attemptNumber < maxAttempts,
}); });
@@ -5,6 +5,7 @@ import type { ChatGenerateParseFunction } from '../chatGenerate.dispatch';
import type { IParticleTransmitter } from './IParticleTransmitter'; import type { IParticleTransmitter } from './IParticleTransmitter';
import { GeminiInteractionsWire_API_Interactions } from '../../wiretypes/gemini.interactions.wiretypes'; import { GeminiInteractionsWire_API_Interactions } from '../../wiretypes/gemini.interactions.wiretypes';
import { IssueSymbols } from '../ChatGenerateTransmitter';
import { geminiConvertPCM2WAV } from './gemini.audioutils'; import { geminiConvertPCM2WAV } from './gemini.audioutils';
@@ -44,7 +45,7 @@ type BlockState = {
* the cursor (or from start if omitted). Our parser is position-idempotent within a single run * the cursor (or from start if omitted). Our parser is position-idempotent within a single run
* because the transmitter's state carries across events. * because the transmitter's state carries across events.
*/ */
export function createGeminiInteractionsParser(requestedModelName: string | null): ChatGenerateParseFunction { export function createGeminiInteractionsParserSSE(requestedModelName: string | null): ChatGenerateParseFunction {
const parserCreationTimestamp = Date.now(); const parserCreationTimestamp = Date.now();
let timeToFirstContent: number | undefined; let timeToFirstContent: number | undefined;
@@ -218,11 +219,16 @@ export function createGeminiInteractionsParser(requestedModelName: string | null
} }
case 'error': case 'error':
// Observed mid-stream with an empty payload between content blocks - non-fatal, the stream // Two observed shapes:
// continues with further events and eventually an interaction.complete. Silent-skip empty // 1) Empty payload mid-stream (Beta noise): the stream continues with further events and
// payloads (Beta noise); warn only when actual error info is present. // eventually an interaction.complete - silent-skip.
if (event.error?.message || event.error?.code) // 2) Populated payload with message/code: terminal upstream error (also how Gemini reports
console.warn('[GeminiInteractions] SSE error event:', event.error); // cancelled interactions: HTTP 500 to the cancel call + an error SSE on the stream).
// Surface as a dialect-terminating issue so the UI renders it and the stream ends cleanly.
if (event.error?.message || event.error?.code) {
const errorText = `${event.error.code ? `${event.error.code}: ` : ''}${event.error.message || 'Upstream error.'}`;
pt.setDialectTerminatingIssue(errorText, IssueSymbols.Generic, 'srv-warn');
}
break; break;
default: { default: {
@@ -494,6 +494,10 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
} else if (message.content !== undefined && message.content !== null) } else if (message.content !== undefined && message.content !== null)
throw new Error(`unexpected message content type: ${typeof message.content}`); throw new Error(`unexpected message content type: ${typeof message.content}`);
// [DeepSeek, 2026-04-24] Non-streaming reasoning_content -> 'ma' reasoning part (mirror of streaming path above)
if (typeof message.reasoning_content === 'string' && message.reasoning_content)
pt.appendReasoningText(message.reasoning_content);
// [OpenRouter, 2025-01-20] Handle structured reasoning_details // [OpenRouter, 2025-01-20] Handle structured reasoning_details
if (Array.isArray(message.reasoning_details)) { if (Array.isArray(message.reasoning_details)) {
for (const reasoningDetail of message.reasoning_details) { for (const reasoningDetail of message.reasoning_details) {
@@ -21,7 +21,7 @@ export namespace AixDemuxers {
* - 'fast-sse' is our own parser, optimized for performance. to be preferred when possible over 'sse' (check for full compatibility with the upstream) * - 'fast-sse' is our own parser, optimized for performance. to be preferred when possible over 'sse' (check for full compatibility with the upstream)
* - 'json-nl' is used by Ollama * - 'json-nl' is used by Ollama
*/ */
export type StreamDemuxerFormat = 'fast-sse' | 'json-nl' | null; export type StreamDemuxerFormat = 'fast-sse' | 'json-nl';
/** /**
@@ -34,8 +34,8 @@ export namespace AixDemuxers {
return createFastEventSourceDemuxer(); return createFastEventSourceDemuxer();
case 'json-nl': case 'json-nl':
return _createJsonNlDemuxer(); return _createJsonNlDemuxer();
case null: default:
return _nullStreamDemuxerWarn; throw new Error(`Unsupported stream demuxer format: ${format}`);
} }
} }
@@ -115,12 +115,3 @@ function _createJsonNlDemuxer(): AixDemuxers.StreamDemuxer {
}, },
}; };
} }
const _nullStreamDemuxerWarn: AixDemuxers.StreamDemuxer = {
demux: () => {
console.warn('Null demuxer called - shall not happen, as it is only created in non-streaming');
return [];
},
flushRemaining: () => [],
};
@@ -23,8 +23,12 @@ export namespace GeminiInteractionsWire_API_Interactions {
export const getPath = (id: string) => `/v1beta/interactions/${encodeURIComponent(id)}`; export const getPath = (id: string) => `/v1beta/interactions/${encodeURIComponent(id)}`;
// DELETE. Removes the stored record. Orthogonal to cancel; when removed the original connection may still be running and streaming
export const deletePath = (id: string) => `/v1beta/interactions/${encodeURIComponent(id)}`; export const deletePath = (id: string) => `/v1beta/interactions/${encodeURIComponent(id)}`;
// POST. Only cancels background interactions that are still running
export const cancelPath = (id: string) => `/v1beta/interactions/${encodeURIComponent(id)}/cancel`;
// -- Request Body (POST /v1beta/interactions) -- // -- Request Body (POST /v1beta/interactions) --
@@ -189,6 +189,13 @@ export namespace OpenAIWire_Messages {
/** [OpenRouter, 2025-01-20] Reasoning traces with multiple blocks (summary, text, encrypted). */ /** [OpenRouter, 2025-01-20] Reasoning traces with multiple blocks (summary, text, encrypted). */
reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).optional(), reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).optional(),
/**
* [DeepSeek, 2026-04-24] Chain-of-thought reasoning text.
* - Response: emitted by V4 thinking-by-default; parsed into a 'ma' reasoning part.
* - (this) Request: MUST be echoed back on assistant turns that carry tool_calls (otherwise HTTP 400: "The reasoning_content in the thinking mode must be passed back to the API.").
*/
reasoning_content: z.string().nullable().optional(),
// function_call: // ignored, as it's deprecated // function_call: // ignored, as it's deprecated
// name: _optionalParticipantName, // omitted by choice: generally unsupported // name: _optionalParticipantName, // omitted by choice: generally unsupported
}); });
@@ -331,7 +338,7 @@ export namespace OpenAIWire_API_Chat_Completions {
stream_options: z.object({ stream_options: z.object({
include_usage: z.boolean().optional(), // If set, an additional chunk will be streamed with a 'usage' field on the entire request. include_usage: z.boolean().optional(), // If set, an additional chunk will be streamed with a 'usage' field on the entire request.
}).optional(), }).optional(),
reasoning_effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(), // [OpenAI, 2024-12-17] [Perplexity, 2025-06-23] reasoning effort reasoning_effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(), // [OpenAI, 2024-12-17] [Perplexity, 2025-06-23] reasoning effort; [DeepSeek, 2026-04-23] 'max' added for V4
// OpenAI and [OpenRouter, 2025-01-20] Verbosity parameter - maps to output_config.effort for Anthropic models // OpenAI and [OpenRouter, 2025-01-20] Verbosity parameter - maps to output_config.effort for Anthropic models
// https://openrouter.ai/docs/api/reference/parameters#verbosity // https://openrouter.ai/docs/api/reference/parameters#verbosity
verbosity: z.enum([ verbosity: z.enum([
@@ -342,7 +349,7 @@ export namespace OpenAIWire_API_Chat_Completions {
// [OpenRouter, 2025-11-11] Unified reasoning parameter for all models // [OpenRouter, 2025-11-11] Unified reasoning parameter for all models
reasoning: z.object({ reasoning: z.object({
max_tokens: z.int().optional(), // Token-based control (Anthropic, Gemini): 1024-32000 max_tokens: z.int().optional(), // Token-based control (Anthropic, Gemini): 1024-32000
effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(), // Effort-based control (OpenAI o1/o3/GPT-5, xAI, DeepSeek): allocates % of max_tokens effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(), // Effort-based control (OpenAI o1/o3/GPT-5, xAI, DeepSeek): allocates % of max_tokens
enabled: z.boolean().optional(), // Simple enable with medium effort defaults enabled: z.boolean().optional(), // Simple enable with medium effort defaults
exclude: z.boolean().optional(), // Use reasoning internally without returning it in response exclude: z.boolean().optional(), // Use reasoning internally without returning it in response
}).optional(), }).optional(),
@@ -447,6 +454,8 @@ export namespace OpenAIWire_API_Chat_Completions {
search_after_date_filter: z.string().optional(), // Date filter in MM/DD/YYYY format search_after_date_filter: z.string().optional(), // Date filter in MM/DD/YYYY format
// [Moonshot, 2026-01-26] Kimi K2.5 thinking mode control // [Moonshot, 2026-01-26] Kimi K2.5 thinking mode control
// [Z.ai, 2025-xx] GLM thinking mode: type 'enabled' | 'disabled'
// [DeepSeek, 2026-04-23] V4 thinking mode: same binary shape; depth is controlled via top-level `reasoning_effort`
thinking: z.object({ thinking: z.object({
type: z.enum(['enabled', 'disabled']), type: z.enum(['enabled', 'disabled']),
}).optional(), }).optional(),
@@ -1641,7 +1650,7 @@ export namespace OpenAIWire_API_Responses {
// NOTE: .catch() gracefully degrades to undefined since this is a non-critical enrichment path // NOTE: .catch() gracefully degrades to undefined since this is a non-critical enrichment path
tools: z.array(OpenAIWire_Responses_Tools.Tool_schema).optional().catch((ctx) => { tools: z.array(OpenAIWire_Responses_Tools.Tool_schema).optional().catch((ctx) => {
console.warn('[DEV] AIX: OpenAI Responses: unable to parse echoed tools, ignoring:', { tools: ctx.value }); console.warn('[DEV] AIX: OpenAI Responses: unable to parse echoed tools, ignoring:', { tools: ctx.value });
return; return undefined;
}), }),
output: z.array(OpenAIWire_Responses_Items.OutputItem_schema), output: z.array(OpenAIWire_Responses_Items.OutputItem_schema),
+6
View File
@@ -76,6 +76,12 @@ const createRootSlice: StateCreator<BeamStore, [], [], RootStoreSlice> = (_set,
open: (chatHistory: Readonly<DMessage[]>, initialChatLlmId: DLLMId | null, isEditMode: boolean, callback: BeamSuccessCallback) => { open: (chatHistory: Readonly<DMessage[]>, initialChatLlmId: DLLMId | null, isEditMode: boolean, callback: BeamSuccessCallback) => {
const { isOpen: wasAlreadyOpen, terminateKeepingSettings, loadBeamConfig, hadImportedRays, setRayLlmIds, setCurrentGatherLlmId } = _get(); const { isOpen: wasAlreadyOpen, terminateKeepingSettings, loadBeamConfig, hadImportedRays, setRayLlmIds, setCurrentGatherLlmId } = _get();
// if already open, preserve the live state (rays, fusions, callback) - re-invocation must never wipe an ongoing beam
if (wasAlreadyOpen) {
console.warn('[DEV] Beam is already open');
return;
}
// reset pending operations // reset pending operations
terminateKeepingSettings(); terminateKeepingSettings();
@@ -51,6 +51,7 @@ const _oaiEffortOptions = [
] as const; ] as const;
const _miscEffortOptions = [ const _miscEffortOptions = [
{ value: 'max', label: 'Max', description: 'Hardest thinking' } as const,
{ value: 'high', label: 'On', description: 'Multi-step reasoning' } as const, { value: 'high', label: 'On', description: 'Multi-step reasoning' } as const,
{ value: 'none', label: 'Off', description: 'Disable thinking mode' } as const, { value: 'none', label: 'Off', description: 'Disable thinking mode' } as const,
{ value: _UNSPECIFIED, label: 'Default', description: 'Model Default' } as const, { value: _UNSPECIFIED, label: 'Default', description: 'Model Default' } as const,
@@ -378,7 +378,7 @@ const _knownGeminiModels: ({
labelOverride: 'Deep Research Preview (2026-04)', labelOverride: 'Deep Research Preview (2026-04)',
isPreview: true, isPreview: true,
chatPrice: gemini25ProPricing, // pricing not explicitly listed; using 2.5 Pro as baseline chatPrice: gemini25ProPricing, // pricing not explicitly listed; using 2.5 Pro as baseline
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions], interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [], parameterSpecs: [],
benchmark: undefined, // Deep research model, not benchmarkable on standard tests benchmark: undefined, // Deep research model, not benchmarkable on standard tests
// 128K input, 64K output // 128K input, 64K output
@@ -390,7 +390,7 @@ const _knownGeminiModels: ({
labelOverride: 'Deep Research Max Preview (2026-04)', labelOverride: 'Deep Research Max Preview (2026-04)',
isPreview: true, isPreview: true,
chatPrice: gemini25ProPricing, // baseline estimate (see note above) chatPrice: gemini25ProPricing, // baseline estimate (see note above)
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions], interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [], parameterSpecs: [],
benchmark: undefined, // Deep research model, not benchmarkable on standard tests benchmark: undefined, // Deep research model, not benchmarkable on standard tests
}, },
@@ -402,7 +402,7 @@ const _knownGeminiModels: ({
labelOverride: 'Deep Research Pro Preview', labelOverride: 'Deep Research Pro Preview',
isPreview: true, isPreview: true,
chatPrice: gemini25ProPricing, chatPrice: gemini25ProPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions], interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
parameterSpecs: [{ paramId: 'llmVndGeminiThinkingBudget' }], parameterSpecs: [{ paramId: 'llmVndGeminiThinkingBudget' }],
benchmark: undefined, // Deep research model, not benchmarkable on standard tests benchmark: undefined, // Deep research model, not benchmarkable on standard tests
// Note: 128K input context, 64K output context // Note: 128K input context, 64K output context
@@ -1,38 +1,70 @@
import { LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types'; import { LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types'; import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from '../../models.mappings'; import { fromManualMapping, ManualMappings } from '../../models.mappings';
const IF_3 = [LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json]; const IF_4 = [LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn];
// [DeepSeek, 2026-04-24] V4 release - https://api-docs.deepseek.com/news/news260424
// - V4-Pro: 1.6T total / 49B active params; V4-Flash: 284B total / 13B active params (Novel Attention: token-wise compression + DSA)
// - Model IDs listed by /models: deepseek-v4-flash, deepseek-v4-pro
// - 1M context is the default across services; text-only (no vision/multimodal)
// - Legacy aliases still accepted until 2026-07-24: deepseek-chat -> v4-flash (thinking disabled), deepseek-reasoner -> v4-flash (thinking enabled)
// - Reasoning control: object `thinking: { type: 'enabled'|'disabled', reasoning_effort?: 'high'|'max' }`
// (the live API also accepts type: 'adaptive', but it is undocumented and empirically behaves the same as 'enabled'
// on current builds -- deliberately not exposed here; add it once docs + semantics stabilize)
// - V3.2 endpoints no longer accessible via direct model ID (API returns only v4-flash/v4-pro)
const _knownDeepseekChatModels: ManualMappings = [ const _knownDeepseekChatModels: ManualMappings = [
// [Models and Pricing](https://api-docs.deepseek.com/quick_start/pricing) {
// [List Models](https://api-docs.deepseek.com/api/list-models) idPrefix: 'deepseek-v4-pro',
// [Release Notes - V3.2](https://api-docs.deepseek.com/news/news251201) - Released 2025-12-01 label: 'DeepSeek V4 Pro',
description: 'Premium reasoning model with 1M context. Supports extended thinking modes, JSON output, and function calling.',
contextWindow: 1_048_576, // 1M
interfaces: [...IF_4, LLM_IF_OAI_Reasoning],
parameterSpecs: [
{ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high', 'max'] },
],
maxCompletionTokens: 65536, // conservative default; docs advertise up to 384K
chatPrice: { input: 1.74, output: 3.48, cache: { cType: 'oai-ac', read: 0.145 } },
benchmark: { cbaElo: 1463 }, // lmarena: deepseek-v4-pro (thinking variant 1462, near-tied)
},
{
idPrefix: 'deepseek-v4-flash',
label: 'DeepSeek V4 Flash',
description: 'Fast general-purpose model with 1M context. Supports extended thinking modes, JSON output, and function calling.',
contextWindow: 1_048_576, // 1M
interfaces: [...IF_4, LLM_IF_OAI_Reasoning],
parameterSpecs: [
{ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high', 'max'] },
],
maxCompletionTokens: 65536, // conservative default; docs advertise up to 384K
chatPrice: { input: 0.14, output: 0.28, cache: { cType: 'oai-ac', read: 0.028 } },
benchmark: { cbaElo: 1439 }, // lmarena: deepseek-v4-flash-thinking (non-thinking variant 1433)
},
// Legacy aliases - API routes both to deepseek-v4-flash with thinking pre-set
{ {
idPrefix: 'deepseek-reasoner', idPrefix: 'deepseek-reasoner',
label: 'DeepSeek V3.2 (Reasoner)', label: 'DeepSeek Reasoner (legacy)',
description: 'Reasoning model with Chain-of-Thought capabilities, 128K context length. Supports JSON output and function calling.', description: 'Legacy alias: routes to DeepSeek V4 Flash with thinking enabled. Retires 2026-07-24.',
contextWindow: 131072, // 128K contextWindow: 1_048_576,
interfaces: [...IF_3, LLM_IF_OAI_Reasoning], interfaces: [...IF_4, LLM_IF_OAI_Reasoning],
// parameterSpecs: [ maxCompletionTokens: 65536,
// { paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] }, // not supported: this model is reasoning only chatPrice: { input: 0.14, output: 0.28, cache: { cType: 'oai-ac', read: 0.028 } },
// ], benchmark: { cbaElo: 1439 }, // lmarena: deepseek-v4-flash-thinking
maxCompletionTokens: 32768, // default, max: 65536 isLegacy: true,
chatPrice: { input: 0.28, output: 0.42, cache: { cType: 'oai-ac', read: 0.028 } },
benchmark: { cbaElo: 1425 }, // deepseek-v3.2-exp-thinking
}, },
{ {
idPrefix: 'deepseek-chat', idPrefix: 'deepseek-chat',
label: 'DeepSeek V3.2', label: 'DeepSeek Chat (legacy)',
description: 'General-purpose model with 128K context length. Supports JSON output and function calling.', description: 'Legacy alias: routes to DeepSeek V4 Flash with thinking disabled. Retires 2026-07-24.',
contextWindow: 131072, // 128K contextWindow: 1_048_576,
interfaces: IF_3, interfaces: IF_4,
maxCompletionTokens: 8192, // default is 4096, max is 8192 maxCompletionTokens: 65536,
chatPrice: { input: 0.28, output: 0.42, cache: { cType: 'oai-ac', read: 0.028 } }, chatPrice: { input: 0.14, output: 0.28, cache: { cType: 'oai-ac', read: 0.028 } },
benchmark: { cbaElo: 1424 }, // deepseek-v3.2 benchmark: { cbaElo: 1433 }, // lmarena: deepseek-v4-flash (non-thinking)
isLegacy: true,
}, },
]; ];
@@ -246,7 +246,10 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
// 0-day: xAI/Grok/Moonshot/Z.ai/DeepSeek models get default reasoning effort if not inherited // 0-day: xAI/Grok/Moonshot/Z.ai/DeepSeek models get default reasoning effort if not inherited
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndMiscEffort')) { if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndMiscEffort')) {
// console.log('[DEV] openRouterModelToModelDescription: unexpected xAI/Grok/DeepSeek reasoning model:', model.id); // console.log('[DEV] openRouterModelToModelDescription: unexpected xAI/Grok/DeepSeek reasoning model:', model.id);
parameterSpecs.push({ paramId: 'llmVndMiscEffort' }); // binary thinking for these vendors // Binary thinking only: OpenRouter's unified reasoning API currently rejects 'max' (see openai.chatCompletions.ts).
// We pin enumValues here so the shared llmVndMiscEffort registry (which also includes 'max' for native DeepSeek V4)
// does not surface 'max' in the UI for OR-routed models that can't honor it.
parameterSpecs.push({ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] });
} }
break; break;