mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Compare commits
40 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9fc0b39730 | |||
| 194bfe23a1 | |||
| 35110480ef | |||
| 959595e33a | |||
| a960424dfb | |||
| 0df6c7d08b | |||
| 65c841e7a7 | |||
| b21b8cc982 | |||
| aa2c4f06b7 | |||
| b8d7b4ec10 | |||
| c48520255a | |||
| 0790da989d | |||
| 506d24d2fd | |||
| 1348dbf493 | |||
| ce677f3cd9 | |||
| 39203d78e3 | |||
| 2ef7daf369 | |||
| cff3d90613 | |||
| 9f89243d7f | |||
| 784ee9a4da | |||
| 678e6b8ba1 | |||
| 30e301c496 | |||
| b22904f6bb | |||
| 3f0de7ddca | |||
| 9a6f0f9202 | |||
| 4f0bae5657 | |||
| 2101f06195 | |||
| 6d54b5594c | |||
| 36b8e5b1df | |||
| 8252d671c7 | |||
| 30d97c94aa | |||
| 82654a00d4 | |||
| 9595f14ddc | |||
| 8c496074b2 | |||
| 4d097d7136 | |||
| 178619d275 | |||
| 59c8b2538d | |||
| 443b72c52a | |||
| ae13abef45 | |||
| 83ae02ef9b |
@@ -583,9 +583,11 @@ export function AppChat() {
|
||||
}, []);
|
||||
|
||||
useGlobalShortcuts('AppChat', React.useMemo(() => [
|
||||
// focused conversation
|
||||
{ key: 'z', ctrl: true, shift: true, disabled: isFocusedChatEmpty, action: handleMessageRegenerateLastInFocusedPane, description: 'Retry' },
|
||||
{ key: 'b', ctrl: true, shift: true, disabled: isFocusedChatEmpty, action: handleMessageBeamLastInFocusedPane, description: 'Beam Edit' },
|
||||
// focused conversation (excluded when Beam is open so the keystroke passes through to the browser)
|
||||
...(beamOpenStoreInFocusedPane ? [] : [
|
||||
{ key: 'z', ctrl: true, shift: true, disabled: isFocusedChatEmpty, action: handleMessageRegenerateLastInFocusedPane, description: 'Retry' },
|
||||
{ key: 'b', ctrl: true, shift: true, disabled: isFocusedChatEmpty, action: handleMessageBeamLastInFocusedPane, description: 'Beam Edit' },
|
||||
]),
|
||||
{ key: 'o', ctrl: true, action: handleConversationsImportFormFilePicker },
|
||||
{ key: 's', ctrl: true, action: () => handleFileSaveConversation(focusedPaneConversationId) },
|
||||
{ key: 'n', ctrl: true, shift: true, action: () => handleConversationNewInFocusedPane(false, false) },
|
||||
@@ -603,7 +605,7 @@ export function AppChat() {
|
||||
{ key: 'p', ctrl: true, action: () => personaDropdownRef.current?.openListbox() /*, description: 'Open Persona Dropdown'*/ },
|
||||
// focused conversation llm
|
||||
{ key: 'o', ctrl: true, shift: true, action: handleOpenChatLlmOptions },
|
||||
], [focusedPaneConversationId, handleConversationNewInFocusedPane, handleConversationReset, handleConversationsImportFormFilePicker, handleDeleteConversations, handleFileSaveConversation, handleMessageBeamLastInFocusedPane, handleMessageRegenerateLastInFocusedPane, handleMoveFocus, handleNavigateHistoryInFocusedPane, handleOpenChatLlmOptions, isFocusedChatEmpty]));
|
||||
], [beamOpenStoreInFocusedPane, focusedPaneConversationId, handleConversationNewInFocusedPane, handleConversationReset, handleConversationsImportFormFilePicker, handleDeleteConversations, handleFileSaveConversation, handleMessageBeamLastInFocusedPane, handleMessageRegenerateLastInFocusedPane, handleMoveFocus, handleNavigateHistoryInFocusedPane, handleOpenChatLlmOptions, isFocusedChatEmpty]));
|
||||
|
||||
|
||||
return <>
|
||||
|
||||
@@ -33,7 +33,10 @@ const _styles = {
|
||||
} as const,
|
||||
'& nav > ol > li:first-of-type': {
|
||||
overflow: 'hidden',
|
||||
maxWidth: { xs: '110px', md: '140px' },
|
||||
// allow the chat title to use available space, shrinking gracefully when the bar is narrow
|
||||
// NOTE: already performed by virtue of the breadcrumb having agi-ellipsize on the crumbs
|
||||
// flexShrink: 1,
|
||||
// minWidth: '60px',
|
||||
} as const,
|
||||
|
||||
} as const,
|
||||
|
||||
@@ -16,6 +16,7 @@ const ARM_TIMEOUT_MS = 4000;
|
||||
*/
|
||||
export function BlockOpUpstreamResume(props: {
|
||||
upstreamHandle: Exclude<DMessageGenerator['upstreamHandle'], undefined>,
|
||||
pending?: boolean; // true while the message is actively streaming; labels the Delete button as "Stop"
|
||||
onResume?: () => void | Promise<void>;
|
||||
onCancel?: () => void | Promise<void>;
|
||||
onDelete?: () => void | Promise<void>;
|
||||
@@ -30,8 +31,8 @@ export function BlockOpUpstreamResume(props: {
|
||||
|
||||
// expiration: boolean is evaluated at render (may lag briefly if nothing re-renders past expiry).
|
||||
// TimeAgo handles its own tick for the label; the button's disabled state is the only consumer of this flag.
|
||||
const { expiresAt, runId = '' } = props.upstreamHandle;
|
||||
const isExpired = expiresAt != null && Date.now() > expiresAt;
|
||||
const { expiresAt /*, runId = ''*/ } = props.upstreamHandle;
|
||||
// const isExpired = expiresAt != null && Date.now() > expiresAt;
|
||||
|
||||
// handlers
|
||||
|
||||
@@ -102,7 +103,7 @@ export function BlockOpUpstreamResume(props: {
|
||||
{props.onResume && (
|
||||
<Tooltip title='Resume generation from last checkpoint'>
|
||||
<Button
|
||||
disabled={isResuming || isCancelling || isDeleting || isExpired}
|
||||
disabled={isResuming || isCancelling || isDeleting}
|
||||
loading={isResuming}
|
||||
startDecorator={<PlayArrowRoundedIcon color='success' />}
|
||||
onClick={handleResume}
|
||||
@@ -126,7 +127,7 @@ export function BlockOpUpstreamResume(props: {
|
||||
)}
|
||||
|
||||
{props.onDelete && (
|
||||
<Tooltip title={deleteArmed ? 'Click again to confirm - cancels the run upstream (no resume after)' : 'Cancel the upstream run'}>
|
||||
<Tooltip title={deleteArmed ? 'Click again to confirm - cancels the run upstream (no resume after)' : (props.pending ? 'Stop this response and cancel the upstream run' : 'Cancel the upstream run')}>
|
||||
<Button
|
||||
loading={isDeleting}
|
||||
color={deleteArmed ? 'danger' : 'neutral'}
|
||||
@@ -135,7 +136,7 @@ export function BlockOpUpstreamResume(props: {
|
||||
onClick={handleDelete}
|
||||
disabled={isResuming || isCancelling || isDeleting}
|
||||
>
|
||||
{deleteArmed ? 'Confirm?' : 'Cancel'}
|
||||
{deleteArmed ? 'Confirm?' : (props.pending ? 'Stop' : 'Cancel')}
|
||||
</Button>
|
||||
</Tooltip>
|
||||
)}
|
||||
|
||||
@@ -898,11 +898,12 @@ export function ChatMessage(props: {
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Upstream Resume - shows whenever there's a stored handle (incl. post-reload, where no error fragment is present) */}
|
||||
{!messagePendingIncomplete && props.isBottom && fromAssistant && messageGenerator?.upstreamHandle && (!!onMessageUpstreamResume || !!onMessageUpstreamDelete) && (
|
||||
{/* Upstream Resume - shows whenever there's a stored handle (incl. post-reload, and while streaming so Stop can cancel the upstream run) */}
|
||||
{props.isBottom && fromAssistant && messageGenerator?.upstreamHandle && (!!onMessageUpstreamResume || !!onMessageUpstreamDelete) && (
|
||||
<BlockOpUpstreamResume
|
||||
upstreamHandle={messageGenerator.upstreamHandle}
|
||||
onResume={onMessageUpstreamResume ? handleUpstreamResume : undefined}
|
||||
pending={messagePendingIncomplete}
|
||||
onResume={(!messagePendingIncomplete && onMessageUpstreamResume) ? handleUpstreamResume : undefined}
|
||||
onDelete={onMessageUpstreamDelete ? handleUpstreamDelete : undefined}
|
||||
/>
|
||||
)}
|
||||
|
||||
@@ -166,9 +166,9 @@ export function AppChatSettingsAI() {
|
||||
tooltip={<>
|
||||
When Claude uses tools like code execution, it may produce text and image files stored in Anthropic's File API. This setting controls whether Big-AGI should automatically download and embed them in the chat.
|
||||
<ul>
|
||||
<li><b>Off</b>: keep as references (default).</li>
|
||||
<li><b>Inline</b>: download and embed text/images.</li>
|
||||
<li><b>Inline + Free</b>: embed, then delete from Anthropic to free storage.</li>
|
||||
<li><b>Show</b>: keep as references.</li>
|
||||
<li><b>Embed</b>: download and embed text/images (default).</li>
|
||||
<li><b>Embed + Free</b>: embed, then delete from Anthropic to free storage.</li>
|
||||
</ul>
|
||||
Only affects Anthropic models.
|
||||
</>}
|
||||
|
||||
@@ -23,7 +23,7 @@ export const Release = {
|
||||
|
||||
// this is here to trigger revalidation of data, e.g. models refresh
|
||||
Monotonics: {
|
||||
Aix: 67,
|
||||
Aix: 69,
|
||||
NewsVersion: 204,
|
||||
},
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import { bareBonesPromptMixer } from '~/modules/persona/pmix/pmix';
|
||||
import { SystemPurposes } from '../../data';
|
||||
|
||||
import { BeamStore, createBeamVanillaStore } from '~/modules/beam/store-beam_vanilla';
|
||||
import { autoConversationTitle } from '~/modules/aifn/autotitle/autoTitle';
|
||||
import { useModuleBeamStore } from '~/modules/beam/store-module-beam';
|
||||
|
||||
import type { DConversationId } from '~/common/stores/chat/chat.conversation';
|
||||
@@ -275,6 +276,10 @@ export class ConversationHandler {
|
||||
|
||||
// close beam
|
||||
terminateKeepingSettings();
|
||||
|
||||
// auto-title the conversation if enabled (parity with chat-persona flow — fixes #1078)
|
||||
if (getChatAutoAI().autoTitleChat)
|
||||
void autoConversationTitle(this.conversationId, false);
|
||||
};
|
||||
|
||||
beamOpen(viewHistory, getChatLLMId(), !!destReplaceMessageId, onBeamSuccess);
|
||||
|
||||
@@ -175,7 +175,8 @@ export const DModelParameterRegistry = {
|
||||
label: 'Thinking',
|
||||
type: 'enum',
|
||||
description: 'Enable or disable extended thinking mode.',
|
||||
values: ['none', 'high'],
|
||||
values: ['none', 'high', 'max'],
|
||||
// 'max' is for now DeepSeek V4-specific (reasoning_effort=max); other vendors restrict via enumValues
|
||||
// undefined means vendor default (usually 'high', i.e. thinking enabled)
|
||||
}),
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ export async function autoConversationTitle(conversationId: string, forceReplace
|
||||
autoTitleLlmId,
|
||||
'You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.',
|
||||
`Analyze the given short conversation (every line is truncated) and extract a concise chat title that summarizes the conversation in as little as a couple of words.
|
||||
Only respond with the lowercase short title and nothing else.
|
||||
Only respond with the short title and nothing else.
|
||||
|
||||
\`\`\`
|
||||
${historyLines.join('\n')}
|
||||
|
||||
@@ -905,9 +905,12 @@ export class ContentReassembler {
|
||||
/**
|
||||
* Stores raw termination data from the wire - classification deferred to finalizeReassembly()
|
||||
*/
|
||||
private onCGEnd({ terminationReason, tokenStopReason }: Extract<AixWire_Particles.ChatGenerateOp, { cg: 'end' }>): void {
|
||||
private onCGEnd({ terminationReason, tokenStopReason, tokenStopError }: Extract<AixWire_Particles.ChatGenerateOp, { cg: 'end' }>): void {
|
||||
this.S.terminationReason = terminationReason;
|
||||
this.S.dialectStopReason = tokenStopReason;
|
||||
// Vendor-composed stop error, surfaced as a complementary error fragment alongside the generic classification message
|
||||
if (tokenStopError)
|
||||
this._appendErrorFragment(tokenStopError);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -409,11 +409,15 @@ export async function aixCGR_ChatSequence_FromDMessagesOrThrow(
|
||||
break;
|
||||
|
||||
case 'ma':
|
||||
// Preserve reasoning continuity across turns. Two channels, any one is sufficient:
|
||||
// Preserve reasoning continuity across turns. Three channels, any one is sufficient:
|
||||
// - Anthropic: part.textSignature / part.redactedData (bespoke fields, see Anthropic extended thinking docs)
|
||||
// - OpenAI/Gemini: _vnd sidecar (reasoningItem.* / thoughtSignature, generic vendor-state mechanism)
|
||||
// - OpenAI Responses / Gemini: _vnd sidecar (reasoningItem.* / thoughtSignature, opaque continuity handle)
|
||||
// - DeepSeek V4 (OpenAI chat-completions): plain reasoning text in aText is the payload itself
|
||||
const oaiReasoning = _vnd?.openai?.reasoningItem;
|
||||
const hasReasoningHandle = aPart.textSignature || aPart.redactedData?.length || oaiReasoning?.encryptedContent || oaiReasoning?.id;
|
||||
const hasReasoningHandle =
|
||||
(aPart.textSignature || aPart.redactedData?.length)
|
||||
|| (oaiReasoning?.encryptedContent || oaiReasoning?.id)
|
||||
|| (aPart.aText && aPart.aType === 'reasoning'); // DeepSeek V4 reasoning in plain text - NOTE: will send LOTS of 'ma' parts (e.g. to Gemini, which doesn't even need them)
|
||||
if (hasReasoningHandle) {
|
||||
const aModelAuxPart = aPart as AixParts_ModelAuxPart; // NOTE: this is a forced cast from readonly string[] to string[], but not a big deal here
|
||||
modelMessage.parts.push(_vnd ? { ...aModelAuxPart, _vnd } : aModelAuxPart);
|
||||
@@ -653,7 +657,7 @@ function _clientCreateAixMetaInReferenceToPart(items: DMetaReferenceItem[]): Aix
|
||||
|
||||
|
||||
export async function clientHotFixGenerateRequest_ApplyAll(llmInterfaces: DLLM['interfaces'], aixChatGenerate: AixAPIChatGenerate_Request, modelName: string): Promise<{
|
||||
shallDisableStreaming: boolean;
|
||||
hotfixNoStream: boolean;
|
||||
workaroundsCount: number;
|
||||
}> {
|
||||
|
||||
@@ -676,12 +680,12 @@ export async function clientHotFixGenerateRequest_ApplyAll(llmInterfaces: DLLM['
|
||||
workaroundsCount += await clientHotFixGenerateRequest_ConvertWebP(aixChatGenerate, 'image/jpeg');
|
||||
|
||||
// Disable streaming for select chat models that don't support it (e.g. o1-preview (old) and o1-2024-12-17)
|
||||
const shallDisableStreaming = llmInterfaces.includes(LLM_IF_HOTFIX_NoStream);
|
||||
const hotfixNoStream = llmInterfaces.includes(LLM_IF_HOTFIX_NoStream);
|
||||
|
||||
if (workaroundsCount > 0)
|
||||
console.warn(`[DEV] Working around '${modelName}' model limitations: client-side applied ${workaroundsCount} workarounds`);
|
||||
|
||||
return { shallDisableStreaming, workaroundsCount };
|
||||
return { hotfixNoStream, workaroundsCount };
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ export async function* clientSideChatGenerate(
|
||||
return dispatch;
|
||||
});
|
||||
|
||||
yield* executeChatGenerateWithContinuation(dispatchCreator, streaming, abortSignal, _d);
|
||||
yield* executeChatGenerateWithContinuation(dispatchCreator, abortSignal, _d);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -48,7 +48,7 @@ export async function* clientSideReattachUpstream(
|
||||
access: AixAPI_Access,
|
||||
resumeHandle: AixAPI_ResumeHandle,
|
||||
context: AixAPI_Context_ChatGenerate,
|
||||
streaming: true,
|
||||
streaming: boolean,
|
||||
connectionOptions: AixAPI_ConnectionOptions_ChatGenerate,
|
||||
abortSignal: AbortSignal,
|
||||
): AsyncGenerator<AixWire_Particles.ChatGenerateOp, void> {
|
||||
@@ -56,7 +56,7 @@ export async function* clientSideReattachUpstream(
|
||||
const _d: AixDebugObject = _createClientDebugConfig(access, connectionOptions, context.name);
|
||||
const dispatchCreator = () => createChatGenerateResumeDispatch(access, resumeHandle, streaming);
|
||||
|
||||
yield * executeChatGenerateWithContinuation(dispatchCreator, streaming, abortSignal, _d);
|
||||
yield * executeChatGenerateWithContinuation(dispatchCreator, abortSignal, _d);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -342,7 +342,7 @@ export async function aixChatGenerateText_Simple(
|
||||
aixContextRef: AixAPI_Context_ChatGenerate['ref'],
|
||||
// optional options
|
||||
clientOptions?: Partial<AixClientOptions>, // this makes the abortController optional
|
||||
// optional callback for streaming
|
||||
// optional callback - if provided, streaming is activated
|
||||
onTextStreamUpdate?: (text: string, isDone: boolean, generator: DMessageGenerator) => MaybePromise<void>,
|
||||
): Promise<string> {
|
||||
|
||||
@@ -363,14 +363,13 @@ export async function aixChatGenerateText_Simple(
|
||||
// Aix Context
|
||||
const aixContext = aixCreateChatGenerateContext(aixContextName, aixContextRef);
|
||||
|
||||
// Aix Streaming - implicit if the callback is provided
|
||||
let aixStreaming = !!onTextStreamUpdate;
|
||||
// Caller streaming preference - implicit: stream if a callback is provided
|
||||
const callerStreaming = !!onTextStreamUpdate;
|
||||
|
||||
|
||||
// Client-side late stage model HotFixes
|
||||
const { shallDisableStreaming } = await clientHotFixGenerateRequest_ApplyAll(llm.interfaces, aixChatGenerate, llmParameters.llmRef || llm.id);
|
||||
if (shallDisableStreaming || aixModel.forceNoStream)
|
||||
aixStreaming = false;
|
||||
const { hotfixNoStream } = await clientHotFixGenerateRequest_ApplyAll(llm.interfaces, aixChatGenerate, llmParameters.llmRef || llm.id);
|
||||
const wireStreaming = !hotfixNoStream && !aixModel.forceNoStream ? callerStreaming : false;
|
||||
|
||||
|
||||
// Variable to store the final text
|
||||
@@ -398,11 +397,11 @@ export async function aixChatGenerateText_Simple(
|
||||
aixModel,
|
||||
aixChatGenerate,
|
||||
aixContext,
|
||||
aixStreaming,
|
||||
wireStreaming,
|
||||
state.generator,
|
||||
abortSignal,
|
||||
clientOptions?.throttleParallelThreads ?? 0,
|
||||
!aixStreaming ? undefined : async (ll: AixChatGenerateContent_LL, _isDone: boolean /* we want to issue this, in case the next action is an exception */) => {
|
||||
!onTextStreamUpdate ? undefined : async (ll: AixChatGenerateContent_LL, _isDone: boolean /* we want to issue this, in case the next action is an exception */) => {
|
||||
_llToL2Simple(ll, state);
|
||||
if (onTextStreamUpdate && state.text !== null)
|
||||
await onTextStreamUpdate(state.text, false, state.generator);
|
||||
@@ -521,7 +520,7 @@ type _AixChatGenerateContent_DMessageGuts_WithOutcome = AixChatGenerateContent_D
|
||||
* @param llmId - ID of the Language Model to use
|
||||
* @param aixChatGenerate - Multi-modal chat generation request specifics, including Tools and high-level metadata
|
||||
* @param aixContext - Information about how this chat generation is being used
|
||||
* @param aixStreaming - Whether to use streaming for generation
|
||||
* @param aixStreaming - Caller's wire-streaming preference. Subject to override by model/hotfix constraints, or dispatch constraints
|
||||
* @param clientOptions - Client options for the operation
|
||||
* @param onStreamingUpdate - Optional callback for streaming updates
|
||||
*
|
||||
@@ -551,10 +550,9 @@ export async function aixChatGenerateContent_DMessage_orThrow<TServiceSettings e
|
||||
vndAntTransformInlineFiles: aixAccess.dialect === 'anthropic' ? getVndAntInlineFiles() : undefined,
|
||||
});
|
||||
|
||||
// Client-side late stage model HotFixes
|
||||
const { shallDisableStreaming } = await clientHotFixGenerateRequest_ApplyAll(llm.interfaces, aixChatGenerate, llmParameters.llmRef || llm.id);
|
||||
if (shallDisableStreaming || aixModel.forceNoStream)
|
||||
aixStreaming = false;
|
||||
// Client-side late stage model HotFixes - collapse the caller's requested streaming preference into the effective wire-streaming decision after constraints (hotfix gate, model.forceNoStream)
|
||||
const { hotfixNoStream } = await clientHotFixGenerateRequest_ApplyAll(llm.interfaces, aixChatGenerate, llmParameters.llmRef || llm.id);
|
||||
const wireStreaming = !hotfixNoStream && !aixModel.forceNoStream ? aixStreaming : false;
|
||||
|
||||
// Legacy Note: awaited OpenAI moderation check was removed (was only on this codepath)
|
||||
|
||||
@@ -584,7 +582,7 @@ export async function aixChatGenerateContent_DMessage_orThrow<TServiceSettings e
|
||||
aixModel,
|
||||
aixChatGenerate,
|
||||
aixContext,
|
||||
aixStreaming,
|
||||
wireStreaming,
|
||||
dMessage.generator,
|
||||
clientOptions.abortSignal,
|
||||
clientOptions.throttleParallelThreads ?? 0,
|
||||
@@ -753,7 +751,7 @@ export type AixChatGenerateTerminal_LL = 'completed' | 'aborted' | 'failed';
|
||||
*
|
||||
* Contract:
|
||||
* - empty fragments means no content yet, and no error
|
||||
* - aixStreaming hints the source, but can be respected or not
|
||||
* - wireStreaming hints the wire transport (SSE vs single response), but can be respected or not by the dispatch (e.g. SSE-only APIs ignore a `false` value)
|
||||
* - onReassemblyUpdate is optional, you can ignore the updates and await the final result
|
||||
* - errors become Error fragments, and they can be dialect-sent, dispatch-excepts, client-read issues or even user aborts
|
||||
* - DOES NOT THROW, but the final accumulator may contain error fragments
|
||||
@@ -772,7 +770,7 @@ export type AixChatGenerateTerminal_LL = 'completed' | 'aborted' | 'failed';
|
||||
* - special parts include 'In Reference To' (a decorator of messages)
|
||||
* - other special parts include the Anthropic Caching hints, on select message
|
||||
* @param aixContext specifies the scope of the caller, such as what's the high level objective of this call
|
||||
* @param aixStreaming requests the source to provide incremental updates
|
||||
* @param wireStreaming the effective wire-level streaming decision (already collapsed from caller preference + model/hotfix constraints); drives tRPC `streaming` field and downstream dispatch body shape
|
||||
* @param initialGenerator generator initial value, which will be updated for every new piece of information received
|
||||
* @param abortSignal allows the caller to stop the operation
|
||||
* @param throttleParallelThreads allows the caller to limit the number of parallel threads
|
||||
@@ -790,7 +788,7 @@ async function _aixChatGenerateContent_LL(
|
||||
aixModel: AixAPI_Model,
|
||||
aixChatGenerate: AixAPIChatGenerate_Request,
|
||||
aixContext: AixAPI_Context_ChatGenerate,
|
||||
aixStreaming: boolean,
|
||||
wireStreaming: boolean,
|
||||
// others
|
||||
initialGenerator: DMessageGenerator,
|
||||
abortSignal: AbortSignal,
|
||||
@@ -804,10 +802,13 @@ async function _aixChatGenerateContent_LL(
|
||||
const inspectorTransport = !inspectorEnabled ? undefined : aixAccess.clientSideFetch ? 'csf' : 'trpc';
|
||||
const inspectorContext = !inspectorEnabled ? undefined : { contextName: aixContext.name, contextRef: aixContext.ref };
|
||||
|
||||
// [DEV] Inspector - request body override
|
||||
// Inspector - override request body
|
||||
const requestBodyOverrideJson = inspectorEnabled && aixClientDebuggerGetRBO();
|
||||
const debugRequestBodyOverride = !requestBodyOverrideJson ? false : JSON.parse(requestBodyOverrideJson);
|
||||
|
||||
// Inspector - force disable streaming (note: dispatches may still override this)
|
||||
if (getAixDebuggerNoStreaming()) wireStreaming = false;
|
||||
|
||||
/**
|
||||
* FIXME: implement client selection of resumability - aixAccess option?
|
||||
* NOTE: for Gemini Deep Research, it's on by default, so both auto-reattach on network breaks (currently disabled)
|
||||
@@ -827,8 +828,11 @@ async function _aixChatGenerateContent_LL(
|
||||
// [CSF] Pre-load client-side executor if needed - type inference works here, no need to type
|
||||
let clientSideChatGenerate;
|
||||
let clientSideReattachUpstream;
|
||||
if (aixAccess.clientSideFetch)
|
||||
({ clientSideChatGenerate, clientSideReattachUpstream } = await _loadCsfModuleOrThrow());
|
||||
if (aixAccess.clientSideFetch) {
|
||||
const csf = await _loadCsfModuleOrThrow();
|
||||
clientSideChatGenerate = csf.clientSideChatGenerate;
|
||||
clientSideReattachUpstream = csf.clientSideReattachUpstream;
|
||||
}
|
||||
|
||||
|
||||
// Client-side particle transforms:
|
||||
@@ -891,7 +895,7 @@ async function _aixChatGenerateContent_LL(
|
||||
aixModel,
|
||||
aixChatGenerate,
|
||||
aixContext,
|
||||
getAixDebuggerNoStreaming() ? false : aixStreaming,
|
||||
wireStreaming,
|
||||
aixConnectionOptions,
|
||||
abortSignal,
|
||||
) :
|
||||
@@ -901,7 +905,7 @@ async function _aixChatGenerateContent_LL(
|
||||
model: aixModel,
|
||||
chatGenerate: aixChatGenerate,
|
||||
context: aixContext,
|
||||
streaming: getAixDebuggerNoStreaming() ? false : aixStreaming, // [DEV] disable streaming if set in the UX (testing)
|
||||
streaming: wireStreaming,
|
||||
connectionOptions: aixConnectionOptions,
|
||||
}, { signal: abortSignal })
|
||||
|
||||
@@ -912,7 +916,7 @@ async function _aixChatGenerateContent_LL(
|
||||
aixAccess,
|
||||
accumulator_LL.generator.upstreamHandle,
|
||||
aixContext,
|
||||
true, // streaming - reattach is only validated for streaming for now
|
||||
wireStreaming,
|
||||
aixConnectionOptions,
|
||||
abortSignal,
|
||||
) :
|
||||
@@ -921,7 +925,7 @@ async function _aixChatGenerateContent_LL(
|
||||
access: aixAccess,
|
||||
upstreamHandle: accumulator_LL.generator.upstreamHandle,
|
||||
context: aixContext,
|
||||
streaming: true,
|
||||
streaming: wireStreaming,
|
||||
connectionOptions: aixConnectionOptions,
|
||||
}, { signal: abortSignal })
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import { Box, Card, Chip, Divider, Sheet, Typography } from '@mui/joy';
|
||||
import { RenderCodeMemo } from '~/modules/blocks/code/RenderCode';
|
||||
|
||||
import { ExpanderControlledBox } from '~/common/components/ExpanderControlledBox';
|
||||
import { objectDeepCloneWithStringLimit } from '~/common/util/objectUtils';
|
||||
import TimelapseIcon from '@mui/icons-material/Timelapse';
|
||||
|
||||
import type { AixClientDebugger } from './memstore-aix-client-debugger';
|
||||
@@ -184,12 +185,10 @@ export function AixDebuggerFrame(props: {
|
||||
{/* List of particles */}
|
||||
{frame.particles.map((particle, idx) => {
|
||||
|
||||
// truncated preview of particle content
|
||||
// preview of particle content: preserve structure, trim long string fields
|
||||
let jsonPreview = '';
|
||||
try {
|
||||
const content = particle.content;
|
||||
jsonPreview = JSON.stringify(content).substring(0, 1024);
|
||||
if (jsonPreview.length >= 1024) jsonPreview += '...';
|
||||
jsonPreview = JSON.stringify(objectDeepCloneWithStringLimit(particle.content, 'aix-debugger-particle', 64));
|
||||
} catch (e) {
|
||||
jsonPreview = 'Error parsing content';
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ export const aixRouter = createTRPCRouter({
|
||||
const _d = _createDebugConfig(input.access, input.connectionOptions, input.context.name);
|
||||
const dispatchCreator = () => createChatGenerateDispatch(input.access, input.model, input.chatGenerate, input.streaming, !!input.connectionOptions?.enableResumability);
|
||||
|
||||
yield* executeChatGenerateWithContinuation(dispatchCreator, input.streaming, ctx.reqSignal, _d);
|
||||
yield* executeChatGenerateWithContinuation(dispatchCreator, ctx.reqSignal, _d);
|
||||
}),
|
||||
|
||||
/**
|
||||
@@ -42,14 +42,14 @@ export const aixRouter = createTRPCRouter({
|
||||
access: AixWire_API.Access_schema,
|
||||
upstreamHandle: AixWire_API.UpstreamHandle_schema, // reattach uses a handle instead of 'model + chatGenerate'
|
||||
context: AixWire_API.ContextChatGenerate_schema,
|
||||
streaming: z.literal(true), // reattach is always streaming
|
||||
streaming: z.boolean(),
|
||||
connectionOptions: AixWire_API.ConnectionOptionsChatGenerate_schema.pick({ debugDispatchRequest: true }).optional(), // debugDispatchRequest
|
||||
}))
|
||||
.mutation(async function* ({ input, ctx }) {
|
||||
const _d = _createDebugConfig(input.access, input.connectionOptions, input.context.name);
|
||||
const dispatchCreator = () => createChatGenerateResumeDispatch(input.access, input.upstreamHandle, input.streaming);
|
||||
|
||||
yield* executeChatGenerateWithContinuation(dispatchCreator, input.streaming, ctx.reqSignal, _d);
|
||||
yield* executeChatGenerateWithContinuation(dispatchCreator, ctx.reqSignal, _d);
|
||||
}),
|
||||
|
||||
/**
|
||||
|
||||
@@ -689,7 +689,7 @@ export namespace AixWire_Particles {
|
||||
|
||||
export type ChatControlOp =
|
||||
// | { cg: 'start' } // not really used for now
|
||||
| { cg: 'end', terminationReason: CGEndReason /* we know why we're sending 'end' */, tokenStopReason?: GCTokenStopReason /* we may or not have gotten a logical token stop reason from the dispatch */ }
|
||||
| { cg: 'end', terminationReason: CGEndReason /* we know why we're sending 'end' */, tokenStopReason?: GCTokenStopReason /* we may or not have gotten a logical token stop reason from the dispatch */, tokenStopError?: string /* optional vendor-composed human-readable detail paired with tokenStopReason */ }
|
||||
| { cg: 'issue', issueId: CGIssueId, issueText: string }
|
||||
| { cg: 'aix-info', ait: 'flow-cont' /* important: establishes a checkpoint */, text: string }
|
||||
| { cg: 'aix-retry-reset', rScope: 'srv-dispatch' | 'srv-op' | 'cli-ll', rClearStrategy: 'none' | 'since-checkpoint' | 'all', reason: string, attempt: number, maxAttempts: number, delayMs: number, causeHttp?: number, causeConn?: string }
|
||||
|
||||
@@ -56,6 +56,7 @@ export class ChatGenerateTransmitter implements IParticleTransmitter {
|
||||
|
||||
// Token stop reason
|
||||
private tokenStopReason: AixWire_Particles.GCTokenStopReason | undefined = undefined;
|
||||
private tokenStopError: string | undefined = undefined;
|
||||
|
||||
// Metrics
|
||||
private accMetrics: AixWire_Particles.CGSelectMetrics | undefined = undefined;
|
||||
@@ -105,6 +106,7 @@ export class ChatGenerateTransmitter implements IParticleTransmitter {
|
||||
cg: 'end',
|
||||
terminationReason: this.terminationReason,
|
||||
tokenStopReason: this.tokenStopReason, // See NOTE above - || (dispatchOrDialectIssue ? 'cg-issue' : 'ok'),
|
||||
...(this.tokenStopError && { tokenStopError: this.tokenStopError }),
|
||||
});
|
||||
// Keep this in a terminated state, so that every subsequent call will yield errors (not implemented)
|
||||
// this.terminationReason = null;
|
||||
@@ -201,12 +203,13 @@ export class ChatGenerateTransmitter implements IParticleTransmitter {
|
||||
this.setDialectEnded('issue-dialect');
|
||||
}
|
||||
|
||||
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason) {
|
||||
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason, errorText?: string) {
|
||||
if (SERVER_DEBUG_WIRE)
|
||||
console.log('|token-stop|', reason);
|
||||
console.log('|token-stop|', reason, errorText ?? '');
|
||||
if (this.tokenStopReason && this.tokenStopReason !== reason)
|
||||
console.warn(`[Aix.${this.prettyDialect}] setTokenStopReason('${reason}'): already has token stop reason '${this.tokenStopReason}' (overriding)`);
|
||||
this.tokenStopReason = reason;
|
||||
if (errorText) this.tokenStopError = errorText;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
|
||||
const chatGenerate = aixSpillSystemToUser(_chatGenerate);
|
||||
|
||||
// Dialect incompatibilities -> Hotfixes
|
||||
// [DeepSeek, 2026-04-24] V4 doesn't require strict alternation but we keep coalescing for cleanliness; the reducer only merges assistant/user, tool messages stay separate (parallel tool_calls).
|
||||
const hotFixAlternateUserAssistantRoles = openAIDialect === 'deepseek' || openAIDialect === 'perplexity';
|
||||
const hotFixRemoveEmptyMessages = openAIDialect === 'moonshot' || openAIDialect === 'perplexity'; // [Moonshot, 2026-02-10] consecutive assistant messages (empty + content) break Moonshot - coalesce to fix
|
||||
const hotFixRemoveStreamOptions = openAIDialect === 'azure' || openAIDialect === 'mistral';
|
||||
@@ -59,7 +60,7 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
|
||||
throw new Error('This service does not support function calls');
|
||||
|
||||
// Convert the chat messages to the OpenAI 4-Messages format
|
||||
let chatMessages = _toOpenAIMessages(chatGenerate.systemMessage, chatGenerate.chatSequence, hotFixOpenAIOFamily);
|
||||
let chatMessages = _toOpenAIMessages(openAIDialect, chatGenerate.systemMessage, chatGenerate.chatSequence, hotFixOpenAIOFamily);
|
||||
|
||||
// Apply hotfixes
|
||||
|
||||
@@ -69,6 +70,13 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
|
||||
if (hotFixAlternateUserAssistantRoles)
|
||||
chatMessages = _fixAlternateUserAssistantRoles(chatMessages);
|
||||
|
||||
// [DeepSeek, 2026-04-24] When tools are present and thinking isn't disabled, V4 demands reasoning_content on EVERY assistant message in history
|
||||
// Inject '' placeholder where missing; real reasoning is attached by _toOpenAIMessages
|
||||
if (openAIDialect === 'deepseek' && chatGenerate.tools?.length)
|
||||
for (const m of chatMessages)
|
||||
if (m.role === 'assistant' && m.reasoning_content === undefined)
|
||||
m.reasoning_content = '';
|
||||
|
||||
|
||||
// constrained output modes - both JSON and tool invocations
|
||||
// const strictJsonOutput = !!model.strictJsonOutput;
|
||||
@@ -145,18 +153,23 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
|
||||
&& openAIDialect !== 'deepseek' && openAIDialect !== 'moonshot' && openAIDialect !== 'zai' // MoonShot maps to none->disabled / high->enabled
|
||||
&& openAIDialect !== 'perplexity' // Perplexity has its own block below with stricter validation
|
||||
) {
|
||||
if (reasoningEffort === 'max') // domain validation
|
||||
throw new Error(`OpenAI ChatCompletions API does not support '${reasoningEffort}' reasoning effort`);
|
||||
// for: 'alibaba' | 'azure' | 'groq' | 'lmstudio' | 'localai' | 'mistral' | 'openai' | 'openpipe' | 'togetherai' | 'xai'
|
||||
payload.reasoning_effort = reasoningEffort;
|
||||
}
|
||||
|
||||
// [Moonshot] Kimi K2.5 reasoning effort -> thinking mode (only 'none' and 'high' supported for now)
|
||||
// [Z.ai] GLM thinking mode: binary enabled/disabled (supports GLM-4.5 series and higher) - https://docs.z.ai/guides/capabilities/thinking-mode
|
||||
// [DeepSeek, 2026-04-23] V4 thinking control https://api-docs.deepseek.com/guides/thinking_mode
|
||||
if (reasoningEffort && (openAIDialect === 'deepseek' || openAIDialect === 'moonshot' || openAIDialect === 'zai')) {
|
||||
if (reasoningEffort !== 'none' && reasoningEffort !== 'high') // domain validation
|
||||
throw new Error(`${openAIDialect} only supports reasoning effort 'none' or 'high', got '${reasoningEffort}'`);
|
||||
const allowedEffort = openAIDialect === 'deepseek' ? ['none', 'high', 'max'] : ['none', 'high'];
|
||||
if (!allowedEffort.includes(reasoningEffort)) // domain validation
|
||||
throw new Error(`${openAIDialect} only supports reasoning effort ${allowedEffort.join(', ')}, got '${reasoningEffort}'`);
|
||||
|
||||
payload.thinking = { type: reasoningEffort === 'none' ? 'disabled' : 'enabled' };
|
||||
payload.thinking = { type: reasoningEffort !== 'none' ? 'enabled' : 'disabled' };
|
||||
|
||||
// [DeepSeek, 2026-04-23] DeepSeek also supports effort control for reasoning-enabled requests - set it here as it was carved from the reasoningEffort setter before
|
||||
if (openAIDialect === 'deepseek' && reasoningEffort !== 'none')
|
||||
payload.reasoning_effort = reasoningEffort;
|
||||
}
|
||||
|
||||
|
||||
@@ -348,19 +361,23 @@ function _fixAlternateUserAssistantRoles(chatMessages: TRequestMessages): TReque
|
||||
};
|
||||
}
|
||||
|
||||
// if the current item has the same role as the last item, concatenate their content
|
||||
// If current item has the same role as the last, coalesce ONLY assistant/user.
|
||||
// Tool/system/developer must stay separate - tool messages each pair with a tool_call_id; merging corrupts the protocol.
|
||||
if (acc.length > 0) {
|
||||
const lastItem = acc[acc.length - 1];
|
||||
if (lastItem.role === historyItem.role) {
|
||||
if (lastItem.role === 'assistant') {
|
||||
lastItem.content += hotFixSquashTextSeparator + historyItem.content;
|
||||
} else if (lastItem.role === 'user') {
|
||||
return acc;
|
||||
}
|
||||
if (lastItem.role === 'user') {
|
||||
lastItem.content = [
|
||||
...(Array.isArray(lastItem.content) ? lastItem.content : [OpenAIWire_ContentParts.TextContentPart(lastItem.content)]),
|
||||
...(Array.isArray(historyItem.content) ? historyItem.content : historyItem.content ? [OpenAIWire_ContentParts.TextContentPart(historyItem.content)] : []),
|
||||
];
|
||||
return acc;
|
||||
}
|
||||
return acc;
|
||||
// fall through to push for tool/system/developer - each stays its own message
|
||||
}
|
||||
}
|
||||
|
||||
@@ -442,7 +459,10 @@ function _fixVndOaiRestoreMarkdown_Inline(payload: TRequest) {
|
||||
}*/
|
||||
|
||||
|
||||
function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chatSequence: AixMessages_ChatMessage[], hotFixOpenAIo1Family: boolean): TRequestMessages {
|
||||
function _toOpenAIMessages(openAIDialect: OpenAIDialects, systemMessage: AixMessages_SystemMessage | null, chatSequence: AixMessages_ChatMessage[], hotFixOpenAIo1Family: boolean): TRequestMessages {
|
||||
|
||||
// [DeepSeek, 2026-04-24] V4 thinking-by-default - reasoning_content must round-trip on tool-call turns; payload is the 'ma' part's aText (unlike Gemini/OpenAI-Responses which carry opaque handles).
|
||||
const echoDeepseekReasoning = openAIDialect === 'deepseek';
|
||||
|
||||
// Transform the chat messages into OpenAI's format (an array of 'system', 'user', 'assistant', and 'tool' messages)
|
||||
const chatMessages: TRequestMessages = [];
|
||||
@@ -555,6 +575,8 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
|
||||
break;
|
||||
|
||||
case 'model':
|
||||
// Accumulate 'ma' reasoning text across this turn; echoed below onto the assistant message if it carries tool_calls (DeepSeek only).
|
||||
let pendingReasoningText = '';
|
||||
for (const part of parts) {
|
||||
const currentMessage = chatMessages[chatMessages.length - 1];
|
||||
switch (part.pt) {
|
||||
@@ -630,7 +652,9 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
|
||||
break;
|
||||
|
||||
case 'ma':
|
||||
// ignore this thinking block - Anthropic only
|
||||
// [DeepSeek only] accumulate reasoning text for the echo-back below. Other dialects ignore 'ma' (reasoning continuity flows via _vnd opaque handles, not via this adapter).
|
||||
if (echoDeepseekReasoning && part.aType === 'reasoning' && part.aText)
|
||||
pendingReasoningText += part.aText;
|
||||
break;
|
||||
|
||||
case 'tool_response':
|
||||
@@ -651,6 +675,18 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// [DeepSeek] attach accumulated reasoning to this turn's assistant message only if it carries tool_calls; plain-text turns don't need the echo per docs.
|
||||
if (echoDeepseekReasoning && pendingReasoningText) {
|
||||
for (let i = chatMessages.length - 1; i >= 0; i--) {
|
||||
const m = chatMessages[i];
|
||||
if (m.role !== 'assistant') continue;
|
||||
if (m.tool_calls?.length)
|
||||
m.reasoning_content = pendingReasoningText;
|
||||
break; // stop at the most recent assistant message from this turn
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,7 +55,6 @@ export class DispatchContinuationSignal extends Error {
|
||||
*/
|
||||
export async function* executeChatGenerateWithContinuation(
|
||||
dispatchCreatorFn: () => Promise<ChatGenerateDispatch>,
|
||||
streaming: boolean,
|
||||
abortSignal: AbortSignal,
|
||||
_d: AixDebugObject,
|
||||
): AsyncGenerator<AixWire_Particles.ChatGenerateOp, void> {
|
||||
@@ -65,7 +64,7 @@ export async function* executeChatGenerateWithContinuation(
|
||||
for (let turn = 0; turn <= MAX_CONTINUATION_TURNS; turn++) {
|
||||
try {
|
||||
|
||||
yield* executeChatGenerateWithOperationRetry(currentCreator, streaming, abortSignal, _d);
|
||||
yield* executeChatGenerateWithOperationRetry(currentCreator, abortSignal, _d);
|
||||
return; // normal completion
|
||||
|
||||
} catch (error) {
|
||||
|
||||
@@ -25,7 +25,7 @@ import { createAnthropicFileInlineTransform } from './parsers/anthropic.transfor
|
||||
import { createAnthropicMessageParser, createAnthropicMessageParserNS } from './parsers/anthropic.parser';
|
||||
import { createBedrockConverseParserNS, createBedrockConverseStreamParser } from './parsers/bedrock-converse.parser';
|
||||
import { createGeminiGenerateContentResponseParser } from './parsers/gemini.parser';
|
||||
import { createGeminiInteractionsParser } from './parsers/gemini.interactions.parser';
|
||||
import { createGeminiInteractionsParserSSE } from './parsers/gemini.interactions.parser';
|
||||
import { createOpenAIChatCompletionsChunkParser, createOpenAIChatCompletionsParserNS } from './parsers/openai.parser';
|
||||
import { createOpenAIResponseParserNS, createOpenAIResponsesEventParser } from './parsers/openai.responses.parser';
|
||||
|
||||
@@ -37,7 +37,8 @@ export type ChatGenerateDispatch = {
|
||||
/** Used by dialects that need multi-step I/O. The returned response is consumed normally via demuxerFormat/chatGenerateParse */
|
||||
customConnect?: (signal: AbortSignal) => Promise<Response>;
|
||||
bodyTransform?: AixDemuxers.StreamBodyTransform;
|
||||
demuxerFormat: AixDemuxers.StreamDemuxerFormat;
|
||||
/** Source of truth for the consumer mode: null = NS */
|
||||
demuxerFormat: null | AixDemuxers.StreamDemuxerFormat;
|
||||
chatGenerateParse: ChatGenerateParseFunction;
|
||||
particleTransform?: ChatGenerateParticleTransformFunction;
|
||||
};
|
||||
@@ -173,6 +174,7 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
|
||||
|
||||
// [Gemini Interactions API - ALPHA TEST] SSE-native: POST with stream=true, upstream returns event-stream we pipe through the fast-sse demuxer.
|
||||
if (model.vndGeminiAPI === 'interactions-agent') {
|
||||
if (!streaming) console.warn(`[DEV] Gemini Interactions API - only supported in SSE mode, ignoring streaming=false for model ${model.id}`);
|
||||
const request: ChatGenerateDispatchRequest = {
|
||||
...geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.postPath, false),
|
||||
method: 'POST',
|
||||
@@ -186,8 +188,9 @@ export async function createChatGenerateDispatch(access: AixAPI_Access, model: A
|
||||
if (signal.aborted) throw error; // preserve abort identity for the executor's abort classifier
|
||||
throw new Error(`Gemini Interactions POST: ${error?.message || 'upstream error'}`); // rewrapping TRPCFetcherError as plain Error makes the retrier treat it as non-retryable
|
||||
}),
|
||||
/** Upstream hardcodes stream=true + background=true (required by deep-research agents) and has no non-streaming alternative. */
|
||||
demuxerFormat: 'fast-sse',
|
||||
chatGenerateParse: createGeminiInteractionsParser(requestedModelName),
|
||||
chatGenerateParse: createGeminiInteractionsParserSSE(requestedModelName),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -323,11 +326,13 @@ export async function createChatGenerateResumeDispatch(access: AixAPI_Access, re
|
||||
// [Gemini Interactions] Reattach via SSE stream - GET /interactions/{id}?stream=true replays all events from the start (intentional - client's ContentReassembler replaces message content on reattach; partial resume via last_event_id is deliberately NOT used).
|
||||
if (resumeHandle.uht !== 'vnd.gem.interactions')
|
||||
throw new Error(`Resume handle mismatch for gemini: expected 'vnd.gem.interactions', got '${resumeHandle.uht}'`);
|
||||
if (!streaming) console.warn(`[DEV] Gemini Interactions API - Resume only supported in SSE mode, ignoring streaming=false for ${resumeHandle.runId}`);
|
||||
const { url: _baseUrl, headers: _headers } = geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.getPath(resumeHandle.runId /* Gemini interaction.id */), false);
|
||||
return {
|
||||
request: { url: `${_baseUrl}${_baseUrl.includes('?') ? '&' : '?'}stream=true`, method: 'GET', headers: _headers },
|
||||
/** Again, only support SSE here, for now (see comment in `createChatGenerateDispatch`) */
|
||||
demuxerFormat: 'fast-sse',
|
||||
chatGenerateParse: createGeminiInteractionsParser(null /* model name unknown at resume time - caller's DMessage already has it */),
|
||||
chatGenerateParse: createGeminiInteractionsParserSSE(null /* model name unknown at resume time - caller's DMessage already has it */),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -393,6 +398,21 @@ export async function executeChatGenerateDelete(access: AixAPI_Access, handle: A
|
||||
case 'gemini':
|
||||
if (handle.uht !== 'vnd.gem.interactions')
|
||||
throw new Error(`Delete handle mismatch for gemini: expected 'vnd.gem.interactions', got '${handle.uht}'`);
|
||||
|
||||
// Gemini: cancel the background run first (stops token generation), then DELETE the stored record.
|
||||
// The DELETE endpoint only removes the resource; it does NOT cancel an in-flight run.
|
||||
// Cancel may 404 "Method not found" on the Developer API (API-key mode, googleapis/python-genai#1971) -
|
||||
// we log the outcome and proceed to DELETE so local cleanup still happens.
|
||||
const { url: cancelUrl, headers: cancelHeaders } = geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.cancelPath(handle.runId), false);
|
||||
try {
|
||||
const cancelResp = await fetchResponseOrTRPCThrow({ url: cancelUrl, method: 'POST', body: {}, headers: cancelHeaders, signal: abortSignal, name: 'Aix.Gemini.Interactions.cancel', throwWithoutName: true });
|
||||
console.log(`[AIX] Gemini.Interactions.cancel: ok=${cancelResp.ok} status=${cancelResp.status}`);
|
||||
} catch (error: any) {
|
||||
if (abortSignal.aborted) throw error;
|
||||
const status = error instanceof TRPCFetcherError ? error.httpStatus : undefined;
|
||||
console.log(`[AIX] Gemini.Interactions.cancel: failed status=${status ?? '?'} msg=${error?.message ?? 'unknown'}`);
|
||||
}
|
||||
|
||||
({ url, headers } = geminiAccess(access, null, GeminiInteractionsWire_API_Interactions.deletePath(handle.runId), false));
|
||||
name = 'Aix.Gemini.Interactions.delete';
|
||||
break;
|
||||
|
||||
@@ -26,7 +26,6 @@ import { heartbeatsWhileAwaiting } from '../heartbeatsWhileAwaiting';
|
||||
*/
|
||||
export async function* executeChatGenerateDispatch(
|
||||
dispatchCreatorFn: () => Promise<ChatGenerateDispatch>,
|
||||
streaming: boolean,
|
||||
intakeAbortSignal: AbortSignal,
|
||||
_d: AixDebugObject,
|
||||
parseContext?: { retriesAvailable: boolean },
|
||||
@@ -59,7 +58,7 @@ export async function* executeChatGenerateDispatch(
|
||||
const innerStream = (async function* () {
|
||||
|
||||
// Consume dispatch response
|
||||
if (!streaming)
|
||||
if (dispatch.demuxerFormat === null /* NS */)
|
||||
yield* _consumeDispatchUnified(dispatchResponse, dispatch.chatGenerateParse, chatGenerateTx, _d, parseContext);
|
||||
else
|
||||
yield* _consumeDispatchStream(dispatchResponse, dispatch.bodyTransform ?? null, dispatch.demuxerFormat, dispatch.chatGenerateParse, chatGenerateTx, _d, parseContext);
|
||||
|
||||
@@ -44,7 +44,6 @@ export class OperationRetrySignal extends Error {
|
||||
*/
|
||||
export async function* executeChatGenerateWithOperationRetry(
|
||||
dispatchCreatorFn: () => Promise<ChatGenerateDispatch>,
|
||||
streaming: boolean,
|
||||
abortSignal: AbortSignal,
|
||||
_d: AixDebugObject,
|
||||
): AsyncGenerator<AixWire_Particles.ChatGenerateOp, void> {
|
||||
@@ -55,7 +54,7 @@ export async function* executeChatGenerateWithOperationRetry(
|
||||
while (true) {
|
||||
try {
|
||||
|
||||
yield* executeChatGenerateDispatch(dispatchCreatorFn, streaming, abortSignal, _d, {
|
||||
yield* executeChatGenerateDispatch(dispatchCreatorFn, abortSignal, _d, {
|
||||
retriesAvailable: attemptNumber < maxAttempts,
|
||||
});
|
||||
|
||||
|
||||
@@ -15,8 +15,8 @@ export interface IParticleTransmitter {
|
||||
/** End the current part and flush it, which also calls `setDialectEnded('issue-dialect')` */
|
||||
setDialectTerminatingIssue(dialectText: string, symbol: string | null, serverLog: ParticleServerLogLevel): void;
|
||||
|
||||
/** Communicates the finish reason to the client - Data only, this does not do Control, like the above */
|
||||
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason): void;
|
||||
/** Communicates the finish reason to the client - Data only. Optional `errorText` is a vendor-composed string rendered as a complementary error fragment alongside the generic classification message. */
|
||||
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason, errorText?: string): void;
|
||||
|
||||
|
||||
// Parts data //
|
||||
|
||||
@@ -404,7 +404,7 @@ export function createAnthropicMessageParser(): ChatGenerateParseFunction {
|
||||
// -> Token Stop Reason
|
||||
const tokenStopReason = _fromAnthropicStopReason(delta.stop_reason, 'message_delta');
|
||||
if (tokenStopReason !== null)
|
||||
pt.setTokenStopReason(tokenStopReason);
|
||||
pt.setTokenStopReason(tokenStopReason, _formatAnthropicStopError(delta.stop_details));
|
||||
|
||||
// NOTE: we have more fields we're not parsing yet - https://platform.claude.com/docs/en/api/typescript/messages#message_delta_usage
|
||||
if (usage?.output_tokens && messageStartTime) {
|
||||
@@ -511,6 +511,7 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction {
|
||||
content,
|
||||
container,
|
||||
stop_reason,
|
||||
stop_details,
|
||||
usage,
|
||||
} = AnthropicWire_API_Message_Create.Response_schema.parse(JSON.parse(fullData));
|
||||
|
||||
@@ -653,7 +654,7 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction {
|
||||
// -> Token Stop Reason (pause_turn already thrown above)
|
||||
const tokenStopReason = _fromAnthropicStopReason(stop_reason, 'parser_NS');
|
||||
if (tokenStopReason !== null)
|
||||
pt.setTokenStopReason(tokenStopReason);
|
||||
pt.setTokenStopReason(tokenStopReason, _formatAnthropicStopError(stop_details));
|
||||
};
|
||||
}
|
||||
|
||||
@@ -681,6 +682,19 @@ function _emitContainerState(pt: IParticleTransmitter, container: { id: string;
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Builds the optional, human-readable error text derived from Anthropic's `stop_details`.
 *
 * @param stopDetails - the `stop_details` object as received, or null/undefined when absent
 * @returns `Refusal: [category] explanation` (either piece may be missing), or undefined when there are
 *          no details, the details are not of type 'refusal', or neither category nor explanation is set
 */
function _formatAnthropicStopError(stopDetails: { type: string; category?: string | null; explanation?: string | null } | null | undefined): string | undefined {
  if (!stopDetails) return undefined;

  const { type, category, explanation } = stopDetails;

  // Only 'refusal' details are formatted here; any other type is reported via aixResilientUnknownValue and not surfaced
  if (type !== 'refusal') {
    aixResilientUnknownValue('Anthropic', 'stopDetailsType', type);
    return undefined;
  }

  // Keep the non-empty pieces in order: bracketed category first, then the explanation
  const detail = [category ? `[${category}]` : '', explanation || '']
    .filter(piece => piece.length > 0)
    .join(' ');

  return detail.length > 0 ? `Refusal: ${detail}` : undefined;
}
|
||||
|
||||
|
||||
// --- Shared server tool result handlers (used by both S and NS parsers) ---
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import type { ChatGenerateParseFunction } from '../chatGenerate.dispatch';
|
||||
import type { IParticleTransmitter } from './IParticleTransmitter';
|
||||
|
||||
import { GeminiInteractionsWire_API_Interactions } from '../../wiretypes/gemini.interactions.wiretypes';
|
||||
import { IssueSymbols } from '../ChatGenerateTransmitter';
|
||||
import { geminiConvertPCM2WAV } from './gemini.audioutils';
|
||||
|
||||
|
||||
@@ -44,7 +45,7 @@ type BlockState = {
|
||||
* the cursor (or from start if omitted). Our parser is position-idempotent within a single run
|
||||
* because the transmitter's state carries across events.
|
||||
*/
|
||||
export function createGeminiInteractionsParser(requestedModelName: string | null): ChatGenerateParseFunction {
|
||||
export function createGeminiInteractionsParserSSE(requestedModelName: string | null): ChatGenerateParseFunction {
|
||||
|
||||
const parserCreationTimestamp = Date.now();
|
||||
let timeToFirstContent: number | undefined;
|
||||
@@ -218,11 +219,16 @@ export function createGeminiInteractionsParser(requestedModelName: string | null
|
||||
}
|
||||
|
||||
case 'error':
|
||||
// Observed mid-stream with an empty payload between content blocks - non-fatal, the stream
|
||||
// continues with further events and eventually an interaction.complete. Silent-skip empty
|
||||
// payloads (Beta noise); warn only when actual error info is present.
|
||||
if (event.error?.message || event.error?.code)
|
||||
console.warn('[GeminiInteractions] SSE error event:', event.error);
|
||||
// Two observed shapes:
|
||||
// 1) Empty payload mid-stream (Beta noise): the stream continues with further events and
|
||||
// eventually an interaction.complete - silent-skip.
|
||||
// 2) Populated payload with message/code: terminal upstream error (also how Gemini reports
|
||||
// cancelled interactions: HTTP 500 to the cancel call + an error SSE on the stream).
|
||||
// Surface as a dialect-terminating issue so the UI renders it and the stream ends cleanly.
|
||||
if (event.error?.message || event.error?.code) {
|
||||
const errorText = `${event.error.code ? `${event.error.code}: ` : ''}${event.error.message || 'Upstream error.'}`;
|
||||
pt.setDialectTerminatingIssue(errorText, IssueSymbols.Generic, 'srv-warn');
|
||||
}
|
||||
break;
|
||||
|
||||
default: {
|
||||
|
||||
@@ -494,6 +494,10 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
|
||||
} else if (message.content !== undefined && message.content !== null)
|
||||
throw new Error(`unexpected message content type: ${typeof message.content}`);
|
||||
|
||||
// [DeepSeek, 2026-04-24] Non-streaming reasoning_content -> 'ma' reasoning part (mirror of streaming path above)
|
||||
if (typeof message.reasoning_content === 'string' && message.reasoning_content)
|
||||
pt.appendReasoningText(message.reasoning_content);
|
||||
|
||||
// [OpenRouter, 2025-01-20] Handle structured reasoning_details
|
||||
if (Array.isArray(message.reasoning_details)) {
|
||||
for (const reasoningDetail of message.reasoning_details) {
|
||||
|
||||
@@ -18,6 +18,21 @@ const OPENAI_RESPONSES_SAME_PART_SPACER = '\n\n';
|
||||
const INLINE_IMAGE_SKIP_RESIZE_MAX_B64_BYTES = 250_000; // skip resize for small images (e.g. code interpreter charts)
|
||||
|
||||
|
||||
/**
|
||||
* Wishlist marker: hosted tool calls (web_search_call, image_generation_call, code_interpreter_call, ...)
|
||||
* are rendered via ephemeral OperationState/inline-asset paths and are NOT round-tripped as structured
|
||||
* fragments. This breaks stateless multi-turn with reasoning models. See PRD.FUTURE-atol.md "Wishlist:
|
||||
* Hosted tool invocations as first-class fragments".
|
||||
*/
|
||||
// const _hostedToolWishlistSeen = new Set<string>();
|
||||
function _hostedToolWishlistHint(family: 'web_search' | 'image_generation' | 'code_interpreter' | 'custom_tool'): void {
|
||||
// if (_hostedToolWishlistSeen.has(family)) return;
|
||||
// _hostedToolWishlistSeen.add(family);
|
||||
// NOTE: the log is disabled because it was firing all the time, everywhere; just implement this instead
|
||||
// console.log(`[DEV] AIX: ATOL wishlist - hosted '${family}' call observed; not round-tripped as a structured fragment yet (see kb/product/PRD.FUTURE-atol.md)`);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Safely sanitizes a URL for display in placeholders by removing query parameters and paths
|
||||
* to prevent leaking sensitive information while keeping the domain recognizable.
|
||||
@@ -46,6 +61,11 @@ type TEventType = OpenAIWire_API_Responses.StreamingEvent['type'];
|
||||
// cached config for the image_generation hosted tool, captured at response.created
|
||||
type TImageGenToolCfg = Extract<OpenAIWire_Responses_Tools.Tool, { type: 'image_generation' }>;
|
||||
|
||||
/**
 * Looks up the `image_generation` entry in the tools array echoed back on a response.
 * The API does not echo `model` per output item, so callers read it from this config instead.
 * Shared by the streaming and non-streaming paths.
 *
 * @param tools - the echoed tools array; may be null/undefined
 * @returns the first tool whose `type` is 'image_generation', or undefined when there is none
 */
function _findImageGenToolCfg(tools: TResponse['tools']): TImageGenToolCfg | undefined {
  // type predicate, so the match is returned already narrowed to the image_generation variant
  const isImageGenTool = (tool: NonNullable<TResponse['tools']>[number]): tool is TImageGenToolCfg =>
    tool.type === 'image_generation';

  return tools == null ? undefined : tools.find(isImageGenTool);
}
|
||||
|
||||
|
||||
/**
|
||||
* We need this just to ensure events are not out of order, as our streaming is progressive
|
||||
@@ -248,8 +268,7 @@ class ResponseParserStateMachine {
|
||||
// Hosted tool config capture
|
||||
|
||||
captureHostedToolConfigs(tools: TResponse['tools']) {
|
||||
if (!tools?.length) return;
|
||||
this.#imageGenToolCfg = tools.find((t): t is TImageGenToolCfg => t.type === 'image_generation');
|
||||
this.#imageGenToolCfg = _findImageGenToolCfg(tools);
|
||||
}
|
||||
|
||||
get imageGenToolCfg() {
|
||||
@@ -448,6 +467,7 @@ export function createOpenAIResponsesEventParser(): ChatGenerateParseFunction {
|
||||
break;
|
||||
|
||||
case 'image_generation_call':
|
||||
_hostedToolWishlistHint('image_generation');
|
||||
// -> IGC: process completed image generation using 'ii' particle for inline images
|
||||
const { id: igId, result: igResult, revised_prompt: igRevisedPrompt } = doneItem;
|
||||
const igDoneText = !igRevisedPrompt?.length ? 'Image generated'
|
||||
@@ -765,6 +785,9 @@ export function createOpenAIResponseParserNS(): ChatGenerateParseFunction {
|
||||
if (response.model)
|
||||
pt.setModelName(response.model);
|
||||
|
||||
// -> Hosted tool config capture (needed for enriching done-item particles with tool params the API does not echo per-item, e.g. image_generation.model)
|
||||
const imageGenToolCfg = _findImageGenToolCfg(response.tools);
|
||||
|
||||
// -> Upstream Handle (for remote control: resume, cancel, delete)
|
||||
// NOTE: we don't do it for full responses, because they're supposed to be 'complete' - i.e. no 'background' execution
|
||||
|
||||
@@ -875,11 +898,16 @@ export function createOpenAIResponseParserNS(): ChatGenerateParseFunction {
|
||||
pt.appendReasoningText(item.text);
|
||||
}
|
||||
|
||||
// [DEV] surface cases that diverge from our continuity round-trip expectations
|
||||
if (!reasoningId && !reasoningEC)
|
||||
console.warn('[DEV] AIX: OpenAI-Response-NS: reasoning item has neither id nor encrypted_content - no continuity handle captured for this turn', { oItem });
|
||||
else if (!reasoningEC)
|
||||
console.log('[DEV] AIX: OpenAI-Response-NS: reasoning item has id but no encrypted_content - stateless round-trip requires include:[\'reasoning.encrypted_content\'] on the request');
|
||||
|
||||
// Capture the continuity handle (encrypted_content + id) for stateless multi-turn round-tripping.
|
||||
// Attached to the ma fragment produced by the summary above; if no summary was emitted, this may
|
||||
// attach to an unrelated preceding fragment - tolerable as the worst case is a misfiled blob.
|
||||
// FIXME: make sure we are attaching to an 'ma' (i.e. reasoning text or something was emitted)
|
||||
if (reasoningEC || reasoningId)
|
||||
if (reasoningEC || reasoningId) {
|
||||
// Defensive: ensure an ma fragment exists as the attach target for the svs particle below (parity with the streaming path).
|
||||
pt.appendReasoningText('');
|
||||
pt.sendSetVendorState({
|
||||
p: 'svs',
|
||||
vendor: 'openai',
|
||||
@@ -890,10 +918,7 @@ export function createOpenAIResponseParserNS(): ChatGenerateParseFunction {
|
||||
},
|
||||
},
|
||||
});
|
||||
else if (!reasoningId && !reasoningEC)
|
||||
console.warn('[DEV] AIX: OpenAI-Response-NS: reasoning item has neither id nor encrypted_content - no continuity handle captured for this turn', { oItem });
|
||||
else if (!reasoningEC)
|
||||
console.log('[DEV] AIX: OpenAI-Response-NS: reasoning item has id but no encrypted_content - stateless round-trip requires include:[\'reasoning.encrypted_content\'] on the request');
|
||||
}
|
||||
break;
|
||||
|
||||
// Message contains the main 'assistant' response
|
||||
@@ -957,6 +982,7 @@ export function createOpenAIResponseParserNS(): ChatGenerateParseFunction {
|
||||
break;
|
||||
|
||||
case 'image_generation_call':
|
||||
_hostedToolWishlistHint('image_generation');
|
||||
// -> IGC: process completed image generation using 'ii' particle for inline images
|
||||
const { result: igResult, revised_prompt: igRevisedPrompt } = oItem;
|
||||
// Create inline image with base64 data
|
||||
@@ -965,7 +991,7 @@ export function createOpenAIResponseParserNS(): ChatGenerateParseFunction {
|
||||
_imageGenerationMimeType(oItem), // infer from output_format echoed in the item
|
||||
igResult,
|
||||
igRevisedPrompt || 'Generated image',
|
||||
AIX_OAI_DEFAULT_IMAGE_GEN_MODEL, // generator: non-streaming path has no captured tool config, use current default
|
||||
imageGenToolCfg?.model || AIX_OAI_DEFAULT_IMAGE_GEN_MODEL, // generator: read from echoed tools (API does not echo model per-item), fallback to current default
|
||||
igRevisedPrompt || '', // prompt used
|
||||
);
|
||||
else
|
||||
@@ -1150,6 +1176,7 @@ function _imageGenerationMimeType(item: { output_format?: string }): string {
|
||||
* - citations: High-quality links (2-3) via annotations in message content
|
||||
*/
|
||||
function _forwardDoneWebSearchCallItem(pt: IParticleTransmitter, webSearchCall: Extract<OpenAIWire_API_Responses.Response['output'][number], { type: 'web_search_call' }>, opId: string): void {
|
||||
_hostedToolWishlistHint('web_search');
|
||||
const { action, status } = webSearchCall;
|
||||
|
||||
const doneOpts = { opId, state: 'done' } as const;
|
||||
@@ -1203,6 +1230,7 @@ function _forwardDoneWebSearchCallItem(pt: IParticleTransmitter, webSearchCall:
|
||||
* - addCodeExecutionResponse for each output result
|
||||
*/
|
||||
function _forwardDoneCodeInterpreterCallItem(pt: IParticleTransmitter, codeInterpreterCall: Extract<OpenAIWire_API_Responses.Response['output'][number], { type: 'code_interpreter_call' }>): void {
|
||||
_hostedToolWishlistHint('code_interpreter');
|
||||
const { id, code, outputs, status /*,container_id*/ } = codeInterpreterCall;
|
||||
|
||||
// <- Emit code (like Gemini's executableCode)
|
||||
|
||||
@@ -21,7 +21,7 @@ export namespace AixDemuxers {
|
||||
* - 'fast-sse' is our own parser, optimized for performance. to be preferred when possible over 'sse' (check for full compatibility with the upstream)
|
||||
* - 'json-nl' is used by Ollama
|
||||
*/
|
||||
export type StreamDemuxerFormat = 'fast-sse' | 'json-nl' | null;
|
||||
export type StreamDemuxerFormat = 'fast-sse' | 'json-nl';
|
||||
|
||||
|
||||
/**
|
||||
@@ -34,8 +34,8 @@ export namespace AixDemuxers {
|
||||
return createFastEventSourceDemuxer();
|
||||
case 'json-nl':
|
||||
return _createJsonNlDemuxer();
|
||||
case null:
|
||||
return _nullStreamDemuxerWarn;
|
||||
default:
|
||||
throw new Error(`Unsupported stream demuxer format: ${format}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -115,12 +115,3 @@ function _createJsonNlDemuxer(): AixDemuxers.StreamDemuxer {
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
const _nullStreamDemuxerWarn: AixDemuxers.StreamDemuxer = {
|
||||
demux: () => {
|
||||
console.warn('Null demuxer called - shall not happen, as it is only created in non-streaming');
|
||||
return [];
|
||||
},
|
||||
flushRemaining: () => [],
|
||||
};
|
||||
|
||||
+5
-7
@@ -1,7 +1,7 @@
|
||||
<!--
|
||||
Upstream snapshot - DO NOT EDIT - run _upstream/sync.sh to refresh
|
||||
Source: https://platform.claude.com/docs/en/api/messages/create.md
|
||||
Synced: 2026-04-23
|
||||
Synced: 2026-04-24
|
||||
Consumed by: anthropic.wiretypes.ts, anthropic.parser.ts, anthropic.messageCreate.ts, anthropic.transform-fileInline.ts
|
||||
-->
|
||||
|
||||
@@ -2429,7 +2429,7 @@ Learn more about the Messages API in our [user guide](https://docs.claude.com/en
|
||||
|
||||
Configuration options for the model's output, such as the output format.
|
||||
|
||||
- `effort: optional "low" or "medium" or "high" or 2 more`
|
||||
- `effort: optional "low" or "medium" or "high" or "max"`
|
||||
|
||||
All possible effort levels.
|
||||
|
||||
@@ -2439,8 +2439,6 @@ Learn more about the Messages API in our [user guide](https://docs.claude.com/en
|
||||
|
||||
- `"high"`
|
||||
|
||||
- `"xhigh"`
|
||||
|
||||
- `"max"`
|
||||
|
||||
- `format: optional JSONOutputFormat`
|
||||
@@ -3822,15 +3820,15 @@ Learn more about the Messages API in our [user guide](https://docs.claude.com/en
|
||||
|
||||
Used to remove "long tail" low probability responses. [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
||||
|
||||
Recommended for advanced use cases only. You usually only need to use `temperature`.
|
||||
Recommended for advanced use cases only.
|
||||
|
||||
- `top_p: optional number`
|
||||
|
||||
Use nucleus sampling.
|
||||
|
||||
In nucleus sampling, we compute the cumulative distribution over all the options for each subsequent token in decreasing probability order and cut it off once it reaches a particular probability specified by `top_p`. You should either alter `temperature` or `top_p`, but not both.
|
||||
In nucleus sampling, we compute the cumulative distribution over all the options for each subsequent token in decreasing probability order and cut it off once it reaches a particular probability specified by `top_p`.
|
||||
|
||||
Recommended for advanced use cases only. You usually only need to use `temperature`.
|
||||
Recommended for advanced use cases only.
|
||||
|
||||
### Returns
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<!--
|
||||
Upstream snapshot - DO NOT EDIT - run _upstream/sync.sh to refresh
|
||||
Source: https://ai.google.dev/gemini-api/docs/deep-research.md.txt
|
||||
Synced: 2026-04-23
|
||||
Synced: 2026-04-24
|
||||
Consumed by: gemini.interactions.wiretypes.ts, gemini.interactions.parser.ts, gemini.interactionsCreate.ts, gemini.interactionsPoller.ts
|
||||
Companion: ./gemini.interactions.guide.md (the Interactions API guide)
|
||||
-->
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<!--
|
||||
Upstream snapshot - DO NOT EDIT - run _upstream/sync.sh to refresh
|
||||
Source: https://ai.google.dev/gemini-api/docs/interactions.md.txt
|
||||
Synced: 2026-04-23
|
||||
Synced: 2026-04-24
|
||||
Consumed by: gemini.interactions.wiretypes.ts, gemini.interactions.parser.ts, gemini.interactionsCreate.ts, gemini.interactionsPoller.ts
|
||||
Companion: ./gemini.interactions.spec.md (the Interactions API reference spec), ./gemini.deep-research.guide.md (the Deep Research agent guide)
|
||||
-->
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<!--
|
||||
Upstream snapshot - DO NOT EDIT - run _upstream/sync.sh to refresh
|
||||
Source: https://ai.google.dev/api/interactions-api.md.txt
|
||||
Synced: 2026-04-23
|
||||
Synced: 2026-04-24
|
||||
Consumed by: gemini.interactions.wiretypes.ts, gemini.interactions.parser.ts, gemini.interactionsCreate.ts, gemini.interactionsPoller.ts
|
||||
Companion: ./gemini.interactions.guide.md (the Interactions API guide)
|
||||
-->
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<!--
|
||||
Upstream snapshot - DO NOT EDIT - run _upstream/sync.sh to refresh
|
||||
Source: https://developers.openai.com/api/reference/resources/responses/methods/create/index.md
|
||||
Synced: 2026-04-23
|
||||
Synced: 2026-04-24
|
||||
Consumed by: openai.wiretypes.ts, openai.responses.parser.ts, openai.responsesCreate.ts
|
||||
-->
|
||||
|
||||
|
||||
@@ -13,6 +13,10 @@ const hotFixAntShipNoEmptyTextBlocks = true; // Replace empty text blocks with a
|
||||
*
|
||||
* ## Updates
|
||||
*
|
||||
* ### 2026-04-24 - API Sync: stop_details for structured refusals
|
||||
* - Response: added `stop_details` ({ type: 'refusal', category: 'cyber'|'bio'|null, explanation: string|null })
|
||||
* - event_MessageDelta.delta: added `stop_details` (arrives alongside stop_reason in streaming)
|
||||
*
|
||||
* ### 2026-03-21 - API Sync: GA tool versions, thinking display, caller updates, cache_control
|
||||
* - Tools: Added web_search_20260209 (GA), web_fetch_20260209/20260309 (GA), code_execution_20260120 (GA REPL)
|
||||
* - Request: Added top-level `cache_control` for automatic caching (Feb 2026)
|
||||
@@ -825,6 +829,16 @@ export namespace AnthropicWire_API_Message_Create {
|
||||
'model_context_window_exceeded',
|
||||
]);
|
||||
|
||||
/**
|
||||
* Structured stop details, paired with stop_reason. Currently only populated when stop_reason === 'refusal'.
|
||||
* Both `type` and `category` are loosely typed for forward-compat - parser warns on unknown `type`.
|
||||
*/
|
||||
const StopDetails_schema = z.object({
|
||||
type: z.enum(['refusal']).or(z.string()),
|
||||
category: z.enum(['cyber', 'bio']).or(z.string()).nullish(),
|
||||
explanation: z.string().nullish(),
|
||||
});
|
||||
|
||||
/// Request
|
||||
|
||||
export type Request = z.infer<typeof Request_schema>;
|
||||
@@ -1030,6 +1044,12 @@ export namespace AnthropicWire_API_Message_Create {
|
||||
// Which custom stop sequence was generated, if any.
|
||||
stop_sequence: z.string().nullable(),
|
||||
|
||||
/**
|
||||
* Structured stop details. Present when stop_reason === 'refusal' (carries category + explanation).
|
||||
* In streaming, stop_details is null at message_start and appears on message_delta alongside stop_reason.
|
||||
*/
|
||||
stop_details: StopDetails_schema.nullish(),
|
||||
|
||||
/**
|
||||
* Billing and rate-limit usage.
|
||||
* Token counts represent the underlying cost to Anthropic's systems.
|
||||
@@ -1088,6 +1108,10 @@ export namespace AnthropicWire_API_Message_Create {
|
||||
delta: z.object({
|
||||
stop_reason: StopReason_schema.nullable(),
|
||||
stop_sequence: z.string().nullable(),
|
||||
/**
|
||||
* Structured stop details - present alongside stop_reason === 'refusal' (category + explanation).
|
||||
*/
|
||||
stop_details: StopDetails_schema.nullish(),
|
||||
/**
|
||||
* Container state updates - present when Skills/code_execution tools are used.
|
||||
* Provides container id/expiry that may differ from message_start if the container was created mid-stream.
|
||||
|
||||
@@ -23,8 +23,12 @@ export namespace GeminiInteractionsWire_API_Interactions {
|
||||
|
||||
export const getPath = (id: string) => `/v1beta/interactions/${encodeURIComponent(id)}`;
|
||||
|
||||
// DELETE. Removes the stored record. Orthogonal to cancel: after the record is removed, the original connection may still be running and streaming
|
||||
export const deletePath = (id: string) => `/v1beta/interactions/${encodeURIComponent(id)}`;
|
||||
|
||||
// POST. Only cancels background interactions that are still running
|
||||
export const cancelPath = (id: string) => `/v1beta/interactions/${encodeURIComponent(id)}/cancel`;
|
||||
|
||||
|
||||
// -- Request Body (POST /v1beta/interactions) --
|
||||
|
||||
|
||||
@@ -189,6 +189,13 @@ export namespace OpenAIWire_Messages {
|
||||
/** [OpenRouter, 2025-01-20] Reasoning traces with multiple blocks (summary, text, encrypted). */
|
||||
reasoning_details: z.array(OpenAIWire_ContentParts.OpenRouter_ReasoningDetail_schema).optional(),
|
||||
|
||||
/**
|
||||
* [DeepSeek, 2026-04-24] Chain-of-thought reasoning text.
|
||||
* - Response: emitted by V4 thinking-by-default; parsed into a 'ma' reasoning part.
|
||||
* - (this) Request: MUST be echoed back on assistant turns that carry tool_calls (otherwise HTTP 400: "The reasoning_content in the thinking mode must be passed back to the API.").
|
||||
*/
|
||||
reasoning_content: z.string().nullable().optional(),
|
||||
|
||||
// function_call: // ignored, as it's deprecated
|
||||
// name: _optionalParticipantName, // omitted by choice: generally unsupported
|
||||
});
|
||||
@@ -331,7 +338,7 @@ export namespace OpenAIWire_API_Chat_Completions {
|
||||
stream_options: z.object({
|
||||
include_usage: z.boolean().optional(), // If set, an additional chunk will be streamed with a 'usage' field on the entire request.
|
||||
}).optional(),
|
||||
reasoning_effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(), // [OpenAI, 2024-12-17] [Perplexity, 2025-06-23] reasoning effort
|
||||
reasoning_effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(), // [OpenAI, 2024-12-17] [Perplexity, 2025-06-23] reasoning effort; [DeepSeek, 2026-04-23] 'max' added for V4
|
||||
// OpenAI and [OpenRouter, 2025-01-20] Verbosity parameter - maps to output_config.effort for Anthropic models
|
||||
// https://openrouter.ai/docs/api/reference/parameters#verbosity
|
||||
verbosity: z.enum([
|
||||
@@ -342,7 +349,7 @@ export namespace OpenAIWire_API_Chat_Completions {
|
||||
// [OpenRouter, 2025-11-11] Unified reasoning parameter for all models
|
||||
reasoning: z.object({
|
||||
max_tokens: z.int().optional(), // Token-based control (Anthropic, Gemini): 1024-32000
|
||||
effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(), // Effort-based control (OpenAI o1/o3/GPT-5, xAI, DeepSeek): allocates % of max_tokens
|
||||
effort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']).optional(), // Effort-based control (OpenAI o1/o3/GPT-5, xAI, DeepSeek): allocates % of max_tokens
|
||||
enabled: z.boolean().optional(), // Simple enable with medium effort defaults
|
||||
exclude: z.boolean().optional(), // Use reasoning internally without returning it in response
|
||||
}).optional(),
|
||||
@@ -447,6 +454,8 @@ export namespace OpenAIWire_API_Chat_Completions {
|
||||
search_after_date_filter: z.string().optional(), // Date filter in MM/DD/YYYY format
|
||||
|
||||
// [Moonshot, 2026-01-26] Kimi K2.5 thinking mode control
|
||||
// [Z.ai, 2025-xx] GLM thinking mode: type 'enabled' | 'disabled'
|
||||
// [DeepSeek, 2026-04-23] V4 thinking mode: same binary shape; depth is controlled via top-level `reasoning_effort`
|
||||
thinking: z.object({
|
||||
type: z.enum(['enabled', 'disabled']),
|
||||
}).optional(),
|
||||
@@ -1174,9 +1183,11 @@ export namespace OpenAIWire_Responses_Items {
|
||||
// [OpenAI 2026-03-xx] DEPRECATED: `query` might not always be present in the done event
|
||||
query: z.string().optional(),
|
||||
// the output websites, if any [{"type":"url","url":"https://www.enricoros.com/"}, {"type":"url","url": "https://linkedin.com/in/enricoros/"}, ...]
|
||||
// [OpenAI 2026-04-23, GPT-5.5] new source types: { type: 'api', name: 'oai-calculator' } for hosted-tool invocations (no url)
|
||||
sources: z.array(z.object({
|
||||
type: z.literal('url').optional(), // source type
|
||||
url: z.string(),
|
||||
type: z.enum(['url', 'api']).or(z.string()).optional(), // 'url' (default) | 'api' (GPT-5.5 hosted tools) | future types
|
||||
url: z.string().nullish(), // optional: 'api' sources have no url, only name
|
||||
name: z.string().nullish(), // for 'api' sources (e.g., 'oai-calculator')
|
||||
// [OpenAI 2026-03-xx] not present anymore
|
||||
// title: z.string().optional(),
|
||||
// snippet: z.string().optional(),
|
||||
@@ -1437,6 +1448,7 @@ export namespace OpenAIWire_Responses_Tools {
|
||||
const WebSearchTool_schema = z.object({
|
||||
type: z.enum(['web_search', 'web_search_preview', 'web_search_preview_2025_03_11']),
|
||||
search_context_size: z.enum(['low', 'medium', 'high']).optional(),
|
||||
// [OpenAI 2026-04-23, GPT-5.5] API echoes user_location as `null` (not undefined) when unset - so .nullish()
|
||||
user_location: z.object({
|
||||
type: z.literal('approximate'),
|
||||
// API echoes these as `null` when unset, not omitted - so .nullish()
|
||||
@@ -1444,7 +1456,7 @@ export namespace OpenAIWire_Responses_Tools {
|
||||
country: z.string().nullish(),
|
||||
region: z.string().nullish(),
|
||||
timezone: z.string().nullish(),
|
||||
}).optional(),
|
||||
}).nullish(),
|
||||
external_web_access: z.boolean().optional(),
|
||||
});
|
||||
|
||||
@@ -1641,7 +1653,7 @@ export namespace OpenAIWire_API_Responses {
|
||||
// NOTE: .catch() gracefully degrades to undefined since this is a non-critical enrichment path
|
||||
tools: z.array(OpenAIWire_Responses_Tools.Tool_schema).optional().catch((ctx) => {
|
||||
console.warn('[DEV] AIX: OpenAI Responses: unable to parse echoed tools, ignoring:', { tools: ctx.value });
|
||||
return;
|
||||
return undefined;
|
||||
}),
|
||||
|
||||
output: z.array(OpenAIWire_Responses_Items.OutputItem_schema),
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import * as React from 'react';
|
||||
import { useShallow } from 'zustand/react/shallow';
|
||||
|
||||
import { Alert, Box, CircularProgress } from '@mui/joy';
|
||||
import { Alert, Box, Button, CircularProgress } from '@mui/joy';
|
||||
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
|
||||
import TelegramIcon from '@mui/icons-material/Telegram';
|
||||
|
||||
import { ConfirmationModal } from '~/common/components/modals/ConfirmationModal';
|
||||
import { ShortcutKey, useGlobalShortcuts } from '~/common/components/shortcuts/useGlobalShortcuts';
|
||||
@@ -204,13 +206,30 @@ export function BeamView(props: {
|
||||
isMobile={props.isMobile}
|
||||
rayIds={rayIds}
|
||||
showRayAdd={cardAdd}
|
||||
showRaysOps={(isScattering || raysReady < 2) ? undefined : raysReady}
|
||||
hadImportedRays={hadImportedRays}
|
||||
onIncreaseRayCount={handleRayIncreaseCount}
|
||||
onRaysOperation={handleRaysOperation}
|
||||
// linkedLlmId={currentGatherLlmId}
|
||||
/>
|
||||
|
||||
{/* Rays Action Bar (2+ ready beams) - sibling of the grid (NOT a grid child); an in-grid spanning element with gridColumn:'1/-1' pins all auto-fit tracks open and leaves dead whitespace when raysCount < tracksCount. Fixes #1073. */}
|
||||
{(!isScattering && raysReady >= 2) && (
|
||||
<Box sx={{ display: 'flex', justifyContent: 'center', gap: 2, mx: 'var(--Pad)' }}>
|
||||
<Button size='sm' variant='outlined' color='neutral' onClick={() => handleRaysOperation('copy')} endDecorator={<ContentCopyIcon sx={{ fontSize: 'md' }} />} sx={{
|
||||
backgroundColor: 'background.surface',
|
||||
'&:hover': { backgroundColor: 'background.popup' },
|
||||
}}>
|
||||
Copy {raysReady}
|
||||
</Button>
|
||||
<Button size='sm' variant='outlined' color='success' onClick={() => handleRaysOperation('use')} endDecorator={<TelegramIcon sx={{ fontSize: 'xl' }} />} sx={{
|
||||
justifyContent: 'space-between',
|
||||
backgroundColor: 'background.surface',
|
||||
'&:hover': { backgroundColor: 'background.popup' },
|
||||
}}>
|
||||
Use {raysReady === 2 ? 'both' : 'all ' + raysReady} messages
|
||||
</Button>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
|
||||
{/* Gapper between Rays and Merge, without compromising the auto margin of the Ray Grid */}
|
||||
<Box />
|
||||
@@ -246,9 +265,9 @@ export function BeamView(props: {
|
||||
onPositive={handleStartMergeConfirmation}
|
||||
// lowStakes
|
||||
noTitleBar
|
||||
confirmationText='Some responses are still being generated. Do you want to stop and proceed with merging the available responses now?'
|
||||
positiveActionText='Proceed with Merge'
|
||||
negativeActionText='Wait for All Responses'
|
||||
confirmationText={'Some replies are still generating. Merge what\'s ready?'}
|
||||
positiveActionText='Merge now'
|
||||
negativeActionText='Wait for all'
|
||||
negativeActionStartDecorator={
|
||||
<CircularProgress color='neutral' sx={{ '--CircularProgress-size': '24px', '--CircularProgress-trackThickness': '1px' }} />
|
||||
}
|
||||
|
||||
@@ -149,7 +149,8 @@ export function BeamFusionGrid(props: {
|
||||
</Box> : (
|
||||
<Typography level='body-sm' sx={{ opacity: 0.8 }}>
|
||||
{/*You need two or more replies for a {currentFactory?.shortLabel?.toLocaleLowerCase() ?? ''} merge.*/}
|
||||
Waiting for multiple responses.
|
||||
{/*Waiting for multiple responses.*/}
|
||||
Merge needs 2+ replies. Beam some first.
|
||||
</Typography>
|
||||
)}
|
||||
</BeamCard>
|
||||
|
||||
@@ -49,7 +49,7 @@ export async function executeGatherInstruction(_i: GatherInstruction, inputs: Ex
|
||||
if (!inputs.chatMessages.length)
|
||||
throw new Error('No conversation history available');
|
||||
if (!inputs.rayMessages.length)
|
||||
throw new Error('No responses available');
|
||||
throw new Error('Needs two Beams at least');
|
||||
for (let rayMessage of inputs.rayMessages)
|
||||
if (rayMessage.role !== 'assistant')
|
||||
throw new Error('Invalid response role');
|
||||
|
||||
@@ -58,7 +58,7 @@ export function gatherStartFusion(
|
||||
if (chatMessages.length < 1)
|
||||
return onError('No conversation history available');
|
||||
if (rayMessages.length <= 1)
|
||||
return onError('No responses available');
|
||||
return onError('Needs two Beams at least');
|
||||
if (!initialFusion.llmId)
|
||||
return onError('No Merge model selected');
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ The final output should reflect a deep understanding of the user's preferences a
|
||||
addLabel: 'Add Breakdown',
|
||||
cardTitle: 'Evaluation Table',
|
||||
Icon: TableViewRoundedIcon as typeof SvgIcon,
|
||||
description: 'Analyzes and compares AI responses, offering a structured framework to support your response choice.',
|
||||
description: 'Analyzes and compares replies, with a structured framework to support your choice.',
|
||||
createInstructions: () => [
|
||||
{
|
||||
type: 'gather',
|
||||
|
||||
@@ -3,8 +3,6 @@ import * as React from 'react';
|
||||
import type { SxProps } from '@mui/joy/styles/types';
|
||||
import { Box, Button } from '@mui/joy';
|
||||
import AddCircleOutlineRoundedIcon from '@mui/icons-material/AddCircleOutlineRounded';
|
||||
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
|
||||
import TelegramIcon from '@mui/icons-material/Telegram';
|
||||
|
||||
import type { BeamStoreApi } from '../store-beam.hooks';
|
||||
import { BeamCard } from '../BeamCard';
|
||||
@@ -32,10 +30,8 @@ export function BeamRayGrid(props: {
|
||||
hadImportedRays: boolean,
|
||||
isMobile: boolean,
|
||||
onIncreaseRayCount: () => void,
|
||||
onRaysOperation: (operation: 'copy' | 'use') => void,
|
||||
rayIds: string[],
|
||||
showRayAdd: boolean,
|
||||
showRaysOps: undefined | number,
|
||||
}) {
|
||||
|
||||
const raysCount = props.rayIds.length;
|
||||
@@ -71,25 +67,6 @@ export function BeamRayGrid(props: {
|
||||
</BeamCard>
|
||||
)}
|
||||
|
||||
{/* Multi-Use and Copy Buttons */}
|
||||
{!!props.showRaysOps && (
|
||||
<Box sx={{ gridColumn: '1 / -1', display: 'flex', justifyContent: 'center', gap: 2, mt: 2 }}>
|
||||
<Button size='sm' variant='outlined' color='neutral' onClick={() => props.onRaysOperation('copy')} endDecorator={<ContentCopyIcon sx={{ fontSize: 'md' }} />} sx={{
|
||||
backgroundColor: 'background.surface',
|
||||
'&:hover': { backgroundColor: 'background.popup' },
|
||||
}}>
|
||||
Copy {props.showRaysOps}
|
||||
</Button>
|
||||
<Button size='sm' variant='outlined' color='success' onClick={() => props.onRaysOperation('use')} endDecorator={<TelegramIcon sx={{ fontSize: 'xl' }} />} sx={{
|
||||
justifyContent: 'space-between',
|
||||
backgroundColor: 'background.surface',
|
||||
'&:hover': { backgroundColor: 'background.popup' },
|
||||
}}>
|
||||
Use {props.showRaysOps == 2 ? 'both' : 'all ' + props.showRaysOps} messages
|
||||
</Button>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{/*/!* Takes a full row *!/*/}
|
||||
{/*<Divider sx={{*/}
|
||||
{/* gridColumn: '1 / -1',*/}
|
||||
|
||||
@@ -76,6 +76,12 @@ const createRootSlice: StateCreator<BeamStore, [], [], RootStoreSlice> = (_set,
|
||||
open: (chatHistory: Readonly<DMessage[]>, initialChatLlmId: DLLMId | null, isEditMode: boolean, callback: BeamSuccessCallback) => {
|
||||
const { isOpen: wasAlreadyOpen, terminateKeepingSettings, loadBeamConfig, hadImportedRays, setRayLlmIds, setCurrentGatherLlmId } = _get();
|
||||
|
||||
// if already open, preserve the live state (rays, fusions, callback) - re-invocation must never wipe an ongoing beam
|
||||
if (wasAlreadyOpen) {
|
||||
console.warn('[DEV] Beam is already open');
|
||||
return;
|
||||
}
|
||||
|
||||
// reset pending operations
|
||||
terminateKeepingSettings();
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@ const _oaiEffortOptions = [
|
||||
] as const;
|
||||
|
||||
const _miscEffortOptions = [
|
||||
{ value: 'max', label: 'Max', description: 'Hardest thinking' } as const,
|
||||
{ value: 'high', label: 'On', description: 'Multi-step reasoning' } as const,
|
||||
{ value: 'none', label: 'Off', description: 'Disable thinking mode' } as const,
|
||||
{ value: _UNSPECIFIED, label: 'Default', description: 'Model Default' } as const,
|
||||
|
||||
@@ -72,7 +72,7 @@ const geminiExpFree: ModelDescriptionSchema['chatPrice'] = {
|
||||
};
|
||||
|
||||
|
||||
// Pricing based on https://ai.google.dev/pricing (Apr 22, 2026)
|
||||
// Pricing based on https://ai.google.dev/pricing (Apr 24, 2026)
|
||||
|
||||
const gemini31FlashLitePricing: ModelDescriptionSchema['chatPrice'] = {
|
||||
input: 0.25, // text/image/video; audio is $0.50 but we don't differentiate yet
|
||||
@@ -262,8 +262,10 @@ const _knownGeminiModels: ({
|
||||
|
||||
/// Generation 3.0
|
||||
|
||||
// 3.0 Pro (Preview) - Released November 18, 2025; DEPRECATED: shutdown March 9, 2026 (still served by API as of Apr 17, 2026)
|
||||
// 3.0 Pro (Preview) - Released November 18, 2025; SHUT DOWN March 9, 2026 - now silently routed to gemini-3.1-pro-preview
|
||||
// Kept hidden (still returned by API) to avoid confusing users with a silently-redirected model.
|
||||
{
|
||||
hidden: true, // March 9, 2026: API silently routes 'gemini-3-pro-preview' to 'gemini-3.1-pro-preview' - hide to prevent user confusion
|
||||
id: 'models/gemini-3-pro-preview',
|
||||
labelOverride: 'Gemini 3 Pro Preview',
|
||||
isPreview: true,
|
||||
@@ -335,6 +337,7 @@ const _knownGeminiModels: ({
|
||||
|
||||
// 2.5 Pro (Stable) - Released June 17, 2025; DEPRECATED: shutdown June 17, 2026
|
||||
{
|
||||
hidden: true, // outperformed by 3.1 Pro (1493) and even 3 Flash (1474) - deprecated in 2 months
|
||||
id: 'models/gemini-2.5-pro',
|
||||
labelOverride: 'Gemini 2.5 Pro',
|
||||
deprecated: '2026-06-17',
|
||||
@@ -378,7 +381,7 @@ const _knownGeminiModels: ({
|
||||
labelOverride: 'Deep Research Preview (2026-04)',
|
||||
isPreview: true,
|
||||
chatPrice: gemini25ProPricing, // pricing not explicitly listed; using 2.5 Pro as baseline
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
|
||||
parameterSpecs: [],
|
||||
benchmark: undefined, // Deep research model, not benchmarkable on standard tests
|
||||
// 128K input, 64K output
|
||||
@@ -390,7 +393,7 @@ const _knownGeminiModels: ({
|
||||
labelOverride: 'Deep Research Max Preview (2026-04)',
|
||||
isPreview: true,
|
||||
chatPrice: gemini25ProPricing, // baseline estimate (see note above)
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
|
||||
parameterSpecs: [],
|
||||
benchmark: undefined, // Deep research model, not benchmarkable on standard tests
|
||||
},
|
||||
@@ -402,7 +405,7 @@ const _knownGeminiModels: ({
|
||||
labelOverride: 'Deep Research Pro Preview',
|
||||
isPreview: true,
|
||||
chatPrice: gemini25ProPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Reasoning, LLM_IF_GEM_Interactions],
|
||||
parameterSpecs: [{ paramId: 'llmVndGeminiThinkingBudget' }],
|
||||
benchmark: undefined, // Deep research model, not benchmarkable on standard tests
|
||||
// Note: 128K input context, 64K output context
|
||||
@@ -412,6 +415,7 @@ const _knownGeminiModels: ({
|
||||
|
||||
// 2.5 Flash
|
||||
{
|
||||
hidden: true, // outperformed by 3 Flash Preview (1474 vs 1411) - deprecated in 2 months
|
||||
id: 'models/gemini-2.5-flash',
|
||||
labelOverride: 'Gemini 2.5 Flash',
|
||||
deprecated: '2026-06-17',
|
||||
@@ -467,6 +471,7 @@ const _knownGeminiModels: ({
|
||||
|
||||
// 2.5 Flash-Based: Gemini Robotics-ER 1.5 Preview - Released September 25, 2025; DEPRECATED: shutdown April 30, 2026
|
||||
{
|
||||
hidden: true, // superseded by Robotics-ER 1.6 - shutdown April 30, 2026
|
||||
id: 'models/gemini-robotics-er-1.5-preview',
|
||||
labelOverride: 'Gemini Robotics-ER 1.5 Preview',
|
||||
isPreview: true,
|
||||
@@ -573,6 +578,7 @@ const _knownGeminiModels: ({
|
||||
|
||||
// 2.0 Flash - DEPRECATED: shutdown June 1, 2026 (announced Feb 18, 2026)
|
||||
{
|
||||
hidden: true, // outclassed by all Flash models in 2.5/3.x series - shutdown in ~5 weeks
|
||||
id: 'models/gemini-2.0-flash-001',
|
||||
deprecated: '2026-06-01',
|
||||
chatPrice: gemini20FlashPricing,
|
||||
@@ -580,6 +586,7 @@ const _knownGeminiModels: ({
|
||||
benchmark: { cbaElo: 1360 }, // gemini-2.0-flash-001
|
||||
},
|
||||
{
|
||||
hidden: true, // outclassed by all Flash models in 2.5/3.x series - shutdown in ~5 weeks
|
||||
id: 'models/gemini-2.0-flash',
|
||||
symLink: 'models/gemini-2.0-flash-001',
|
||||
deprecated: '2026-06-01',
|
||||
@@ -591,6 +598,7 @@ const _knownGeminiModels: ({
|
||||
|
||||
// 2.0 Flash Lite - DEPRECATED: shutdown June 1, 2026 (announced Feb 18, 2026)
|
||||
{
|
||||
hidden: true, // outclassed by 2.5/3.1 Flash-Lite - shutdown in ~5 weeks
|
||||
id: 'models/gemini-2.0-flash-lite',
|
||||
chatPrice: gemini20FlashLitePricing,
|
||||
symLink: 'models/gemini-2.0-flash-lite-001',
|
||||
@@ -599,6 +607,7 @@ const _knownGeminiModels: ({
|
||||
benchmark: { cbaElo: 1310 },
|
||||
},
|
||||
{
|
||||
hidden: true, // outclassed by 2.5/3.1 Flash-Lite - shutdown in ~5 weeks
|
||||
id: 'models/gemini-2.0-flash-lite-001',
|
||||
chatPrice: gemini20FlashLitePricing,
|
||||
deprecated: '2026-06-01',
|
||||
|
||||
@@ -1,38 +1,70 @@
|
||||
import { LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types';
|
||||
import { LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
|
||||
import { fromManualMapping, ManualMappings } from '../../models.mappings';
|
||||
|
||||
|
||||
const IF_3 = [LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json];
|
||||
const IF_4 = [LLM_IF_HOTFIX_StripImages, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn];
|
||||
|
||||
// [DeepSeek, 2026-04-24] V4 release - https://api-docs.deepseek.com/news/news260424
|
||||
// - V4-Pro: 1.6T total / 49B active params; V4-Flash: 284B total / 13B active params (Novel Attention: token-wise compression + DSA)
|
||||
// - Model IDs listed by /models: deepseek-v4-flash, deepseek-v4-pro
|
||||
// - 1M context is the default across services; text-only (no vision/multimodal)
|
||||
// - Legacy aliases still accepted until 2026-07-24: deepseek-chat -> v4-flash (thinking disabled), deepseek-reasoner -> v4-flash (thinking enabled)
|
||||
// - Reasoning control: object `thinking: { type: 'enabled'|'disabled', reasoning_effort?: 'high'|'max' }`
|
||||
// (the live API also accepts type: 'adaptive', but it is undocumented and empirically behaves the same as 'enabled'
|
||||
// on current builds -- deliberately not exposed here; add it once docs + semantics stabilize)
|
||||
// - V3.2 endpoints no longer accessible via direct model ID (API returns only v4-flash/v4-pro)
|
||||
const _knownDeepseekChatModels: ManualMappings = [
|
||||
// [Models and Pricing](https://api-docs.deepseek.com/quick_start/pricing)
|
||||
// [List Models](https://api-docs.deepseek.com/api/list-models)
|
||||
// [Release Notes - V3.2](https://api-docs.deepseek.com/news/news251201) - Released 2025-12-01
|
||||
{
|
||||
idPrefix: 'deepseek-v4-pro',
|
||||
label: 'DeepSeek V4 Pro',
|
||||
description: 'Premium reasoning model with 1M context. Supports extended thinking modes, JSON output, and function calling.',
|
||||
contextWindow: 1_048_576, // 1M
|
||||
interfaces: [...IF_4, LLM_IF_OAI_Reasoning],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high', 'max'] },
|
||||
],
|
||||
maxCompletionTokens: 65536, // conservative default; docs advertise up to 384K
|
||||
chatPrice: { input: 1.74, output: 3.48, cache: { cType: 'oai-ac', read: 0.145 } },
|
||||
benchmark: { cbaElo: 1463 }, // lmarena: deepseek-v4-pro (thinking variant 1462, near-tied)
|
||||
},
|
||||
{
|
||||
idPrefix: 'deepseek-v4-flash',
|
||||
label: 'DeepSeek V4 Flash',
|
||||
description: 'Fast general-purpose model with 1M context. Supports extended thinking modes, JSON output, and function calling.',
|
||||
contextWindow: 1_048_576, // 1M
|
||||
interfaces: [...IF_4, LLM_IF_OAI_Reasoning],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high', 'max'] },
|
||||
],
|
||||
maxCompletionTokens: 65536, // conservative default; docs advertise up to 384K
|
||||
chatPrice: { input: 0.14, output: 0.28, cache: { cType: 'oai-ac', read: 0.028 } },
|
||||
benchmark: { cbaElo: 1439 }, // lmarena: deepseek-v4-flash-thinking (non-thinking variant 1433)
|
||||
},
|
||||
// Legacy aliases - API routes both to deepseek-v4-flash with thinking pre-set
|
||||
{
|
||||
idPrefix: 'deepseek-reasoner',
|
||||
label: 'DeepSeek V3.2 (Reasoner)',
|
||||
description: 'Reasoning model with Chain-of-Thought capabilities, 128K context length. Supports JSON output and function calling.',
|
||||
contextWindow: 131072, // 128K
|
||||
interfaces: [...IF_3, LLM_IF_OAI_Reasoning],
|
||||
// parameterSpecs: [
|
||||
// { paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] }, // not supported: this model is reasoning only
|
||||
// ],
|
||||
maxCompletionTokens: 32768, // default, max: 65536
|
||||
chatPrice: { input: 0.28, output: 0.42, cache: { cType: 'oai-ac', read: 0.028 } },
|
||||
benchmark: { cbaElo: 1425 }, // deepseek-v3.2-exp-thinking
|
||||
label: 'DeepSeek Reasoner (legacy)',
|
||||
description: 'Legacy alias: routes to DeepSeek V4 Flash with thinking enabled. Retires 2026-07-24.',
|
||||
contextWindow: 1_048_576,
|
||||
interfaces: [...IF_4, LLM_IF_OAI_Reasoning],
|
||||
maxCompletionTokens: 65536,
|
||||
chatPrice: { input: 0.14, output: 0.28, cache: { cType: 'oai-ac', read: 0.028 } },
|
||||
benchmark: { cbaElo: 1439 }, // lmarena: deepseek-v4-flash-thinking
|
||||
isLegacy: true,
|
||||
},
|
||||
{
|
||||
idPrefix: 'deepseek-chat',
|
||||
label: 'DeepSeek V3.2',
|
||||
description: 'General-purpose model with 128K context length. Supports JSON output and function calling.',
|
||||
contextWindow: 131072, // 128K
|
||||
interfaces: IF_3,
|
||||
maxCompletionTokens: 8192, // default is 4096, max is 8192
|
||||
chatPrice: { input: 0.28, output: 0.42, cache: { cType: 'oai-ac', read: 0.028 } },
|
||||
benchmark: { cbaElo: 1424 }, // deepseek-v3.2
|
||||
label: 'DeepSeek Chat (legacy)',
|
||||
description: 'Legacy alias: routes to DeepSeek V4 Flash with thinking disabled. Retires 2026-07-24.',
|
||||
contextWindow: 1_048_576,
|
||||
interfaces: IF_4,
|
||||
maxCompletionTokens: 65536,
|
||||
chatPrice: { input: 0.14, output: 0.28, cache: { cType: 'oai-ac', read: 0.028 } },
|
||||
benchmark: { cbaElo: 1433 }, // lmarena: deepseek-v4-flash (non-thinking)
|
||||
isLegacy: true,
|
||||
},
|
||||
];
|
||||
|
||||
|
||||
@@ -12,6 +12,23 @@ import { fromManualMapping, KnownModel, llmDevCheckModels_DEV, ManualMappings }
|
||||
// OpenAI Model Variants
|
||||
export const hardcodedOpenAIVariants: ModelVariantMap = {
|
||||
|
||||
// GPT-5.5 with reasoning disabled (non-thinking) - supports temperature control
|
||||
'gpt-5.5-2026-04-23': {
|
||||
idVariant: '::thinking-none',
|
||||
label: 'GPT-5.5 (No-thinking)',
|
||||
hidden: true, // hidden by default as redundant, user can unhide in settings
|
||||
description: 'Supports temperature control for creative applications. GPT-5.5 with reasoning disabled (reasoning_effort=none).',
|
||||
interfaces: [LLM_IF_OAI_Responses, LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_PromptCaching], // NO LLM_IF_OAI_Reasoning, NO LLM_IF_HOTFIX_NoTemperature
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'none', hidden: true }, // factory 'none', not changeable
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiVerbosity' },
|
||||
{ paramId: 'llmVndOaiImageGeneration' },
|
||||
{ paramId: 'llmVndOaiCodeInterpreter' },
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
},
|
||||
|
||||
// GPT-5.4 with reasoning disabled (non-thinking) - supports temperature control
|
||||
'gpt-5.4-2026-03-05': {
|
||||
idVariant: '::thinking-none',
|
||||
@@ -88,6 +105,58 @@ const PS_DEEP_RESEARCH = [{ paramId: 'llmVndOaiWebSearchContext' as const, initi
|
||||
// https://platform.openai.com/docs/pricing
|
||||
export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
/// GPT-5.5 series - Released April 23, 2026
|
||||
|
||||
// GPT-5.5
|
||||
{
|
||||
idPrefix: 'gpt-5.5-2026-04-23',
|
||||
label: 'GPT-5.5 (2026-04-23)',
|
||||
description: 'New baseline for complex production workflows. Stronger task execution, more precise tool use, more efficient reasoning with fewer tokens. 1M token context.',
|
||||
contextWindow: 1050000,
|
||||
maxCompletionTokens: 128000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['none', 'low', 'medium', 'high', 'xhigh'], initialValue: 'medium' }, // medium is the new default for 5.5
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiVerbosity' },
|
||||
{ paramId: 'llmVndOaiImageGeneration' },
|
||||
{ paramId: 'llmVndOaiCodeInterpreter' },
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
chatPrice: { input: 5, cache: { cType: 'oai-ac', read: 0.5 }, output: 30 },
|
||||
// benchmark: TBD - no CBA ELO yet
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-5.5',
|
||||
label: 'GPT-5.5',
|
||||
symLink: 'gpt-5.5-2026-04-23',
|
||||
},
|
||||
|
||||
// GPT-5.5 Pro
|
||||
{
|
||||
idPrefix: 'gpt-5.5-pro-2026-04-23',
|
||||
label: 'GPT-5.5 Pro (2026-04-23)',
|
||||
description: 'Most capable model for complex tasks. Uses more compute for smarter, more precise responses on the hardest problems.',
|
||||
contextWindow: 1050000,
|
||||
maxCompletionTokens: 272000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [
|
||||
{ paramId: 'llmVndOaiEffort', enumValues: ['medium', 'high', 'xhigh'] }, // Pro: no low/none
|
||||
{ paramId: 'llmVndOaiWebSearchContext' },
|
||||
{ paramId: 'llmVndOaiVerbosity' },
|
||||
{ paramId: 'llmVndOaiImageGeneration' },
|
||||
{ paramId: 'llmForceNoStream' },
|
||||
],
|
||||
chatPrice: { input: 30, output: 180 },
|
||||
// benchmark: TBD
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-5.5-pro',
|
||||
label: 'GPT-5.5 Pro',
|
||||
symLink: 'gpt-5.5-pro-2026-04-23',
|
||||
},
|
||||
|
||||
|
||||
/// GPT-5.4 series - Released March 5, 2026
|
||||
|
||||
// GPT-5.4
|
||||
@@ -250,6 +319,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5.2
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5
|
||||
idPrefix: 'gpt-5.2-2025-12-11',
|
||||
label: 'GPT-5.2 (2025-12-11)',
|
||||
description: 'Most capable model for professional work and long-running agents. Improvements in general intelligence, long-context, agentic tool-calling, and vision.',
|
||||
@@ -268,6 +338,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
benchmark: { cbaElo: 1441 }, // gpt-5.2-high
|
||||
},
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5
|
||||
idPrefix: 'gpt-5.2',
|
||||
label: 'GPT-5.2',
|
||||
symLink: 'gpt-5.2-2025-12-11',
|
||||
@@ -275,6 +346,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5.2 Codex
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.3 Codex
|
||||
idPrefix: 'gpt-5.2-codex',
|
||||
label: 'GPT-5.2 Codex',
|
||||
description: 'GPT-5.2 optimized for long-horizon, agentic coding tasks in Codex or similar environments. Supports low, medium, high, and xhigh reasoning effort settings.',
|
||||
@@ -293,6 +365,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5.2 Chat Latest
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.3 Instant
|
||||
idPrefix: 'gpt-5.2-chat-latest',
|
||||
label: 'GPT-5.2 Instant',
|
||||
description: 'GPT-5.2 model powering ChatGPT. Fast, capable for everyday work with clear improvements in info-seeking, how-tos, technical writing.',
|
||||
@@ -311,6 +384,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5.2 Pro
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5 Pro
|
||||
idPrefix: 'gpt-5.2-pro-2025-12-11',
|
||||
label: 'GPT-5.2 Pro (2025-12-11)',
|
||||
description: 'Smartest and most trustworthy option for difficult questions. Uses more compute for harder thinking on complex domains like programming.',
|
||||
@@ -328,6 +402,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
// benchmark: TBD
|
||||
},
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5 Pro
|
||||
idPrefix: 'gpt-5.2-pro',
|
||||
label: 'GPT-5.2 Pro',
|
||||
symLink: 'gpt-5.2-pro-2025-12-11',
|
||||
@@ -338,6 +413,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5.1
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5
|
||||
idPrefix: 'gpt-5.1-2025-11-13',
|
||||
label: 'GPT-5.1 (2025-11-13)',
|
||||
description: 'The best model for coding and agentic tasks with configurable reasoning effort.',
|
||||
@@ -355,6 +431,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
benchmark: { cbaElo: 1455 }, // gpt-5.1-high
|
||||
},
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5
|
||||
idPrefix: 'gpt-5.1',
|
||||
label: 'GPT-5.1',
|
||||
symLink: 'gpt-5.1-2025-11-13',
|
||||
@@ -362,6 +439,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5.1 Chat Latest
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.3 Instant
|
||||
idPrefix: 'gpt-5.1-chat-latest',
|
||||
label: 'GPT-5.1 Instant',
|
||||
description: 'GPT-5.1 Instant with adaptive reasoning. More conversational with improved instruction following.',
|
||||
@@ -381,6 +459,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5.1 Codex Max
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.3 Codex
|
||||
idPrefix: 'gpt-5.1-codex-max',
|
||||
label: 'GPT-5.1 Codex Max',
|
||||
description: 'Our most intelligent coding model optimized for long-horizon, agentic coding tasks.',
|
||||
@@ -398,6 +477,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
},
|
||||
// GPT-5.1 Codex
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.3 Codex
|
||||
idPrefix: 'gpt-5.1-codex',
|
||||
label: 'GPT-5.1 Codex',
|
||||
description: 'A version of GPT-5.1 optimized for agentic coding tasks in Codex or similar environments.',
|
||||
@@ -415,6 +495,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
},
|
||||
// GPT-5.1 Codex Mini
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.3 Codex
|
||||
idPrefix: 'gpt-5.1-codex-mini',
|
||||
label: 'GPT-5.1 Codex Mini',
|
||||
description: 'Smaller, faster version of GPT-5.1 Codex for efficient coding tasks.',
|
||||
@@ -436,6 +517,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5
|
||||
idPrefix: 'gpt-5-2025-08-07',
|
||||
label: 'GPT-5 (2025-08-07)',
|
||||
description: 'The best model for coding and agentic tasks across domains.',
|
||||
@@ -453,6 +535,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
benchmark: { cbaElo: 1433 }, // gpt-5-high
|
||||
},
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5
|
||||
idPrefix: 'gpt-5',
|
||||
label: 'GPT-5',
|
||||
symLink: 'gpt-5-2025-08-07',
|
||||
@@ -460,6 +543,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5 Pro
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5 Pro
|
||||
idPrefix: 'gpt-5-pro-2025-10-06',
|
||||
label: 'GPT-5 Pro (2025-10-06)',
|
||||
description: 'Version of GPT-5 that uses more compute to produce smarter and more precise responses. Designed for tough problems.',
|
||||
@@ -471,6 +555,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
// benchmark: has not been measured yet
|
||||
},
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4/5.5 Pro
|
||||
idPrefix: 'gpt-5-pro',
|
||||
label: 'GPT-5 Pro',
|
||||
symLink: 'gpt-5-pro-2025-10-06',
|
||||
@@ -511,6 +596,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5 Search API
|
||||
{
|
||||
hidden: true, // poor quality - use llmVndOaiWebSearchContext on regular models instead
|
||||
idPrefix: 'gpt-5-search-api-2025-10-14',
|
||||
label: 'GPT-5 Search API (2025-10-14)',
|
||||
description: 'Updated web search model in Chat Completions API. 60% cheaper with domain filtering support.',
|
||||
@@ -522,6 +608,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
// benchmark: TBD
|
||||
},
|
||||
{
|
||||
hidden: true, // poor quality - use llmVndOaiWebSearchContext on regular models instead
|
||||
idPrefix: 'gpt-5-search-api',
|
||||
label: 'GPT-5 Search API',
|
||||
symLink: 'gpt-5-search-api-2025-10-14',
|
||||
@@ -529,6 +616,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5 mini
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4 Mini
|
||||
idPrefix: 'gpt-5-mini-2025-08-07',
|
||||
label: 'GPT-5 Mini (2025-08-07)',
|
||||
description: 'A faster, more cost-efficient version of GPT-5 for well-defined tasks.',
|
||||
@@ -540,6 +628,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
benchmark: { cbaElo: 1390 }, // gpt-5-mini-high
|
||||
},
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4 Mini
|
||||
idPrefix: 'gpt-5-mini',
|
||||
label: 'GPT-5 Mini',
|
||||
symLink: 'gpt-5-mini-2025-08-07',
|
||||
@@ -547,6 +636,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-5 nano
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4 Nano
|
||||
idPrefix: 'gpt-5-nano-2025-08-07',
|
||||
label: 'GPT-5 Nano (2025-08-07)',
|
||||
description: 'Fastest, most cost-efficient version of GPT-5 for summarization and classification tasks.',
|
||||
@@ -558,6 +648,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
benchmark: { cbaElo: 1337 }, // gpt-5-nano-high
|
||||
},
|
||||
{
|
||||
hidden: true, // superseded by GPT-5.4 Nano
|
||||
idPrefix: 'gpt-5-nano',
|
||||
label: 'GPT-5 Nano',
|
||||
symLink: 'gpt-5-nano-2025-08-07',
|
||||
@@ -608,8 +699,9 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
// o4-mini-deep-research - (v1/responses API)
|
||||
{
|
||||
idPrefix: 'o4-mini-deep-research-2025-06-26',
|
||||
label: 'o4 Mini Deep Research (2025-06-26)',
|
||||
description: 'Faster, more affordable deep research model for complex, multi-step research tasks.',
|
||||
label: 'o4 Mini Deep Research [Deprecated]',
|
||||
isLegacy: true,
|
||||
description: 'Faster, more affordable deep research model for complex, multi-step research tasks. [Shutdown: 2026-07-23 - migrate to GPT-5.5 with web search.]',
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 100000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
@@ -625,8 +717,9 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
/// o4-mini
|
||||
{
|
||||
idPrefix: 'o4-mini-2025-04-16',
|
||||
label: 'o4 Mini (2025-04-16)',
|
||||
description: 'Latest o4-mini model. Optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks.',
|
||||
label: 'o4 Mini [Deprecated]',
|
||||
isLegacy: true,
|
||||
description: 'Latest o4-mini model. Optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. [Shutdown: 2026-10-23 - migrate to GPT-5.4 Mini.]',
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 100000,
|
||||
interfaces: IFS_CHAT_CACHE_REASON,
|
||||
@@ -643,8 +736,9 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
// o3-deep-research - (v1/responses API)
|
||||
{
|
||||
idPrefix: 'o3-deep-research-2025-06-26',
|
||||
label: 'o3 Deep Research (2025-06-26)',
|
||||
description: 'Our most powerful deep research model for complex, multi-step research tasks.',
|
||||
label: 'o3 Deep Research [Deprecated]',
|
||||
isLegacy: true,
|
||||
description: 'Our most powerful deep research model for complex, multi-step research tasks. [Shutdown: 2026-07-23 - migrate to GPT-5.5 Pro with web search.]',
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 100000,
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
|
||||
@@ -696,8 +790,9 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
// o3-mini
|
||||
{
|
||||
idPrefix: 'o3-mini-2025-01-31',
|
||||
label: 'o3 Mini (2025-01-31)',
|
||||
description: 'Latest o3-mini model snapshot. High intelligence at the same cost and latency targets of o1-mini. Excels at science, math, and coding tasks.',
|
||||
label: 'o3 Mini [Deprecated]',
|
||||
isLegacy: true,
|
||||
description: 'Latest o3-mini model snapshot. High intelligence at the same cost and latency targets of o1-mini. Excels at science, math, and coding tasks. [Shutdown: 2026-10-23 - migrate to GPT-5.4 Mini.]',
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 100000,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_StripImages],
|
||||
@@ -733,8 +828,9 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
// o1
|
||||
{
|
||||
idPrefix: 'o1-2024-12-17',
|
||||
label: 'o1 (2024-12-17)',
|
||||
description: 'Previous full o-series reasoning model.',
|
||||
label: 'o1 [Deprecated]',
|
||||
isLegacy: true,
|
||||
description: 'Previous full o-series reasoning model. [Shutdown: 2026-10-23 - migrate to GPT-5.5 or o3.]',
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 100000,
|
||||
interfaces: IFS_CHAT_CACHE_REASON,
|
||||
@@ -788,8 +884,9 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
// GPT-4.1 nano
|
||||
{
|
||||
idPrefix: 'gpt-4.1-nano-2025-04-14',
|
||||
label: 'GPT-4.1 Nano (2025-04-14)',
|
||||
description: 'Fastest, most cost-effective GPT 4.1 model. Delivers exceptional performance with low latency, ideal for tasks like classification or autocompletion.',
|
||||
label: 'GPT-4.1 Nano [Deprecated]',
|
||||
isLegacy: true,
|
||||
description: 'Fastest, most cost-effective GPT 4.1 model. Delivers exceptional performance with low latency, ideal for tasks like classification or autocompletion. [Shutdown: 2026-10-23 - migrate to GPT-5.4 Nano.]',
|
||||
contextWindow: 1047576,
|
||||
maxCompletionTokens: 32768,
|
||||
interfaces: IFS_CHAT_CACHE,
|
||||
@@ -819,6 +916,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// gpt-audio
|
||||
{
|
||||
hidden: true, // superseded by GPT Audio 1.5
|
||||
idPrefix: 'gpt-audio-2025-08-28',
|
||||
label: 'GPT Audio (2025-08-28)',
|
||||
description: 'First generally available audio model. Accepts audio inputs and outputs, and can be used in the Chat Completions REST API.',
|
||||
@@ -829,6 +927,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
// benchmark: TBD
|
||||
},
|
||||
{
|
||||
hidden: true, // superseded by GPT Audio 1.5
|
||||
idPrefix: 'gpt-audio',
|
||||
label: 'GPT Audio',
|
||||
symLink: 'gpt-audio-2025-08-28',
|
||||
@@ -1220,6 +1319,12 @@ export function openAIInjectVariants(acc: ModelDescriptionSchema[], model: Model
|
||||
|
||||
|
||||
const _manualOrderingIdPrefixes = [
|
||||
// GPT-5.5
|
||||
'gpt-5.5-20',
|
||||
'gpt-5.5-pro-20',
|
||||
'gpt-5.5-pro',
|
||||
'gpt-5.5-chat-latest',
|
||||
'gpt-5.5',
|
||||
// GPT-5.4
|
||||
'gpt-5.4-20',
|
||||
'gpt-5.4-pro-20',
|
||||
@@ -1419,6 +1524,7 @@ export function llmOrtOaiLookup(orModelName: string): OrtVendorLookupResult | un
|
||||
// typemap to known models
|
||||
const ortOaiRefMap: Record<string, string | null> = {
|
||||
// renames
|
||||
'gpt-5.5-chat': 'gpt-5.5-2026-04-23', // no chat-latest yet, map to snapshot
|
||||
'gpt-5.4-chat': 'gpt-5.4-2026-03-05', // no chat-latest yet, map to snapshot
|
||||
'gpt-5.3-chat': 'gpt-5.3-chat-latest',
|
||||
'gpt-5.2-chat': 'gpt-5.2-chat-latest',
|
||||
|
||||
@@ -246,7 +246,10 @@ export function openRouterModelToModelDescription(wireModel: object): ModelDescr
|
||||
// 0-day: xAI/Grok/Moonshot/Z.ai/DeepSeek models get default reasoning effort if not inherited
|
||||
if (interfaces.includes(LLM_IF_OAI_Reasoning) && !parameterSpecs.some(p => p.paramId === 'llmVndMiscEffort')) {
|
||||
// console.log('[DEV] openRouterModelToModelDescription: unexpected xAI/Grok/DeepSeek reasoning model:', model.id);
|
||||
parameterSpecs.push({ paramId: 'llmVndMiscEffort' }); // binary thinking for these vendors
|
||||
// Binary thinking only: OpenRouter's unified reasoning API currently rejects 'max' (see openai.chatCompletions.ts).
|
||||
// We pin enumValues here so the shared llmVndMiscEffort registry (which also includes 'max' for native DeepSeek V4)
|
||||
// does not surface 'max' in the UI for OR-routed models that can't honor it.
|
||||
parameterSpecs.push({ paramId: 'llmVndMiscEffort', enumValues: ['none', 'high'] });
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user