diff --git a/src/common/stores/llms/llms.parameters.ts b/src/common/stores/llms/llms.parameters.ts index 440ee2ef0..d355e0e71 100644 --- a/src/common/stores/llms/llms.parameters.ts +++ b/src/common/stores/llms/llms.parameters.ts @@ -69,6 +69,12 @@ export const DModelParameterRegistry = { /** * First introduced as a user-configurable parameter for the 'Verification' required by o3. * [2025-04-16] Adding parameter to disable streaming for o3, and possibly more models. + * + * [2026-01-21] OpenAI Responses API: Reasoning Summaries require organization verification. + * Per OpenAI docs, both streaming AND reasoning summaries require org verification for GPT-5/5.1/5.2. + * - https://help.openai.com/en/articles/10362446-api-model-availability-by-usage-tier-and-verification-status + * - Rather than adding a separate param, we piggyback on llmForceNoStream. + * - AIX Wire type `vndOaiReasoningSummary` is derived from `llmForceNoStream` in aix.client.ts. */ llmForceNoStream: { label: 'Disable Streaming', diff --git a/src/modules/aix/client/aix.client.ts b/src/modules/aix/client/aix.client.ts index 0a5959e11..079fc4584 100644 --- a/src/modules/aix/client/aix.client.ts +++ b/src/modules/aix/client/aix.client.ts @@ -46,7 +46,7 @@ export function aixCreateModelFromLLMOptions( // destructure input with the overrides const { - llmRef, llmTemperature, llmResponseTokens, llmTopP, + llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream, llmVndAnt1MContext, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndAntEffort, llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiShowThoughts, llmVndGeminiThinkingBudget, llmVndGeminiThinkingLevel, llmVndGeminiThinkingLevel4, // llmVndMoonshotWebSearch, @@ -100,6 +100,7 @@ export function aixCreateModelFromLLMOptions( ...(hotfixOmitTemperature ? { temperature: null } : llmTemperature !== undefined ? { temperature: llmTemperature } : {}), ...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}), ...(llmTopP !== undefined ? { topP: llmTopP } : {}), + ...(llmForceNoStream ? { forceNoStream: true } : {}), ...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget } : {}), ...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}), ...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}), @@ -121,7 +122,10 @@ export function aixCreateModelFromLLMOptions( // ...(llmVndGeminiUrlContext === 'auto' ? { vndGeminiUrlContext: llmVndGeminiUrlContext } : {}), // ...(llmVndMoonshotWebSearch === 'auto' ? { vndMoonshotWebSearch: 'auto' } : {}), ...(llmVndOaiResponsesAPI ? { vndOaiResponsesAPI: true } : {}), - ...((llmVndOaiReasoningEffort52Pro || llmVndOaiReasoningEffort52 || llmVndOaiReasoningEffort4 || llmVndOaiReasoningEffort) ? { vndOaiReasoningEffort: llmVndOaiReasoningEffort52Pro || llmVndOaiReasoningEffort52 || llmVndOaiReasoningEffort4 || llmVndOaiReasoningEffort } : {}), + ...((llmVndOaiReasoningEffort52Pro || llmVndOaiReasoningEffort52 || llmVndOaiReasoningEffort4 || llmVndOaiReasoningEffort) ? { + vndOaiReasoningEffort: llmVndOaiReasoningEffort52Pro || llmVndOaiReasoningEffort52 || llmVndOaiReasoningEffort4 || llmVndOaiReasoningEffort, + vndOaiReasoningSummary: llmForceNoStream ? 'none' /* we disable the summaries, to not require org verification */ : 'detailed', + } : {}), ...(llmVndOaiRestoreMarkdown ? { vndOaiRestoreMarkdown: llmVndOaiRestoreMarkdown } : {}), ...(llmVndOaiVerbosity ? { vndOaiVerbosity: llmVndOaiVerbosity } : {}), ...(llmVndOaiWebSearchContext ? { vndOaiWebSearchContext: llmVndOaiWebSearchContext } : {}), diff --git a/src/modules/aix/server/api/aix.wiretypes.ts b/src/modules/aix/server/api/aix.wiretypes.ts index 3c9969937..e3f783f7a 100644 --- a/src/modules/aix/server/api/aix.wiretypes.ts +++ b/src/modules/aix/server/api/aix.wiretypes.ts @@ -472,12 +472,13 @@ export namespace AixWire_API { // Moonshot vndMoonshotWebSearch: z.enum(['auto']).optional(), // OpenAI + vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(), vndOaiResponsesAPI: z.boolean().optional(), vndOaiReasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(), + vndOaiReasoningSummary: z.enum(['none', 'detailed']).optional(), vndOaiRestoreMarkdown: z.boolean().optional(), vndOaiVerbosity: z.enum(['low', 'medium', 'high']).optional(), vndOaiWebSearchContext: z.enum(['low', 'medium', 'high']).optional(), - vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(), // OpenRouter vndOrtWebSearch: z.enum(['auto']).optional(), // Perplexity diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts index e64acae98..6950fab94 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts @@ -38,7 +38,6 @@ export function aixToOpenAIResponses( const isOpenAIOFamily = ['gpt-6', 'gpt-5', 'o4', 'o3', 'o1'].some(_id => model.id === _id || model.id.startsWith(_id + '-')); const isOpenAIChatGPT = ['gpt-5-chat'].some(_id => model.id === _id || model.id.startsWith(_id + '-')); const isOpenAIComputerUse = model.id.includes('computer-use'); - const isOpenAIO1Pro = model.id === 'o1-pro' || model.id.startsWith('o1-pro-'); const hotFixNoTemperature = isOpenAIOFamily && !isOpenAIChatGPT; const hotFixNoTruncateAuto = isOpenAIComputerUse; @@ -75,7 +74,8 @@ export function aixToOpenAIResponses( // Operations Config reasoning: !model.vndOaiReasoningEffort ? undefined : { effort: model.vndOaiReasoningEffort, - summary: !isOpenAIO1Pro ? 'detailed' : 'auto', // elevated from 'auto' (o1-pro still at 'auto') + // 'none' = omit (for unverified orgs), 'detailed' = explicit, undefined = default per model + ...(model.vndOaiReasoningSummary !== 'none' ? { summary: model.vndOaiReasoningSummary } : {}), }, // Output Config diff --git a/src/modules/llms/models-modal/LLMParametersEditor.tsx b/src/modules/llms/models-modal/LLMParametersEditor.tsx index acb680867..6dd1f407a 100644 --- a/src/modules/llms/models-modal/LLMParametersEditor.tsx +++ b/src/modules/llms/models-modal/LLMParametersEditor.tsx @@ -732,8 +732,8 @@ export function LLMParametersEditor(props: { {showParam('llmForceNoStream') && ( { if (!checked) diff --git a/src/modules/llms/server/openai/models/openai.models.ts b/src/modules/llms/server/openai/models/openai.models.ts index 9e95ce3b0..e0cb7a687 100644 --- a/src/modules/llms/server/openai/models/openai.models.ts +++ b/src/modules/llms/server/openai/models/openai.models.ts @@ -545,7 +545,7 @@ export const _knownOpenAIChatModels: ManualMappings = [ maxCompletionTokens: 100000, trainingDataCutoff: 'Sep 30, 2023', interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature], - parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }], + parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }, { paramId: 'llmForceNoStream' }], chatPrice: { input: 150, output: 600 }, // benchmark: has not been measured yet by third parties },