mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
AIX: OpenAI: also disable reasoning summaries when Streaming is disabled. Fixes #932
This commit is contained in:
@@ -69,6 +69,12 @@ export const DModelParameterRegistry = {
|
||||
/**
|
||||
* First introduced as a user-configurable parameter for the 'Verification' required by o3.
|
||||
* [2025-04-16] Adding parameter to disable streaming for o3, and possibly more models.
|
||||
*
|
||||
* [2026-01-21] OpenAI Responses API: Reasoning Summaries require organization verification.
|
||||
* Per OpenAI docs, both streaming AND reasoning summaries require org verification for GPT-5/5.1/5.2.
|
||||
* - https://help.openai.com/en/articles/10362446-api-model-availability-by-usage-tier-and-verification-status
|
||||
* - Rather than adding a separate param, we piggyback on llmForceNoStream.
|
||||
* - AIX Wire type `vndOaiReasoningSummary` is derived from `llmForceNoStream` in aix.client.ts.
|
||||
*/
|
||||
llmForceNoStream: {
|
||||
label: 'Disable Streaming',
|
||||
|
||||
@@ -46,7 +46,7 @@ export function aixCreateModelFromLLMOptions(
|
||||
|
||||
// destructure input with the overrides
|
||||
const {
|
||||
llmRef, llmTemperature, llmResponseTokens, llmTopP,
|
||||
llmRef, llmTemperature, llmResponseTokens, llmTopP, llmForceNoStream,
|
||||
llmVndAnt1MContext, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndAntEffort,
|
||||
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiShowThoughts, llmVndGeminiThinkingBudget, llmVndGeminiThinkingLevel, llmVndGeminiThinkingLevel4,
|
||||
// llmVndMoonshotWebSearch,
|
||||
@@ -100,6 +100,7 @@ export function aixCreateModelFromLLMOptions(
|
||||
...(hotfixOmitTemperature ? { temperature: null } : llmTemperature !== undefined ? { temperature: llmTemperature } : {}),
|
||||
...(llmResponseTokens /* null: similar to undefined, will omit the value */ ? { maxTokens: llmResponseTokens } : {}),
|
||||
...(llmTopP !== undefined ? { topP: llmTopP } : {}),
|
||||
...(llmForceNoStream ? { forceNoStream: true } : {}),
|
||||
...(llmVndAntThinkingBudget !== undefined ? { vndAntThinkingBudget: llmVndAntThinkingBudget } : {}),
|
||||
...(llmVndAnt1MContext ? { vndAnt1MContext: llmVndAnt1MContext } : {}),
|
||||
...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}),
|
||||
@@ -121,7 +122,10 @@ export function aixCreateModelFromLLMOptions(
|
||||
// ...(llmVndGeminiUrlContext === 'auto' ? { vndGeminiUrlContext: llmVndGeminiUrlContext } : {}),
|
||||
// ...(llmVndMoonshotWebSearch === 'auto' ? { vndMoonshotWebSearch: 'auto' } : {}),
|
||||
...(llmVndOaiResponsesAPI ? { vndOaiResponsesAPI: true } : {}),
|
||||
...((llmVndOaiReasoningEffort52Pro || llmVndOaiReasoningEffort52 || llmVndOaiReasoningEffort4 || llmVndOaiReasoningEffort) ? { vndOaiReasoningEffort: llmVndOaiReasoningEffort52Pro || llmVndOaiReasoningEffort52 || llmVndOaiReasoningEffort4 || llmVndOaiReasoningEffort } : {}),
|
||||
...((llmVndOaiReasoningEffort52Pro || llmVndOaiReasoningEffort52 || llmVndOaiReasoningEffort4 || llmVndOaiReasoningEffort) ? {
|
||||
vndOaiReasoningEffort: llmVndOaiReasoningEffort52Pro || llmVndOaiReasoningEffort52 || llmVndOaiReasoningEffort4 || llmVndOaiReasoningEffort,
|
||||
vndOaiReasoningSummary: llmForceNoStream ? 'none' /* we disable the summaries, to not require org verification */ : 'detailed',
|
||||
} : {}),
|
||||
...(llmVndOaiRestoreMarkdown ? { vndOaiRestoreMarkdown: llmVndOaiRestoreMarkdown } : {}),
|
||||
...(llmVndOaiVerbosity ? { vndOaiVerbosity: llmVndOaiVerbosity } : {}),
|
||||
...(llmVndOaiWebSearchContext ? { vndOaiWebSearchContext: llmVndOaiWebSearchContext } : {}),
|
||||
|
||||
@@ -472,12 +472,13 @@ export namespace AixWire_API {
|
||||
// Moonshot
|
||||
vndMoonshotWebSearch: z.enum(['auto']).optional(),
|
||||
// OpenAI
|
||||
vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(),
|
||||
vndOaiResponsesAPI: z.boolean().optional(),
|
||||
vndOaiReasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(),
|
||||
vndOaiReasoningSummary: z.enum(['none', 'detailed']).optional(),
|
||||
vndOaiRestoreMarkdown: z.boolean().optional(),
|
||||
vndOaiVerbosity: z.enum(['low', 'medium', 'high']).optional(),
|
||||
vndOaiWebSearchContext: z.enum(['low', 'medium', 'high']).optional(),
|
||||
vndOaiImageGeneration: z.enum(['mq', 'hq', 'hq_edit', 'hq_png']).optional(),
|
||||
// OpenRouter
|
||||
vndOrtWebSearch: z.enum(['auto']).optional(),
|
||||
// Perplexity
|
||||
|
||||
@@ -38,7 +38,6 @@ export function aixToOpenAIResponses(
|
||||
const isOpenAIOFamily = ['gpt-6', 'gpt-5', 'o4', 'o3', 'o1'].some(_id => model.id === _id || model.id.startsWith(_id + '-'));
|
||||
const isOpenAIChatGPT = ['gpt-5-chat'].some(_id => model.id === _id || model.id.startsWith(_id + '-'));
|
||||
const isOpenAIComputerUse = model.id.includes('computer-use');
|
||||
const isOpenAIO1Pro = model.id === 'o1-pro' || model.id.startsWith('o1-pro-');
|
||||
|
||||
const hotFixNoTemperature = isOpenAIOFamily && !isOpenAIChatGPT;
|
||||
const hotFixNoTruncateAuto = isOpenAIComputerUse;
|
||||
@@ -75,7 +74,8 @@ export function aixToOpenAIResponses(
|
||||
// Operations Config
|
||||
reasoning: !model.vndOaiReasoningEffort ? undefined : {
|
||||
effort: model.vndOaiReasoningEffort,
|
||||
summary: !isOpenAIO1Pro ? 'detailed' : 'auto', // elevated from 'auto' (o1-pro still at 'auto')
|
||||
// 'none' = omit the `summary` field entirely (for unverified orgs), 'detailed' = send it explicitly, undefined = no explicit value (default per model)
|
||||
...(model.vndOaiReasoningSummary !== 'none' ? { summary: model.vndOaiReasoningSummary } : {}),
|
||||
},
|
||||
|
||||
// Output Config
|
||||
|
||||
@@ -732,8 +732,8 @@ export function LLMParametersEditor(props: {
|
||||
{showParam('llmForceNoStream') && (
|
||||
<FormSwitchControl
|
||||
title='Disable Streaming'
|
||||
description='Receive complete responses'
|
||||
tooltip='Turn on to get entire responses at once. Useful for models with streaming issues, but will make responses appear slower.'
|
||||
description='For unverified OpenAI orgs'
|
||||
tooltip='Disables streaming and reasoning summaries, which both require OpenAI organization verification. Enable if you get verification errors with GPT-5 models.'
|
||||
checked={!!llmForceNoStream}
|
||||
onChange={checked => {
|
||||
if (!checked)
|
||||
|
||||
@@ -545,7 +545,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
|
||||
maxCompletionTokens: 100000,
|
||||
trainingDataCutoff: 'Sep 30, 2023',
|
||||
interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_MIN, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_NoTemperature],
|
||||
parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }],
|
||||
parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }, { paramId: 'llmForceNoStream' }],
|
||||
chatPrice: { input: 150, output: 600 },
|
||||
// benchmark: has not been measured yet by third parties
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user