From 55c3eb4cf04afff0da91c60d9fca783136929faa Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Wed, 25 Jun 2025 07:46:48 -0700 Subject: [PATCH] AIX: OpenAI: Responses: dispatch fixes --- .../adapters/openai.responsesCreate.ts | 85 +++++++++++-------- .../dispatch/wiretypes/openai.wiretypes.ts | 33 ++++--- 2 files changed, 72 insertions(+), 46 deletions(-) diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts index a90b98f53..29e40bd5c 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts @@ -5,6 +5,10 @@ import { approxDocPart_To_String } from './anthropic.messageCreate'; import { aixDocPart_to_OpenAITextContent, aixMetaRef_to_OpenAIText, aixTexts_to_OpenAIInstructionText } from '~/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions'; +// configuration +const OPENAI_RESPONSES_DEFAULT_TRUNCATION: TRequest['truncation'] = undefined; + + type TRequest = OpenAIWire_API_Responses.Request; type TRequestInput = OpenAIWire_Responses_InputTypes.InputItem; type TRequestTool = OpenAIWire_Responses_Tools.Tool; @@ -19,51 +23,64 @@ type TRequestTool = OpenAIWire_Responses_Tools.Tool; */ export function aixToOpenAIResponses(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, jsonOutput: boolean, streaming: boolean): TRequest { - // [OpenAI] - o models - const hotFixOpenAIOFamily = ( - model.id === 'o1' || model.id.startsWith('o1-') || - model.id === 'o3' || model.id.startsWith('o3-') || - model.id === 'o4' || model.id.startsWith('o4-') || - model.id === 'o5' || model.id.startsWith('o5-') - ); + // [OpenAI] Vendor-specific model checks + const isOpenAIOFamily = ['o1', 'o3', 'o4', 'o5'].some(m => model.id === m || model.id.startsWith(m + '-')); + const isOpenAIComputerUse = model.id.includes('computer-use'); + + const hotFixNoTemperature = isOpenAIOFamily; + const hotFixNoTruncateAuto = isOpenAIComputerUse; + + // --- + // construct the request payload + // NOTE: the zod parsing will remove the undefined values from the upstream request, enabling an easier construction + // --- - // Convert the chat messages to the OpenAI 4-Messages format const { requestInput, requestInstructions } = _toOpenAIResponsesRequestInput(chatGenerate.systemMessage, chatGenerate.chatSequence); - - // Construct the request payload const payload: TRequest = { + + // Model configuration model: model.id, + max_output_tokens: model.maxTokens ?? undefined, // response if unset: null + temperature: !hotFixNoTemperature ? model.temperature ?? undefined : undefined, + // top_p: ... below (alternative to temperature) + + // Input instructions: requestInstructions, input: requestInput, - max_output_tokens: model.maxTokens !== undefined ? model.maxTokens : undefined, - ...(model.temperature !== null ? { temperature: model.temperature !== undefined ? model.temperature : undefined } : {}), - // top_p: undefined, + + // Tools tools: chatGenerate.tools && _toOpenAIResponsesTools(chatGenerate.tools), tool_choice: chatGenerate.toolsPolicy && _toOpenAIResponsesToolChoice(chatGenerate.toolsPolicy), - // parallel_tool_calls: undefined, + // parallel_tool_calls: undefined, // response if unset: true + + // Operations Config + reasoning: !model.vndOaiReasoningEffort ? undefined : { + effort: model.vndOaiReasoningEffort, + // summary: 'detailed', // elevated from 'auto' + }, + + // Output Config // text: ... below - // reasoning: ... below + + // API state management + store: false, // default would be 'true' + // previous_response_id: undefined, + + // API options stream: streaming, - background: false, - // truncation: 'auto', // TODO: enable this by default? + // background: false, // response if unset: false + truncation: !hotFixNoTruncateAuto ? OPENAI_RESPONSES_DEFAULT_TRUNCATION : 'auto', // user: undefined, + }; - // Top-P instead of temperature + // "top-p": if present, use instead of temperature if (model.topP !== undefined) { delete payload.temperature; payload.top_p = model.topP; } - // [OpenAI] Vendor-specific reasoning effort, for o1 models only as of 2024-12-24 - if (model.vndOaiReasoningEffort) { - payload.reasoning = { - effort: model.vndOaiReasoningEffort, - summary: 'detailed', // elevated from 'auto' - }; - } - - // JSON output - with schema + // JSON output: not implemented yet - will need a schema definition (similar to the tool args definition) if (jsonOutput) { console.warn('[DEV] notImplemented: responses: jsonOutput'); // payload.text = { @@ -73,12 +90,7 @@ export function aixToOpenAIResponses(model: AixAPI_Model, chatGenerate: AixAPICh // }; } - // [OpenAI] Vendor-specific restore markdown, for newer o1 models - if (model.vndOaiRestoreMarkdown) { - console.warn('notImplemented: responses: vndOaiRestoreMarkdown'); - } - - // [OpenAI] Vendor-specific web search context and/or geolocation + // Web Search Context - TODO: check if still exists if (model.vndOaiWebSearchContext || model.userGeolocation) { console.warn('notImplemented: responses: vndOaiWebSearchContext, userGeolocation'); // payload.web_search_options = {}; @@ -94,9 +106,10 @@ export function aixToOpenAIResponses(model: AixAPI_Model, chatGenerate: AixAPICh } // Preemptive error detection with server-side payload validation before sending it upstream + // this includes stripping 'undefined' fields const validated = OpenAIWire_API_Responses.Request_schema.safeParse(payload); if (!validated.success) { - console.warn('OpenAI: invalid Responses request payload. Error:', validated.error); + console.warn('[DEV] OpenAI: invalid Responses request payload. Error:', { error: validated.error }); throw new Error(`Invalid sequence for OpenAI models: ${validated.error.errors?.[0]?.message || validated.error.message || validated.error}.`); } @@ -137,7 +150,7 @@ function _toOpenAIResponsesRequestInput(systemMessage: AixMessages_SystemMessage // We decide to adopt these schemas for the conversion (API gives us a few choices) const chatMessages: (UserMessage | ModelMessage | FunctionCallMessage | FunctionCallOutputMessage)[] = []; type UserMessage = Omit & { role: 'user' }; - type ModelMessage = Omit & { role: 'assistant' }; + type ModelMessage = Extract; type FunctionCallMessage = OpenAIWire_Responses_InputTypes.FunctionToolCall; type FunctionCallOutputMessage = OpenAIWire_Responses_InputTypes.FunctionToolCallOutput; @@ -258,7 +271,7 @@ function _toOpenAIResponsesRequestInput(systemMessage: AixMessages_SystemMessage case 'text': modelMessage().content.push({ - type: 'input_text', + type: 'output_text', text: modelPart.text, }); break; diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts index 64eb4d548..92cd3b2dc 100644 --- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts @@ -985,7 +985,7 @@ export namespace OpenAIWire_Responses_InputTypes { // - type: 'mcp_call' - /** + /* * Old-style Item Message, used for compatibility with older APIs. * * NOTE: Over time we will move to the 'Item' type below, but it requires tracking lots @@ -994,16 +994,29 @@ export namespace OpenAIWire_Responses_InputTypes { * * In the meantime this is a way out of that. */ - const InputMessage_Compat_schema = z.object({ + export type InputMessage_Compat = z.infer; + + const _InputMessage_Compat_User_schema = z.object({ type: z.literal('message'), - role: z.enum(['user', 'assistant', 'system', 'developer']), + role: z.enum(['user', 'system', 'developer']), + // user/system/developer inputs: 'input_text', 'input_image', 'input_file' content: z.array(z.union([ InputTextPart_schema, InputImagePart_schema, InputFilePart_schema, ])), }); - export type InputMessage_Compat = z.infer; + const _InputMessage_Compat_Model_schema = z.object({ + type: z.literal('message'), + role: z.literal('assistant'), + // assistant inputs: 'output_text', 'refusal' + content: z.array(ContentPartTextOrRefusal_schema), + }); + + const InputMessage_Compat_schema = z.union([ + _InputMessage_Compat_User_schema, + _InputMessage_Compat_Model_schema, + ]); // Input Item (combined) @@ -1119,6 +1132,12 @@ export namespace OpenAIWire_API_Responses { tool_choice: OpenAIWire_Responses_Tools.ToolChoice_schema.optional(), parallel_tool_calls: z.boolean().nullish(), + // configure reasoning + reasoning: z.object({ + effort: z.enum(['low', 'medium', 'high']).nullish(), // defaults to 'medium' + summary: z.enum(['auto', 'concise', 'detailed']).nullish(), + }).nullish(), + // configure text output text: z.object({ format: z.union([ @@ -1134,12 +1153,6 @@ export namespace OpenAIWire_API_Responses { ]).optional(), }).optional(), - // configure reasoning - reasoning: z.object({ - effort: z.enum(['low', 'medium', 'high']).nullish(), // defaults to 'medium' - summary: z.enum(['auto', 'concise', 'detailed']).nullish(), - }).nullish(), - // State management (we won't use this for stateless) // store: z.boolean().nullish(), // previous_response_id: z.string().nullish(),