diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts index b082a9b69..28ae631ac 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.responsesCreate.ts @@ -146,6 +146,31 @@ export function aixToOpenAIResponses(openAIDialect: OpenAIDialects, model: AixAP } } + // Tool: Image Generation: for testing (enable for all compatible models) + const requestImageGenerationTool = false // TODO: make this configurable + if (requestImageGenerationTool) { + if (isDialectAzure) { + // Azure OpenAI may not support image generation tool yet + console.log('[DEV] Azure OpenAI Responses: skipping image generation tool due to Azure limitations'); + } else { + // Add the image generation tool to the request + if (!payload.tools?.length) + payload.tools = []; + const imageGenerationTool: TRequestTool = { + type: 'image_generation', + // Use defaults for all optional parameters + // size: 'auto', + // quality: 'auto', + // partial_images: 3, // Enable partial image streaming for better UX + // input_fidelity: 'high', + moderation: 'low', + output_format: 'webp', + // background: 'auto', + }; + payload.tools.push(imageGenerationTool); + } + } + // [OpenAI] Vendor-specific restore markdown, for GPT-5 models and recent 'o' models if (model.vndOaiRestoreMarkdown) vndOaiRestoreMarkdown(payload); diff --git a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.responses.parser.ts b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.responses.parser.ts index 6f6c56fa4..a85d2a66c 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.responses.parser.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.responses.parser.ts @@ -355,11 +355,26 @@ export function createOpenAIResponsesEventParser(): ChatGenerateParseFunction { 
} break; + case 'image_generation_call': + // -> IGC: process completed image generation using 'ii' particle for inline images + const { result: igResult, revised_prompt: igRevisedPrompt } = doneItem; + // Create inline image with base64 data + if (igResult) + pt.appendImageInline( + 'image/png', // NOTE(review): hardcoded 'image/png', but the request tool sets output_format: 'webp' — confirm MIME matches the actual output format + igResult, + igRevisedPrompt || 'Generated image', + 'gpt-image-1', // generator + igRevisedPrompt || '' // prompt used + ); + else + console.warn('[DEV] AIX: OpenAI Responses: image_generation_call done without result:', doneItem); + break; + default: const _exhaustiveCheck: never = doneItemType; // noinspection FallThroughInSwitchStatementJS // case 'custom_tool_call': - // case 'image_generation_call': // case 'code_interpreter_call': // case 'file_search_call': // OpenAI vector store - not implemented // case 'mcp_call': @@ -506,6 +521,32 @@ export function createOpenAIResponsesEventParser(): ChatGenerateParseFunction { // -> Actual web_search_call results are handled in response.output_item.done break; + // Image Generation Call Events + // Flow: in_progress -> generating -> [partial_image]* -> completed + // NOTE: We use placeholder signals for progress, final image handled in output_item.done + + case 'response.image_generation_call.in_progress': + R.outputItemVisit(eventType, event.output_index, 'image_generation_call'); + pt.sendVoidPlaceholder('image_generation', 'Starting image generation...'); + break; + + case 'response.image_generation_call.generating': + R.outputItemVisit(eventType, event.output_index, 'image_generation_call'); + pt.sendVoidPlaceholder('image_generation', 'Generating image...'); + break; + + case 'response.image_generation_call.partial_image': + R.outputItemVisit(eventType, event.output_index, 'image_generation_call'); + // SKIP partial images to avoid duplicates - only use final result + // The final image will be handled in response.output_item.done + break; + + case 'response.image_generation_call.completed': + 
R.outputItemVisit(eventType, event.output_index, 'image_generation_call'); + pt.sendVoidPlaceholder('image_generation', 'Image generation completed'); + // -> Final image result is handled in response.output_item.done + break; + // 1.5 - Error @@ -737,6 +778,23 @@ export function createOpenAIResponseParserNS(): ChatGenerateParseFunction { console.warn('[DEV] notImplemented: OpenAI Responses: web_search_call', { oItem }); break; + case 'image_generation_call': + // -> IGC: process completed image generation using 'ii' particle for inline images + const { result: igResult, revised_prompt: igRevisedPrompt } = oItem; + // Create inline image with base64 data + if (igResult) + pt.appendImageInline( + 'image/png', // NOTE(review): hardcoded 'image/png', but the request tool sets output_format: 'webp' — confirm MIME matches the actual output format + igResult, + igRevisedPrompt || 'Generated image', + 'gpt-image-1', // generator + igRevisedPrompt || '' // prompt used + ); + else + console.warn('[DEV] AIX: OpenAI Responses: image_generation_call done without result:', oItem); + pt.endMessagePart(); + break; + default: const _exhaustiveCheck: never = oItemType; console.log(`[DEV] Final Response output item type: ${oItemType} (TODO: implement)`); diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts index 253dea723..df67a11c8 100644 --- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts @@ -1007,12 +1007,12 @@ export namespace OpenAIWire_Responses_Items { ]).optional(), }); - // const OutputImageGenerationCallItem_schema = _OutputItemBase_schema.extend({ - // type: z.literal('image_generation_call'), - // id: z.string(), // unique ID of the image generation call (output item ID) - // result: z.string().optional(), // base64 image data when completed - // prompt: z.string().optional(), // the prompt used for generation - // }); + const OutputImageGenerationCallItem_schema = _OutputItemBase_schema.extend({ + type: 
z.literal('image_generation_call'), + id: z.string(), // unique ID of the image generation call (output item ID) + result: z.string().optional(), // base64 image data when completed + revised_prompt: z.string().optional(), // the revised prompt used for generation + }); // const OutputCodeInterpreterCallItem_schema = _OutputItemBase_schema.extend({ // type: z.literal('code_interpreter_call'), @@ -1053,7 +1053,7 @@ export namespace OpenAIWire_Responses_Items { OutputFunctionCallItem_schema, // OutputCustomToolCallItem_schema, // plain text custom tool output OutputWebSearchCallItem_schema, - // OutputImageGenerationCallItem_schema, + OutputImageGenerationCallItem_schema, // OutputCodeInterpreterCallItem_schema, // OutputFileSearchCallItem_schema, // OutputMCPCallItem_schema, @@ -1198,22 +1198,32 @@ export namespace OpenAIWire_Responses_Tools { }).optional(), }); - // const ImageGenerationTool_schema = z.object({ - // type: z.literal('image_generation'), - // background: z.enum(['transparent', 'opaque', 'auto']).optional(), // defaults to 'auto' - // input_fidelity: z.enum(['high', 'low']).optional(), // defaults to 'low' - // input_image_mask: z.object({ - // image_url: z.string().optional(), - // file_id: z.string().optional(), - // }).optional(), - // model: z.string().optional(), // defaults to 'gpt-image-1' - // moderation: z.enum(['low', 'auto']).optional(), // defaults to 'auto' - // output_compression: z.number().int().min(0).max(100).optional(), // defaults to 100 - // output_format: z.enum(['png', 'webp', 'jpeg']).optional(), // defaults to 'png' - // partial_images: z.number().int().min(0).max(3).optional(), // defaults to 0 - // quality: z.enum(['low', 'medium', 'high', 'auto']).optional(), // defaults to 'auto' - // size: z.enum(['1024x1024', '1024x1536', '1536x1024', 'auto']).optional(), // defaults to 'auto' - // }); + const ImageGenerationTool_schema = z.object({ + type: z.literal('image_generation'), + background: z.enum(['transparent', 'opaque', 
'auto']).optional(), // defaults to 'auto' + /** + * Control how much effort the model will exert to match the style and features, especially facial features, of input images. + * Defaults to 'low'. + */ + input_fidelity: z.enum(['high', 'low']).optional(), + input_image_mask: z.object({ + file_id: z.string().optional(), // File ID for the mask image + image_url: z.string().optional(), // Base64-encoded mask image + }).optional(), + /** 'gpt-image-1' (default) */ + model: z.string().optional(), + /** Note: 'low' is unconfirmed here. Defaults to 'auto' */ + moderation: z.enum(['low', 'auto']).optional(), + output_compression: z.number().min(0).max(100).int().optional(), // defaults to 100 + /** One of [png, webp, or jpeg]. Default: png. */ + output_format: z.enum(['png', 'webp', 'jpeg']).optional(), + /** Number of partial images to generate in streaming mode, from 0 (default) to 3. */ + partial_images: z.number().int().min(0).max(3).optional(), + /** Quality of the generated image. Defaults to 'auto' */ + quality: z.enum(['low', 'medium', 'high', 'auto']).optional(), + /** The size of the generated image. One of 1024x1024, 1024x1536, 1536x1024, or auto. Default: auto. */ + size: z.enum(['1024x1024', '1024x1536', '1536x1024', 'auto']).optional(), + }); // const CodeInterpreterTool_schema = z.object({ // type: z.literal('code_interpreter'), @@ -1239,7 +1249,7 @@ export namespace OpenAIWire_Responses_Tools { // CustomTool_schema, // hosted tools WebSearchTool_schema, - // ImageGenerationTool_schema, + ImageGenerationTool_schema, // CodeInterpreterTool_schema, // FileSearchTool_schema, // OpenAI vector store - not implemented // MCPTool_schema, @@ -1263,10 +1273,10 @@ export namespace OpenAIWire_Responses_Tools { type: z.enum([ // 'file_search', 'web_search_preview', 'web_search_preview_2025_03_11', + 'image_generation', // 'computer_use_preview', // 'code_interpreter', // 'mcp', - // 'image_generation', // 'local_shell' ? 
]), }), @@ -1574,23 +1584,23 @@ export namespace OpenAIWire_API_Responses { // Streaming > Tool invoke > Image generation events - // const OutputImageGenerationCallInProgressEvent_schema = _OutputIndexedEvent_schema.extend({ - // type: z.literal('response.image_generation_call.in_progress'), - // }); + const OutputImageGenerationCallInProgressEvent_schema = _OutputIndexedEvent_schema.extend({ + type: z.literal('response.image_generation_call.in_progress'), + }); - // const OutputImageGenerationCallGeneratingEvent_schema = _OutputIndexedEvent_schema.extend({ - // type: z.literal('response.image_generation_call.generating'), - // }); + const OutputImageGenerationCallGeneratingEvent_schema = _OutputIndexedEvent_schema.extend({ + type: z.literal('response.image_generation_call.generating'), + }); - // const OutputImageGenerationCallPartialImageEvent_schema = _OutputIndexedEvent_schema.extend({ - // type: z.literal('response.image_generation_call.partial_image'), - // partial_image_b64: z.string(), // base64 partial image - // partial_image_index: z.number(), // 0-based index - // }); + const OutputImageGenerationCallPartialImageEvent_schema = _OutputIndexedEvent_schema.extend({ + type: z.literal('response.image_generation_call.partial_image'), + partial_image_b64: z.string(), // base64 partial image + partial_image_index: z.number(), // 0-based index + }); - // const OutputImageGenerationCallCompletedEvent_schema = _OutputIndexedEvent_schema.extend({ - // type: z.literal('response.image_generation_call.completed'), - // }); + const OutputImageGenerationCallCompletedEvent_schema = _OutputIndexedEvent_schema.extend({ + type: z.literal('response.image_generation_call.completed'), + }); // Streaming > Tool invoke > File search events (OpenAI vector store - not implemented) @@ -1737,10 +1747,10 @@ export namespace OpenAIWire_API_Responses { OutputWebSearchCallCompleted_schema, // Tool invoke > Image generation events - // OutputImageGenerationCallInProgressEvent_schema, - 
// OutputImageGenerationCallGeneratingEvent_schema, - // OutputImageGenerationCallPartialImageEvent_schema, - // OutputImageGenerationCallCompletedEvent_schema, + OutputImageGenerationCallInProgressEvent_schema, + OutputImageGenerationCallGeneratingEvent_schema, + OutputImageGenerationCallPartialImageEvent_schema, + OutputImageGenerationCallCompletedEvent_schema, // Tool invoke > File Search events // OutputFileSearchCallInProgressEvent_schema, // OpenAI vector store - not implemented