AIX: OpenAI Responses: built-in image_generation_call config and output

This commit is contained in:
Enrico Ros
2025-09-15 22:24:13 -07:00
parent 2f2e4e36be
commit 0636ca76ea
3 changed files with 137 additions and 44 deletions
@@ -146,6 +146,31 @@ export function aixToOpenAIResponses(openAIDialect: OpenAIDialects, model: AixAP
}
}
// Tool: Image Generation: for testing (enable for all compatible models)
const requestImageGenerationTool = false // TODO: make this configurable
if (requestImageGenerationTool) {
if (isDialectAzure) {
// Azure OpenAI may not support image generation tool yet
console.log('[DEV] Azure OpenAI Responses: skipping image generation tool due to Azure limitations');
} else {
// Add the image generation tool to the request
if (!payload.tools?.length)
payload.tools = [];
const imageGenerationTool: TRequestTool = {
type: 'image_generation',
// Use defaults for all optional parameters
// size: 'auto',
// quality: 'auto',
// partial_images: 3, // Enable partial image streaming for better UX
// input_fidelity: 'high',
moderation: 'low',
output_format: 'webp',
// background: 'auto',
};
payload.tools.push(imageGenerationTool);
}
}
// [OpenAI] Vendor-specific restore markdown, for GPT-5 models and recent 'o' models
if (model.vndOaiRestoreMarkdown)
vndOaiRestoreMarkdown(payload);
@@ -355,11 +355,26 @@ export function createOpenAIResponsesEventParser(): ChatGenerateParseFunction {
}
break;
case 'image_generation_call':
// -> IGC: process completed image generation using 'ii' particle for inline images
const { result: igResult, revised_prompt: igRevisedPrompt } = doneItem;
// Create inline image with base64 data
if (igResult)
pt.appendImageInline(
'image/png', // default mime type
igResult,
igRevisedPrompt || 'Generated image',
'gpt-image-1', // generator
igRevisedPrompt || '' // prompt used
);
else
console.warn('[DEV] AIX: OpenAI Responses: image_generation_call done without result:', doneItem);
break;
default:
const _exhaustiveCheck: never = doneItemType;
// noinspection FallThroughInSwitchStatementJS
// case 'custom_tool_call':
// case 'image_generation_call':
// case 'code_interpreter_call':
// case 'file_search_call': // OpenAI vector store - not implemented
// case 'mcp_call':
@@ -506,6 +521,32 @@ export function createOpenAIResponsesEventParser(): ChatGenerateParseFunction {
// -> Actual web_search_call results are handled in response.output_item.done
break;
// Image Generation Call Events
// Flow: in_progress -> generating -> [partial_image]* -> completed
// NOTE: We use placeholder signals for progress, final image handled in output_item.done
case 'response.image_generation_call.in_progress':
R.outputItemVisit(eventType, event.output_index, 'image_generation_call');
pt.sendVoidPlaceholder('image_generation', 'Starting image generation...');
break;
case 'response.image_generation_call.generating':
R.outputItemVisit(eventType, event.output_index, 'image_generation_call');
pt.sendVoidPlaceholder('image_generation', 'Generating image...');
break;
case 'response.image_generation_call.partial_image':
R.outputItemVisit(eventType, event.output_index, 'image_generation_call');
// SKIP partial images to avoid duplicates - only use final result
// The final image will be handled in response.output_item.done
break;
case 'response.image_generation_call.completed':
R.outputItemVisit(eventType, event.output_index, 'image_generation_call');
pt.sendVoidPlaceholder('image_generation', 'Image generation completed');
// -> Final image result is handled in response.output_item.done
break;
// 1.5 - Error
@@ -737,6 +778,23 @@ export function createOpenAIResponseParserNS(): ChatGenerateParseFunction {
console.warn('[DEV] notImplemented: OpenAI Responses: web_search_call', { oItem });
break;
case 'image_generation_call':
// -> IGC: process completed image generation using 'ii' particle for inline images
const { result: igResult, revised_prompt: igRevisedPrompt } = oItem;
// Create inline image with base64 data
if (igResult)
pt.appendImageInline(
'image/png', // default mime type
igResult,
igRevisedPrompt || 'Generated image',
'gpt-image-1', // generator
igRevisedPrompt || '' // prompt used
);
else
console.warn('[DEV] AIX: OpenAI Responses: image_generation_call done without result:', oItem);
pt.endMessagePart();
break;
default:
const _exhaustiveCheck: never = oItemType;
console.log(`[DEV] Final Response output item type: ${oItemType} (TODO: implement)`);
@@ -1007,12 +1007,12 @@ export namespace OpenAIWire_Responses_Items {
]).optional(),
});
// const OutputImageGenerationCallItem_schema = _OutputItemBase_schema.extend({
// type: z.literal('image_generation_call'),
// id: z.string(), // unique ID of the image generation call (output item ID)
// result: z.string().optional(), // base64 image data when completed
// prompt: z.string().optional(), // the prompt used for generation
// });
// Output item for a completed built-in `image_generation_call`.
// The final image is delivered here (via `response.output_item.done`), as base64 in `result`;
// streaming progress events for the same call carry no image data of their own.
const OutputImageGenerationCallItem_schema = _OutputItemBase_schema.extend({
  type: z.literal('image_generation_call'),
  id: z.string(), // unique ID of the image generation call (output item ID)
  result: z.string().optional(), // base64 image data when completed
  revised_prompt: z.string().optional(), // the revised prompt used for generation
});
// const OutputCodeInterpreterCallItem_schema = _OutputItemBase_schema.extend({
// type: z.literal('code_interpreter_call'),
@@ -1053,7 +1053,7 @@ export namespace OpenAIWire_Responses_Items {
OutputFunctionCallItem_schema,
// OutputCustomToolCallItem_schema, // plain text custom tool output
OutputWebSearchCallItem_schema,
// OutputImageGenerationCallItem_schema,
OutputImageGenerationCallItem_schema,
// OutputCodeInterpreterCallItem_schema,
// OutputFileSearchCallItem_schema,
// OutputMCPCallItem_schema,
@@ -1198,22 +1198,32 @@ export namespace OpenAIWire_Responses_Tools {
}).optional(),
});
// const ImageGenerationTool_schema = z.object({
// type: z.literal('image_generation'),
// background: z.enum(['transparent', 'opaque', 'auto']).optional(), // defaults to 'auto'
// input_fidelity: z.enum(['high', 'low']).optional(), // defaults to 'low'
// input_image_mask: z.object({
// image_url: z.string().optional(),
// file_id: z.string().optional(),
// }).optional(),
// model: z.string().optional(), // defaults to 'gpt-image-1'
// moderation: z.enum(['low', 'auto']).optional(), // defaults to 'auto'
// output_compression: z.number().int().min(0).max(100).optional(), // defaults to 100
// output_format: z.enum(['png', 'webp', 'jpeg']).optional(), // defaults to 'png'
// partial_images: z.number().int().min(0).max(3).optional(), // defaults to 0
// quality: z.enum(['low', 'medium', 'high', 'auto']).optional(), // defaults to 'auto'
// size: z.enum(['1024x1024', '1024x1536', '1536x1024', 'auto']).optional(), // defaults to 'auto'
// });
/**
 * Hosted Image Generation tool configuration for the Responses API.
 * Every field except `type` is optional; the server applies the defaults noted per field.
 */
const ImageGenerationTool_schema = z.object({
  type: z.literal('image_generation'),
  /** Background of the generated image: 'transparent' | 'opaque' | 'auto' (default). */
  background: z.optional(z.enum(['transparent', 'opaque', 'auto'])),
  /**
   * Control how much effort the model will exert to match the style and features, especially facial features, of input images.
   * Defaults to 'low'.
   */
  input_fidelity: z.optional(z.enum(['high', 'low'])),
  /** Optional inpainting mask, by uploaded File ID or as a base64-encoded image URL. */
  input_image_mask: z.optional(z.object({
    file_id: z.string().optional(), // File ID for the mask image
    image_url: z.string().optional(), // Base64-encoded mask image
  })),
  /** Generation model; 'gpt-image-1' is the default. */
  model: z.optional(z.string()),
  /** Moderation strictness. Note: 'low' is unconfirmed here. Defaults to 'auto'. */
  moderation: z.optional(z.enum(['low', 'auto'])),
  /** Output compression level, 0-100 (default 100). */
  output_compression: z.optional(z.number().int().min(0).max(100)),
  /** One of [png, webp, or jpeg]. Default: png. */
  output_format: z.optional(z.enum(['png', 'webp', 'jpeg'])),
  /** Number of partial images to generate in streaming mode, from 0 (default) to 3. */
  partial_images: z.optional(z.number().int().min(0).max(3)),
  /** Quality of the generated image. Defaults to 'auto'. */
  quality: z.optional(z.enum(['low', 'medium', 'high', 'auto'])),
  /** The size of the generated image. One of 1024x1024, 1024x1536, 1536x1024, or auto. Default: auto. */
  size: z.optional(z.enum(['1024x1024', '1024x1536', '1536x1024', 'auto'])),
});
// const CodeInterpreterTool_schema = z.object({
// type: z.literal('code_interpreter'),
@@ -1239,7 +1249,7 @@ export namespace OpenAIWire_Responses_Tools {
// CustomTool_schema,
// hosted tools
WebSearchTool_schema,
// ImageGenerationTool_schema,
ImageGenerationTool_schema,
// CodeInterpreterTool_schema,
// FileSearchTool_schema, // OpenAI vector store - not implemented
// MCPTool_schema,
@@ -1263,10 +1273,10 @@ export namespace OpenAIWire_Responses_Tools {
type: z.enum([
// 'file_search',
'web_search_preview', 'web_search_preview_2025_03_11',
'image_generation',
// 'computer_use_preview',
// 'code_interpreter',
// 'mcp',
// 'image_generation',
// 'local_shell' ?
]),
}),
@@ -1574,23 +1584,23 @@ export namespace OpenAIWire_API_Responses {
// Streaming > Tool invoke > Image generation events
// const OutputImageGenerationCallInProgressEvent_schema = _OutputIndexedEvent_schema.extend({
// type: z.literal('response.image_generation_call.in_progress'),
// });
// Streaming event: image generation has started for an output item.
// Carries only the common indexed-event fields inherited from _OutputIndexedEvent_schema.
const OutputImageGenerationCallInProgressEvent_schema = _OutputIndexedEvent_schema.extend({
  type: z.literal('response.image_generation_call.in_progress'),
});
// const OutputImageGenerationCallGeneratingEvent_schema = _OutputIndexedEvent_schema.extend({
// type: z.literal('response.image_generation_call.generating'),
// });
// Streaming event: the model is actively generating the image (follows `in_progress`).
const OutputImageGenerationCallGeneratingEvent_schema = _OutputIndexedEvent_schema.extend({
  type: z.literal('response.image_generation_call.generating'),
});
// const OutputImageGenerationCallPartialImageEvent_schema = _OutputIndexedEvent_schema.extend({
// type: z.literal('response.image_generation_call.partial_image'),
// partial_image_b64: z.string(), // base64 partial image
// partial_image_index: z.number(), // 0-based index
// });
// Streaming event: an intermediate preview of the image being generated.
// Emitted up to `partial_images` times (0-3, per the tool configuration) before completion.
const OutputImageGenerationCallPartialImageEvent_schema = _OutputIndexedEvent_schema.extend({
  type: z.literal('response.image_generation_call.partial_image'),
  partial_image_b64: z.string(), // base64 partial image
  partial_image_index: z.number(), // 0-based index
});
// const OutputImageGenerationCallCompletedEvent_schema = _OutputIndexedEvent_schema.extend({
// type: z.literal('response.image_generation_call.completed'),
// });
// Streaming event: image generation finished. The final image itself is NOT in this
// event — it arrives in the corresponding `response.output_item.done` item (`result` field).
const OutputImageGenerationCallCompletedEvent_schema = _OutputIndexedEvent_schema.extend({
  type: z.literal('response.image_generation_call.completed'),
});
// Streaming > Tool invoke > File search events (OpenAI vector store - not implemented)
@@ -1737,10 +1747,10 @@ export namespace OpenAIWire_API_Responses {
OutputWebSearchCallCompleted_schema,
// Tool invoke > Image generation events
// OutputImageGenerationCallInProgressEvent_schema,
// OutputImageGenerationCallGeneratingEvent_schema,
// OutputImageGenerationCallPartialImageEvent_schema,
// OutputImageGenerationCallCompletedEvent_schema,
OutputImageGenerationCallInProgressEvent_schema,
OutputImageGenerationCallGeneratingEvent_schema,
OutputImageGenerationCallPartialImageEvent_schema,
OutputImageGenerationCallCompletedEvent_schema,
// Tool invoke > File Search events
// OutputFileSearchCallInProgressEvent_schema, // OpenAI vector store - not implemented