mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
AIX: OpenAI Responses: built-in image_generation_call config and output
This commit is contained in:
@@ -146,6 +146,31 @@ export function aixToOpenAIResponses(openAIDialect: OpenAIDialects, model: AixAP
|
||||
}
|
||||
}
|
||||
|
||||
// Tool: Image Generation: for testing (enable for all compatible models)
|
||||
const requestImageGenerationTool = false // TODO: make this configurable
|
||||
if (requestImageGenerationTool) {
|
||||
if (isDialectAzure) {
|
||||
// Azure OpenAI may not support image generation tool yet
|
||||
console.log('[DEV] Azure OpenAI Responses: skipping image generation tool due to Azure limitations');
|
||||
} else {
|
||||
// Add the image generation tool to the request
|
||||
if (!payload.tools?.length)
|
||||
payload.tools = [];
|
||||
const imageGenerationTool: TRequestTool = {
|
||||
type: 'image_generation',
|
||||
// Use defaults for all optional parameters
|
||||
// size: 'auto',
|
||||
// quality: 'auto',
|
||||
// partial_images: 3, // Enable partial image streaming for better UX
|
||||
// input_fidelity: 'high',
|
||||
moderation: 'low',
|
||||
output_format: 'webp',
|
||||
// background: 'auto',
|
||||
};
|
||||
payload.tools.push(imageGenerationTool);
|
||||
}
|
||||
}
|
||||
|
||||
// [OpenAI] Vendor-specific restore markdown, for GPT-5 models and recent 'o' models
|
||||
if (model.vndOaiRestoreMarkdown)
|
||||
vndOaiRestoreMarkdown(payload);
|
||||
|
||||
@@ -355,11 +355,26 @@ export function createOpenAIResponsesEventParser(): ChatGenerateParseFunction {
|
||||
}
|
||||
break;
|
||||
|
||||
case 'image_generation_call':
|
||||
// -> IGC: process completed image generation using 'ii' particle for inline images
|
||||
const { result: igResult, revised_prompt: igRevisedPrompt } = doneItem;
|
||||
// Create inline image with base64 data
|
||||
if (igResult)
|
||||
pt.appendImageInline(
|
||||
'image/png', // default mime type
|
||||
igResult,
|
||||
igRevisedPrompt || 'Generated image',
|
||||
'gpt-image-1', // generator
|
||||
igRevisedPrompt || '' // prompt used
|
||||
);
|
||||
else
|
||||
console.warn('[DEV] AIX: OpenAI Responses: image_generation_call done without result:', doneItem);
|
||||
break;
|
||||
|
||||
default:
|
||||
const _exhaustiveCheck: never = doneItemType;
|
||||
// noinspection FallThroughInSwitchStatementJS
|
||||
// case 'custom_tool_call':
|
||||
// case 'image_generation_call':
|
||||
// case 'code_interpreter_call':
|
||||
// case 'file_search_call': // OpenAI vector store - not implemented
|
||||
// case 'mcp_call':
|
||||
@@ -506,6 +521,32 @@ export function createOpenAIResponsesEventParser(): ChatGenerateParseFunction {
|
||||
// -> Actual web_search_call results are handled in response.output_item.done
|
||||
break;
|
||||
|
||||
// Image Generation Call Events
|
||||
// Flow: in_progress -> generating -> [partial_image]* -> completed
|
||||
// NOTE: We use placeholder signals for progress, final image handled in output_item.done
|
||||
|
||||
case 'response.image_generation_call.in_progress':
|
||||
R.outputItemVisit(eventType, event.output_index, 'image_generation_call');
|
||||
pt.sendVoidPlaceholder('image_generation', 'Starting image generation...');
|
||||
break;
|
||||
|
||||
case 'response.image_generation_call.generating':
|
||||
R.outputItemVisit(eventType, event.output_index, 'image_generation_call');
|
||||
pt.sendVoidPlaceholder('image_generation', 'Generating image...');
|
||||
break;
|
||||
|
||||
case 'response.image_generation_call.partial_image':
|
||||
R.outputItemVisit(eventType, event.output_index, 'image_generation_call');
|
||||
// SKIP partial images to avoid duplicates - only use final result
|
||||
// The final image will be handled in response.output_item.done
|
||||
break;
|
||||
|
||||
case 'response.image_generation_call.completed':
|
||||
R.outputItemVisit(eventType, event.output_index, 'image_generation_call');
|
||||
pt.sendVoidPlaceholder('image_generation', 'Image generation completed');
|
||||
// -> Final image result is handled in response.output_item.done
|
||||
break;
|
||||
|
||||
|
||||
// 1.5 - Error
|
||||
|
||||
@@ -737,6 +778,23 @@ export function createOpenAIResponseParserNS(): ChatGenerateParseFunction {
|
||||
console.warn('[DEV] notImplemented: OpenAI Responses: web_search_call', { oItem });
|
||||
break;
|
||||
|
||||
case 'image_generation_call':
|
||||
// -> IGC: process completed image generation using 'ii' particle for inline images
|
||||
const { result: igResult, revised_prompt: igRevisedPrompt } = oItem;
|
||||
// Create inline image with base64 data
|
||||
if (igResult)
|
||||
pt.appendImageInline(
|
||||
'image/png', // default mime type
|
||||
igResult,
|
||||
igRevisedPrompt || 'Generated image',
|
||||
'gpt-image-1', // generator
|
||||
igRevisedPrompt || '' // prompt used
|
||||
);
|
||||
else
|
||||
console.warn('[DEV] AIX: OpenAI Responses: image_generation_call done without result:', oItem);
|
||||
pt.endMessagePart();
|
||||
break;
|
||||
|
||||
default:
|
||||
const _exhaustiveCheck: never = oItemType;
|
||||
console.log(`[DEV] Final Response output item type: ${oItemType} (TODO: implement)`);
|
||||
|
||||
@@ -1007,12 +1007,12 @@ export namespace OpenAIWire_Responses_Items {
|
||||
]).optional(),
|
||||
});
|
||||
|
||||
// const OutputImageGenerationCallItem_schema = _OutputItemBase_schema.extend({
|
||||
// type: z.literal('image_generation_call'),
|
||||
// id: z.string(), // unique ID of the image generation call (output item ID)
|
||||
// result: z.string().optional(), // base64 image data when completed
|
||||
// prompt: z.string().optional(), // the prompt used for generation
|
||||
// });
|
||||
// Output item emitted for a built-in image generation call.
// The response parsers in this change destructure `result` and `revised_prompt` from this item:
// when `result` is present it is appended as an inline image, otherwise a [DEV] warning is logged.
const OutputImageGenerationCallItem_schema = _OutputItemBase_schema.extend({
|
||||
type: z.literal('image_generation_call'),
|
||||
id: z.string(), // unique ID of the image generation call (output item ID)
|
||||
result: z.string().optional(), // base64 image data when completed
|
||||
revised_prompt: z.string().optional(), // the revised prompt used for generation
|
||||
});
|
||||
|
||||
// const OutputCodeInterpreterCallItem_schema = _OutputItemBase_schema.extend({
|
||||
// type: z.literal('code_interpreter_call'),
|
||||
@@ -1053,7 +1053,7 @@ export namespace OpenAIWire_Responses_Items {
|
||||
OutputFunctionCallItem_schema,
|
||||
// OutputCustomToolCallItem_schema, // plain text custom tool output
|
||||
OutputWebSearchCallItem_schema,
|
||||
// OutputImageGenerationCallItem_schema,
|
||||
OutputImageGenerationCallItem_schema,
|
||||
// OutputCodeInterpreterCallItem_schema,
|
||||
// OutputFileSearchCallItem_schema,
|
||||
// OutputMCPCallItem_schema,
|
||||
@@ -1198,22 +1198,32 @@ export namespace OpenAIWire_Responses_Tools {
|
||||
}).optional(),
|
||||
});
|
||||
|
||||
// const ImageGenerationTool_schema = z.object({
|
||||
// type: z.literal('image_generation'),
|
||||
// background: z.enum(['transparent', 'opaque', 'auto']).optional(), // defaults to 'auto'
|
||||
// input_fidelity: z.enum(['high', 'low']).optional(), // defaults to 'low'
|
||||
// input_image_mask: z.object({
|
||||
// image_url: z.string().optional(),
|
||||
// file_id: z.string().optional(),
|
||||
// }).optional(),
|
||||
// model: z.string().optional(), // defaults to 'gpt-image-1'
|
||||
// moderation: z.enum(['low', 'auto']).optional(), // defaults to 'auto'
|
||||
// output_compression: z.number().int().min(0).max(100).optional(), // defaults to 100
|
||||
// output_format: z.enum(['png', 'webp', 'jpeg']).optional(), // defaults to 'png'
|
||||
// partial_images: z.number().int().min(0).max(3).optional(), // defaults to 0
|
||||
// quality: z.enum(['low', 'medium', 'high', 'auto']).optional(), // defaults to 'auto'
|
||||
// size: z.enum(['1024x1024', '1024x1536', '1536x1024', 'auto']).optional(), // defaults to 'auto'
|
||||
// });
|
||||
// Request-side definition of the hosted 'image_generation' tool.
// Only `type` is required; every other field is optional and falls back to the default noted next to it.
const ImageGenerationTool_schema = z.object({
|
||||
type: z.literal('image_generation'),
|
||||
background: z.enum(['transparent', 'opaque', 'auto']).optional(), // defaults to 'auto'
|
||||
/**
|
||||
* Control how much effort the model will exert to match the style and features, especially facial features, of input images.
|
||||
* Defaults to 'low'.
|
||||
*/
|
||||
input_fidelity: z.enum(['high', 'low']).optional(),
|
||||
// Optional mask image; both ways of referencing it (file ID or inline data) are themselves optional.
input_image_mask: z.object({
|
||||
file_id: z.string().optional(), // File ID for the mask image
|
||||
image_url: z.string().optional(), // Base64-encoded mask image
|
||||
}).optional(),
|
||||
/** 'gpt-image-1' (default) */
|
||||
model: z.string().optional(),
|
||||
/** Note: 'low' is unconfirmed here. Defaults to 'auto' */
|
||||
moderation: z.enum(['low', 'auto']).optional(),
|
||||
// Validated as an integer in the inclusive range 0..100.
output_compression: z.number().min(0).max(100).int().optional(), // defaults to 100
|
||||
/** One of [png, webp, or jpeg]. Default: png. */
// NOTE(review): the request builder in this change sets output_format: 'webp', while both response
// parsers tag the returned image as 'image/png' - confirm the intended MIME type.
|
||||
output_format: z.enum(['png', 'webp', 'jpeg']).optional(),
|
||||
/** Number of partial images to generate in streaming mode, from 0 (default) to 3. */
|
||||
partial_images: z.number().int().min(0).max(3).optional(),
|
||||
/** Quality of the generated image. Defaults to 'auto' */
|
||||
quality: z.enum(['low', 'medium', 'high', 'auto']).optional(),
|
||||
/** The size of the generated image. One of 1024x1024, 1024x1536, 1536x1024, or auto. Default: auto. */
|
||||
size: z.enum(['1024x1024', '1024x1536', '1536x1024', 'auto']).optional(),
|
||||
});
|
||||
|
||||
// const CodeInterpreterTool_schema = z.object({
|
||||
// type: z.literal('code_interpreter'),
|
||||
@@ -1239,7 +1249,7 @@ export namespace OpenAIWire_Responses_Tools {
|
||||
// CustomTool_schema,
|
||||
// hosted tools
|
||||
WebSearchTool_schema,
|
||||
// ImageGenerationTool_schema,
|
||||
ImageGenerationTool_schema,
|
||||
// CodeInterpreterTool_schema,
|
||||
// FileSearchTool_schema, // OpenAI vector store - not implemented
|
||||
// MCPTool_schema,
|
||||
@@ -1263,10 +1273,10 @@ export namespace OpenAIWire_Responses_Tools {
|
||||
type: z.enum([
|
||||
// 'file_search',
|
||||
'web_search_preview', 'web_search_preview_2025_03_11',
|
||||
'image_generation',
|
||||
// 'computer_use_preview',
|
||||
// 'code_interpreter',
|
||||
// 'mcp',
|
||||
// 'image_generation',
|
||||
// 'local_shell' ?
|
||||
]),
|
||||
}),
|
||||
@@ -1574,23 +1584,23 @@ export namespace OpenAIWire_API_Responses {
|
||||
|
||||
// Streaming > Tool invoke > Image generation events
|
||||
|
||||
// const OutputImageGenerationCallInProgressEvent_schema = _OutputIndexedEvent_schema.extend({
|
||||
// type: z.literal('response.image_generation_call.in_progress'),
|
||||
// });
|
||||
// Streaming event 'response.image_generation_call.in_progress'.
// First step of the flow noted in the event parser: in_progress -> generating -> [partial_image]* -> completed.
// Adds only the `type` discriminator on top of the indexed-event base schema.
const OutputImageGenerationCallInProgressEvent_schema = _OutputIndexedEvent_schema.extend({
|
||||
type: z.literal('response.image_generation_call.in_progress'),
|
||||
});
|
||||
|
||||
// const OutputImageGenerationCallGeneratingEvent_schema = _OutputIndexedEvent_schema.extend({
|
||||
// type: z.literal('response.image_generation_call.generating'),
|
||||
// });
|
||||
// Streaming event 'response.image_generation_call.generating'.
// Follows 'in_progress' in the flow noted in the event parser; carries no payload beyond the base fields.
const OutputImageGenerationCallGeneratingEvent_schema = _OutputIndexedEvent_schema.extend({
|
||||
type: z.literal('response.image_generation_call.generating'),
|
||||
});
|
||||
|
||||
// const OutputImageGenerationCallPartialImageEvent_schema = _OutputIndexedEvent_schema.extend({
|
||||
// type: z.literal('response.image_generation_call.partial_image'),
|
||||
// partial_image_b64: z.string(), // base64 partial image
|
||||
// partial_image_index: z.number(), // 0-based index
|
||||
// });
|
||||
// Streaming event 'response.image_generation_call.partial_image' (zero or more per call).
// The event parser in this change deliberately ignores these payloads to avoid duplicates,
// and takes the final image from the completed output item instead.
const OutputImageGenerationCallPartialImageEvent_schema = _OutputIndexedEvent_schema.extend({
|
||||
type: z.literal('response.image_generation_call.partial_image'),
|
||||
partial_image_b64: z.string(), // base64 partial image
|
||||
partial_image_index: z.number(), // 0-based index
|
||||
});
|
||||
|
||||
// const OutputImageGenerationCallCompletedEvent_schema = _OutputIndexedEvent_schema.extend({
|
||||
// type: z.literal('response.image_generation_call.completed'),
|
||||
// });
|
||||
// Streaming event 'response.image_generation_call.completed'.
// Last step of the image generation flow; this schema carries no image data itself
// (the event parser in this change reads the final image from the output item when it is done).
const OutputImageGenerationCallCompletedEvent_schema = _OutputIndexedEvent_schema.extend({
|
||||
type: z.literal('response.image_generation_call.completed'),
|
||||
});
|
||||
|
||||
// Streaming > Tool invoke > File search events (OpenAI vector store - not implemented)
|
||||
|
||||
@@ -1737,10 +1747,10 @@ export namespace OpenAIWire_API_Responses {
|
||||
OutputWebSearchCallCompleted_schema,
|
||||
|
||||
// Tool invoke > Image generation events
|
||||
// OutputImageGenerationCallInProgressEvent_schema,
|
||||
// OutputImageGenerationCallGeneratingEvent_schema,
|
||||
// OutputImageGenerationCallPartialImageEvent_schema,
|
||||
// OutputImageGenerationCallCompletedEvent_schema,
|
||||
OutputImageGenerationCallInProgressEvent_schema,
|
||||
OutputImageGenerationCallGeneratingEvent_schema,
|
||||
OutputImageGenerationCallPartialImageEvent_schema,
|
||||
OutputImageGenerationCallCompletedEvent_schema,
|
||||
|
||||
// Tool invoke > File Search events
|
||||
// OutputFileSearchCallInProgressEvent_schema, // OpenAI vector store - not implemented
|
||||
|
||||
Reference in New Issue
Block a user