gpt-image fix maybe

This commit is contained in:
reanon
2025-04-26 10:29:01 +02:00
parent a16d66a45b
commit afe6ad8ac9
3 changed files with 74 additions and 9 deletions
+33
View File
@@ -0,0 +1,33 @@
You are a Senior Full Stack Developer and an Expert in ReactJS, NextJS, JavaScript, TypeScript, HTML, CSS and modern UI/UX frameworks (e.g., TailwindCSS, Shadcn, Radix). You are thoughtful, give nuanced answers, and are brilliant at reasoning. You carefully provide accurate, factual, thoughtful answers, and are a genius at reasoning.
- Follow the user's requirements carefully & to the letter.
- First think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.
- Confirm, then write code!
- Always write correct, best-practice, DRY (Don't Repeat Yourself), bug-free, fully functional and working code. It should also follow the rules listed below under Code Implementation Guidelines.
- Focus on easy-to-read code over performant code.
- Fully implement all requested functionality.
- Leave NO todos, placeholders or missing pieces.
- Ensure code is complete! Verify that it is thoroughly finalised.
- Include all required imports, and ensure proper naming of key components.
- Be concise. Minimize any other prose.
- If you think there might not be a correct answer, you say so.
- If you do not know the answer, say so, instead of guessing.
### Coding Environment
The user asks questions about the following coding languages:
- ReactJS
- NextJS
- JavaScript
- TypeScript
- TailwindCSS
- HTML
- CSS
### Code Implementation Guidelines
Follow these rules when you write code:
- Use early returns whenever possible to make the code more readable.
- Always use Tailwind classes for styling HTML elements; avoid using CSS or `<style>` tags.
- Use “class:” instead of the ternary operator in class tags whenever possible.
- Use descriptive variable and function/const names. Also, event functions should be named with a “handle” prefix, like “handleClick” for onClick and “handleKeyDown” for onKeyDown.
- Implement accessibility features on elements. For example, an `<a>` tag should have tabindex=“0”, aria-label, on:click, and on:keydown, and similar attributes.
- Use consts instead of functions, for example, “const toggle = () =>”. Also, define a type if possible.
+34 -6
View File
@@ -119,11 +119,14 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
// character name or wrapping the entire thing in quotes. We will look for
// the index of "Image:" and use everything after that as the prompt.
// For multimodal requests (image editing with gpt-image-1), we don't require the "Image:" prefix
// Determine if this is a multimodal request (with images)
const isMultimodalRequest = Array.isArray(content) && req.multimodalContent?.images && req.multimodalContent.images.length > 0;
// Only enforce the "Image:" prefix for non-multimodal requests
if (!isMultimodalRequest && typeof content === 'string') {
// Check if this is a request for gpt-image-1
const isGptImageRequest = body.model?.includes("gpt-image") || false;
// Only enforce the "Image:" prefix for non-multimodal, non-gpt-image-1 requests
if (!isMultimodalRequest && !isGptImageRequest && typeof content === 'string') {
const textIndex = content.toLowerCase().indexOf("image:");
if (textIndex === -1) {
throw new Error(
@@ -131,20 +134,45 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
);
}
}
// TODO: Add some way to specify parameters via chat message
// Determine which model to use (gpt-image-1 or dall-e-3)
const isGptImage = body.model?.includes("gpt-image") || false;
// For gpt-image-1, add the 'Image:' prefix if it's missing but only for string content
let modifiedStringContent = typeof content === 'string' ? content : '';
if (isGptImageRequest && typeof content === 'string' && !content.toLowerCase().includes("image:")) {
req.log.info("Adding 'Image:' prefix to gpt-image-1 prompt");
modifiedStringContent = `Image: ${content}`;
// Store this in the request object for later use
req.multimodalContent = req.multimodalContent || {};
req.multimodalContent.prompt = modifiedStringContent;
}
// Get the correct text prompt either from multimodal content or plain string content
let textPrompt: string | undefined;
let index = -1;
if (Array.isArray(content)) {
// For array content, use the prompt from multimodal content if available
textPrompt = req.multimodalContent?.prompt;
} else if (typeof content === 'string') {
index = content.toLowerCase().indexOf("image:");
textPrompt = index !== -1 ? content.slice(index + 6).trim() : content;
// For string content, use the modified content which might have the Image: prefix for gpt-image-1
const contentToProcess = isGptImageRequest ? modifiedStringContent : content;
// Find the "Image:" prefix in the content
index = contentToProcess.toLowerCase().indexOf("image:");
// For gpt-image-1, we might have just added the prefix, so we need to handle both cases
if (index !== -1) {
textPrompt = contentToProcess.slice(index + 6).trim();
} else if (isGptImageRequest) {
// For gpt-image-1, use the whole content if no prefix is found
textPrompt = content; // Use the original content without prefix
} else {
// For other models, default to the content as-is
textPrompt = contentToProcess;
}
}
// Validate that we have a text prompt
+7 -3
View File
@@ -179,9 +179,9 @@ export const DALLE_TOKENS_PER_DOLLAR = 100000;
* which we convert to tokens at a rate of 100000 tokens per dollar.
*/
export function getOpenAIImageCost(params: {
model: "dall-e-2" | "dall-e-3";
quality: "standard" | "hd";
resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024";
model: "dall-e-2" | "dall-e-3" | "gpt-image-1";
quality: "standard" | "hd" | "high" | "medium" | "low" | "auto";
resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024" | "1536x1024" | "1024x1536" | "auto";
n: number | null;
}) {
const { model, quality, resolution, n } = params;
@@ -208,6 +208,10 @@ export function getOpenAIImageCost(params: {
default:
throw new Error("Invalid resolution");
}
case "gpt-image-1":
// gpt-image-1 pricing is approximately $0.04 per image
// This is a simplified pricing model, adjust as needed based on official pricing
return 0.04;
default:
throw new Error("Invalid image generation model");
}