gpt-image fix maybe

2025-04-26 10:29:01 +02:00
parent a16d66a45b
commit afe6ad8ac9
3 changed files with 74 additions and 9 deletions
@@ -0,0 +1,33 @@
+You are a Senior Full Stack Developer and an Expert in ReactJS, NextJS, JavaScript, TypeScript, HTML, CSS and modern UI/UX frameworks (e.g., TailwindCSS, Shadcn, Radix). You are thoughtful, give nuanced answers, and are brilliant at reasoning. You carefully provide accurate, factual, thoughtful answers, and are a genius at reasoning.
+
+- Follow the user’s requirements carefully & to the letter.
+- First think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.
+- Confirm, then write code!
+- Always write correct, best-practice, DRY (Don’t Repeat Yourself), bug-free, fully functional, working code that also follows the rules listed below under Code Implementation Guidelines.
+- Prioritize simple, readable code over performance.
+- Fully implement all requested functionality.
+- Leave NO TODOs, placeholders, or missing pieces.
+- Ensure the code is complete! Verify that it is thoroughly finalised.
+- Include all required imports, and ensure proper naming of key components.
+- Be concise. Minimize any other prose.
+- If you think there might not be a correct answer, you say so.
+- If you do not know the answer, say so, instead of guessing.
+
+### Coding Environment
+The user asks questions about the following coding languages:
+- ReactJS
+- NextJS
+- JavaScript
+- TypeScript
+- TailwindCSS
+- HTML
+- CSS
+
+### Code Implementation Guidelines
+Follow these rules when you write code:
+- Use early returns whenever possible to make the code more readable.
+- Always use Tailwind classes for styling HTML elements; avoid using CSS or `<style>` tags.
+- Use “class:” instead of the ternary operator in class attributes whenever possible.
+- Use descriptive variable and function/const names. Also, event functions should be named with a “handle” prefix, like “handleClick” for onClick and “handleKeyDown” for onKeyDown.
+- Implement accessibility features on elements. For example, an `<a>` tag should have tabindex=“0”, aria-label, on:click, on:keydown, and similar attributes.
+- Use consts instead of functions, for example, “const toggle = () =>”. Also, define a type if possible.
@@ -119,11 +119,14 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
  // character name or wrapping the entire thing in quotes. We will look for
  // the index of "Image:" and use everything after that as the prompt.

-  // For multimodal requests (image editing with gpt-image-1), we don't require the "Image:" prefix
+  // Determine if this is a multimodal request (with images)  
  const isMultimodalRequest = Array.isArray(content) && req.multimodalContent?.images && req.multimodalContent.images.length > 0;
  
-  // Only enforce the "Image:" prefix for non-multimodal requests
-  if (!isMultimodalRequest && typeof content === 'string') {
+  // Check if this is a request for gpt-image-1
+  const isGptImageRequest = body.model?.includes("gpt-image") || false;
+  
+  // Only enforce the "Image:" prefix for non-multimodal, non-gpt-image-1 requests
+  if (!isMultimodalRequest && !isGptImageRequest && typeof content === 'string') {
    const textIndex = content.toLowerCase().indexOf("image:");
    if (textIndex === -1) {
      throw new Error(
@@ -131,20 +134,45 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
      );
    }
  }
-
+  
  // TODO: Add some way to specify parameters via chat message
  // Determine which model to use (gpt-image-1 or dall-e-3)
  const isGptImage = body.model?.includes("gpt-image") || false;
  
+  // For gpt-image-1, add the 'Image:' prefix if it's missing but only for string content
+  let modifiedStringContent = typeof content === 'string' ? content : '';
+  if (isGptImageRequest && typeof content === 'string' && !content.toLowerCase().includes("image:")) {
+    req.log.info("Adding 'Image:' prefix to gpt-image-1 prompt");
+    modifiedStringContent = `Image: ${content}`;
+    // Store this in the request object for later use
+    req.multimodalContent = req.multimodalContent || {};
+    req.multimodalContent.prompt = modifiedStringContent;
+  }
+
  // Get the correct text prompt either from multimodal content or plain string content
  let textPrompt: string | undefined;
  let index = -1;
  
  if (Array.isArray(content)) {
+    // For array content, use the prompt from multimodal content if available
    textPrompt = req.multimodalContent?.prompt;
  } else if (typeof content === 'string') {
-    index = content.toLowerCase().indexOf("image:");
-    textPrompt = index !== -1 ? content.slice(index + 6).trim() : content;
+    // For string content, use the modified content which might have the Image: prefix for gpt-image-1
+    const contentToProcess = isGptImageRequest ? modifiedStringContent : content;
+    
+    // Find the "Image:" prefix in the content
+    index = contentToProcess.toLowerCase().indexOf("image:");
+    
+    // For gpt-image-1, we might have just added the prefix, so we need to handle both cases
+    if (index !== -1) {
+      textPrompt = contentToProcess.slice(index + 6).trim();
+    } else if (isGptImageRequest) {
+      // For gpt-image-1, use the whole content if no prefix is found
+      textPrompt = content; // Use the original content without prefix
+    } else {
+      // For other models, default to the content as-is
+      textPrompt = contentToProcess;
+    }
  }
  
  // Validate that we have a text prompt
@@ -179,9 +179,9 @@ export const DALLE_TOKENS_PER_DOLLAR = 100000;
 * which we convert to tokens at a rate of 100000 tokens per dollar.
 */
 export function getOpenAIImageCost(params: {
-  model: "dall-e-2" | "dall-e-3";
-  quality: "standard" | "hd";
-  resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024";
+  model: "dall-e-2" | "dall-e-3" | "gpt-image-1";
+  quality: "standard" | "hd" | "high" | "medium" | "low" | "auto";
+  resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024" | "1536x1024" | "1024x1536" | "auto";
  n: number | null;
 }) {
  const { model, quality, resolution, n } = params;
@@ -208,6 +208,10 @@ export function getOpenAIImageCost(params: {
          default:
            throw new Error("Invalid resolution");
        }
+      case "gpt-image-1":
+        // gpt-image-1 pricing is approximately $0.04 per image
+        // This is a simplified pricing model, adjust as needed based on official pricing
+        return 0.04;
      default:
        throw new Error("Invalid image generation model");
    }