prelim gpt-image (can't test, no access)
@@ -30,6 +30,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
   "o3": "OpenAI o3",
   "o4-mini": "OpenAI o4 mini",
   "dall-e": "DALL-E",
+  "gpt-image": "GPT Image",
   claude: "Claude (Sonnet)",
   "claude-opus": "Claude (Opus)",
   "gemini-flash": "Gemini Flash",
@@ -63,6 +64,7 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
   "azure-o3": "Azure o3",
   "azure-o4-mini": "Azure o4 mini",
   "azure-dall-e": "Azure DALL-E",
+  "azure-gpt-image": "Azure GPT Image",
 };
 
 const converter = new showdown.Converter();
@@ -213,15 +215,15 @@ function getServerTitle() {
 }
 
 function buildRecentImageSection() {
-  const dalleModels: ModelFamily[] = ["azure-dall-e", "dall-e"];
+  const imageModels: ModelFamily[] = ["azure-dall-e", "dall-e", "gpt-image", "azure-gpt-image"];
   if (
     !config.showRecentImages ||
-    dalleModels.every((f) => !config.allowedModelFamilies.includes(f))
+    imageModels.every((f) => !config.allowedModelFamilies.includes(f))
   ) {
     return "";
   }
 
-  let html = `<h2>Recent DALL-E Generations</h2>`;
+  let html = `<h2>Recent Image Generations</h2>`;
   const recentImages = getLastNImages(12).reverse();
   if (recentImages.length === 0) {
     html += `<p>No images yet.</p>`;
@@ -11,7 +11,7 @@ import { ProxyResHandlerWithBody } from "./middleware/response";
 import { ProxyReqManager } from "./middleware/request/proxy-req-manager";
 import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory";
 
-const KNOWN_MODELS = ["dall-e-2", "dall-e-3"];
+const KNOWN_MODELS = ["dall-e-2", "dall-e-3", "gpt-image-1"];
 
 let modelListCache: any = null;
 let modelListValid = 0;
@@ -58,27 +58,46 @@ function transformResponseForChat(
   req: Request
 ): Record<string, any> {
   const prompt = imageBody.data[0].revised_prompt ?? req.body.prompt;
+  const isGptImage = req.body.model?.includes("gpt-image") || false;
 
   const content = imageBody.data
     .map((item) => {
       const { url, b64_json } = item;
+      // The gpt-image-1 model always returns b64_json
+      // Format will depend on output_format parameter (defaults to png)
+      // For simplicity, we'll assume png if not specified
+      const format = req.body.output_format || "png";
+
       if (b64_json) {
-        return ``;
+        return ``;
       } else {
         return ``;
       }
     })
     .join("\n\n");
 
+  // Prepare the usage information - gpt-image-1 includes detailed token usage
+  let usage = {
+    prompt_tokens: 0,
+    completion_tokens: req.outputTokens,
+    total_tokens: req.outputTokens,
+  };
+
+  // If this is a gpt-image-1 response, it includes detailed usage info
+  if (imageBody.usage) {
+    usage = {
+      prompt_tokens: imageBody.usage.input_tokens || 0,
+      completion_tokens: imageBody.usage.output_tokens || 0,
+      total_tokens: imageBody.usage.total_tokens || 0,
+    };
+  }
+
   return {
-    id: "dalle-" + req.id,
+    id: req.body.model?.includes("gpt-image") ? "gptimage-" + req.id : "dalle-" + req.id,
     object: "chat.completion",
     created: Date.now(),
     model: req.body.model,
-    usage: {
-      prompt_tokens: 0,
-      completion_tokens: req.outputTokens,
-      total_tokens: req.outputTokens,
-    },
+    usage,
     choices: [
       {
         message: { role: "assistant", content },
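Note: a sketch of the chat-completion payload this transform should now emit for a gpt-image-1 request; every value below is illustrative, and the message content is whatever the template literals above (stripped by the diff viewer) actually produce. Two small observations, hedged since the commit is untested: isGptImage is computed but the id line re-tests req.body.model, and Date.now() yields milliseconds where OpenAI's created field is conventionally unix seconds.

    // Illustrative shape only; ids and token counts are hypothetical.
    const exampleChatCompletion = {
      id: "gptimage-req_abc123", // "dalle-" prefix is used for dall-e models
      object: "chat.completion",
      created: Date.now(), // milliseconds; OpenAI clients usually expect seconds
      model: "gpt-image-1",
      usage: {
        prompt_tokens: 120,      // from imageBody.usage.input_tokens
        completion_tokens: 4160, // from imageBody.usage.output_tokens
        total_tokens: 4280,      // from imageBody.usage.total_tokens
      },
      choices: [{ message: { role: "assistant", content: "<image links>" } }],
    };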
@@ -89,6 +108,56 @@ function transformResponseForChat(
   };
 }
 
+// Filter parameters based on the model being used to avoid sending unsupported parameters
+function filterModelParameters(manager: ProxyReqManager) {
+  const req = manager.request;
+  const originalBody = req.body;
+  const modelName = originalBody?.model || "";
+
+  // Skip if no body or it's not an object
+  if (!originalBody || typeof originalBody !== 'object') return;
+
+  // Create a shallow copy of the body to filter (the spread copies one level)
+  const filteredBody = { ...originalBody };
+
+  // Define allowed parameters for each model
+  if (modelName.includes('dall-e-2')) {
+    // DALL-E 2 parameters
+    const allowedParams = [
+      'model', 'prompt', 'n', 'size', 'response_format', 'user'
+    ];
+
+    // Remove any parameter not in the allowed list
+    Object.keys(filteredBody).forEach(key => {
+      if (!allowedParams.includes(key)) {
+        delete filteredBody[key];
+      }
+    });
+
+    req.log.info({ model: 'dall-e-2', params: Object.keys(filteredBody) }, "Filtered parameters for DALL-E 2");
+  } else if (modelName.includes('dall-e-3')) {
+    // DALL-E 3 parameters
+    const allowedParams = [
+      'model', 'prompt', 'n', 'quality', 'size', 'style', 'response_format', 'user'
+    ];
+
+    // Remove any parameter not in the allowed list
+    Object.keys(filteredBody).forEach(key => {
+      if (!allowedParams.includes(key)) {
+        delete filteredBody[key];
+      }
+    });
+
+    req.log.info({ model: 'dall-e-3', params: Object.keys(filteredBody) }, "Filtered parameters for DALL-E 3");
+  } else if (modelName.includes('gpt-image')) {
+    // For gpt-image-1, all parameters are passed through unchanged
+    req.log.info({ model: 'gpt-image-1', params: Object.keys(filteredBody) }, "Using all parameters for GPT Image");
+  }
+
+  // Use the proper method to update the body
+  manager.setBody(filteredBody);
+}
+
 function replacePath(manager: ProxyReqManager) {
   const req = manager.request;
   const pathname = req.url.split("?")[0];
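Note: a minimal, self-contained sketch of what the dall-e-2 branch above is expected to do; the request body here is hypothetical test data, not taken from the commit.

    const body: Record<string, any> = {
      model: "dall-e-2",
      prompt: "a lighthouse at dusk",
      n: 1,
      size: "512x512",
      background: "transparent", // gpt-image-1 only; should be dropped
      output_format: "webp",     // gpt-image-1 only; should be dropped
    };
    const allowedParams = ["model", "prompt", "n", "size", "response_format", "user"];
    const filtered = Object.fromEntries(
      Object.entries(body).filter(([key]) => allowedParams.includes(key))
    );
    // filtered => { model: "dall-e-2", prompt: "a lighthouse at dusk", n: 1, size: "512x512" }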
@@ -100,7 +169,7 @@ function replacePath(manager: ProxyReqManager) {
 
 const openaiImagesProxy = createQueuedProxyMiddleware({
   target: "https://api.openai.com",
-  mutations: [replacePath, addKey, finalizeBody],
+  mutations: [replacePath, filterModelParameters, addKey, finalizeBody],
   blockingResponseHandler: openaiImagesResponseHandler,
 });
 
@@ -116,6 +185,17 @@ openaiImagesRouter.post(
   }),
   openaiImagesProxy
 );
+// Add support for the /v1/images/edits endpoint (used by gpt-image-1 for image editing)
+openaiImagesRouter.post(
+  "/v1/images/edits",
+  ipLimiter,
+  createPreprocessorMiddleware({
+    inApi: "openai-image",
+    outApi: "openai-image",
+    service: "openai",
+  }),
+  openaiImagesProxy
+);
 openaiImagesRouter.post(
   "/v1/chat/completions",
   ipLimiter,
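Note: a hypothetical client call against the new route, assuming the proxy accepts JSON here the same way the generations endpoint does (PROXY_URL and all values are made up). One caveat worth verifying, since the author couldn't test: OpenAI's upstream /v1/images/edits has historically required multipart/form-data with file uploads, so a straight JSON pass-through may need a body rewrite.

    const PROXY_URL = "http://localhost:7860"; // hypothetical deployment

    async function editImage(prompt: string, imageB64: string) {
      const res = await fetch(`${PROXY_URL}/v1/images/edits`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model: "gpt-image-1",
          prompt,
          image: imageB64, // the schema also accepts an array of images
        }),
      });
      return res.json();
    }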
@@ -1,20 +1,58 @@
 import { z } from "zod";
 import { Request } from "express";
 import { OpenAIV1ChatCompletionSchema } from "./openai";
 import { APIFormatTransformer } from "./index";
 
+// Extend the Express Request type to include multimodal content
+declare global {
+  namespace Express {
+    interface Request {
+      multimodalContent?: {
+        prompt?: string;
+        images?: string[];
+      };
+    }
+  }
+}
+
 // https://platform.openai.com/docs/api-reference/images/create
 export const OpenAIV1ImagesGenerationSchema = z
   .object({
-    prompt: z.string().max(4000),
+    prompt: z.string().max(32000), // gpt-image-1 supports up to 32000 chars
     model: z.string().max(100).optional(),
-    quality: z.enum(["standard", "hd"]).optional().default("standard"),
-    n: z.number().int().min(1).max(4).optional().default(1),
-    response_format: z.enum(["url", "b64_json"]).optional(),
+    // Support for image inputs (multimodal capability of gpt-image-1)
+    image: z.union([
+      z.string(), // single image (base64 or URL)
+      z.array(z.string()) // array of images
+    ]).optional(),
+    mask: z.string().optional(), // mask image for editing
+    // Different quality options based on model
+    quality: z
+      .union([
+        z.enum(["standard", "hd"]), // dall-e-3 options
+        z.enum(["high", "medium", "low"]), // gpt-image-1 options
+        z.literal("auto") // default for gpt-image-1
+      ])
+      .optional()
+      .default("standard"),
+    n: z.number().int().min(1).max(10).optional().default(1), // gpt-image-1 supports up to 10
+    response_format: z.enum(["url", "b64_json"]).optional(), // Note: gpt-image-1 always returns b64_json
+    // Enhanced size options for gpt-image-1
     size: z
-      .enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"])
+      .union([
+        // dall-e models
+        z.enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]),
+        // gpt-image-1 models (adds landscape, portrait, auto)
+        z.enum(["1024x1024", "1536x1024", "1024x1536", "auto"])
+      ])
      .optional()
       .default("1024x1024"),
-    style: z.enum(["vivid", "natural"]).optional().default("vivid"),
+    style: z.enum(["vivid", "natural"]).optional().default("vivid"), // dall-e-3 only
+    // New gpt-image-1 specific parameters
+    background: z.enum(["transparent", "opaque", "auto"]).optional(), // gpt-image-1 only
+    moderation: z.enum(["low", "auto"]).optional(), // gpt-image-1 only
+    output_compression: z.number().int().min(0).max(100).optional(), // gpt-image-1 only
+    output_format: z.enum(["png", "jpeg", "webp"]).optional(), // gpt-image-1 only
     user: z.string().max(500).optional(),
   })
   .strip();
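Note: a quick illustration of what the widened schema now accepts (values hypothetical). Also worth noting: because quality still defaults to "standard" and size to "1024x1024", a gpt-image-1 body that omits them parses to dall-e-style defaults; the transformer below compensates by rebuilding model-specific defaults.

    // Should parse cleanly under the new unions:
    const gptImageBody = OpenAIV1ImagesGenerationSchema.parse({
      prompt: "a watercolor fox",
      model: "gpt-image-1",
      quality: "high",   // gpt-image-1 member of the quality union
      size: "1536x1024", // gpt-image-1 member of the size union
      moderation: "low",
      output_format: "webp",
    });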
@@ -34,9 +72,41 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
   }
 
   const { messages } = result.data;
-  const prompt = messages.filter((m) => m.role === "user").pop()?.content;
-  if (Array.isArray(prompt)) {
-    throw new Error("Image generation prompt must be a text message.");
+  const userMessage = messages.filter((m) => m.role === "user").pop();
+  if (!userMessage) {
+    throw new Error("No user message found in the request.");
   }
+
+  const content = userMessage.content;
+
+  // Handle array content (multimodal content with text and images)
+  if (Array.isArray(content)) {
+    const textParts: string[] = [];
+    const imageParts: string[] = [];
+
+    // Process content parts, extracting text and images
+    content.forEach(part => {
+      if (typeof part === 'string') {
+        textParts.push(part);
+      } else if (part.type === 'image_url') {
+        // Extract image URL or base64 data from the content
+        const imageUrl = typeof part.image_url === 'string'
+          ? part.image_url
+          : part.image_url.url;
+        imageParts.push(imageUrl);
+      }
+    });
+
+    // Join all text parts to form the prompt
+    const prompt = textParts.join('\n');
+
+    // For gpt-image-1, we'll pass both the text prompt and image(s)
+    req.multimodalContent = {
+      prompt,
+      images: imageParts
+    };
+  } else if (typeof content !== 'string') {
+    throw new Error("Image generation prompt must be a text message or multimodal content.");
+  }
 
   if (body.stream) {
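Note: an illustrative content array for the branch above. As written, only bare-string parts land in textParts; a standard OpenAI text part of the shape { type: "text", text: "..." } matches neither branch and is silently dropped, so prompts from typical multimodal clients could come through empty. Flagging rather than fixing, since the commit is untested.

    const parts = [
      "Image: make the sky purple", // collected into textParts
      { type: "text", text: "and add stars" }, // dropped by the current loop
      { type: "image_url", image_url: { url: "https://example.com/sky.png" } }, // collected into imageParts
    ];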
@@ -49,20 +119,172 @@ export const transformOpenAIToOpenAIImage: APIFormatTransformer<
   // character name or wrapping the entire thing in quotes. We will look for
   // the index of "Image:" and use everything after that as the prompt.
 
-  const index = prompt?.toLowerCase().indexOf("image:");
-  if (index === -1 || !prompt) {
-    throw new Error(
-      `Start your prompt with 'Image:' followed by a description of the image you want to generate (received: ${prompt}).`
-    );
+  // For multimodal requests (image editing with gpt-image-1), we don't require the "Image:" prefix
+  const isMultimodalRequest = Array.isArray(content) && req.multimodalContent?.images && req.multimodalContent.images.length > 0;
+
+  // Only enforce the "Image:" prefix for non-multimodal requests
+  if (!isMultimodalRequest && typeof content === 'string') {
+    const textIndex = content.toLowerCase().indexOf("image:");
+    if (textIndex === -1) {
+      throw new Error(
+        `Start your prompt with 'Image:' followed by a description of the image you want to generate (received: ${content}).`
+      );
+    }
   }
 
   // TODO: Add some way to specify parameters via chat message
-  const transformed = {
-    model: body.model.includes("dall-e") ? body.model : "dall-e-3",
-    quality: "standard",
-    size: "1024x1024",
-    response_format: "url",
-    prompt: prompt.slice(index! + 6).trim(),
+  // Determine which model to use (gpt-image-1 or dall-e-3)
+  const isGptImage = body.model?.includes("gpt-image") || false;
+
+  // Get the text prompt either from multimodal content or plain string content
+  let textPrompt: string | undefined;
+  let index = -1;
+
+  if (Array.isArray(content)) {
+    textPrompt = req.multimodalContent?.prompt;
+  } else if (typeof content === 'string') {
+    index = content.toLowerCase().indexOf("image:");
+    textPrompt = index !== -1 ? content.slice(index + 6).trim() : content;
+  }
+
+  // Validate that we have a text prompt
+  if (!textPrompt) {
+    throw new Error("No text prompt found in the request.");
+  }
+
+  // Determine the exact model being used
+  let modelName = "dall-e-2"; // Default
+
+  if (isGptImage) {
+    modelName = "gpt-image-1";
+  } else if (body.model?.includes("dall-e-3")) {
+    modelName = "dall-e-3";
+  } else if (body.model?.includes("dall-e-2")) {
+    modelName = "dall-e-2";
+  } else {
+    // If no specific model was requested, default to dall-e-3
+    modelName = "dall-e-3";
+  }
+
+  // Start with basic parameters common to all models
+  const transformed: any = {
+    model: modelName,
+    prompt: textPrompt,
   };
+
+  // Add model-specific parameters
+  if (modelName === "gpt-image-1") {
+    // GPT Image specific parameters
+    transformed.quality = "auto"; // Default quality for gpt-image-1
+    transformed.size = "1024x1024"; // Default size (square)
+    transformed.moderation = "low"; // Always set moderation to low for gpt-image-1
+
+    // Optional GPT Image parameters
+    if (body.background) transformed.background = body.background;
+    if (body.output_format) transformed.output_format = body.output_format;
+    if (body.output_compression) transformed.output_compression = body.output_compression;
+
+    // Handle specific quality settings for gpt-image-1
+    if (body.quality && ["high", "medium", "low", "auto"].includes(body.quality)) {
+      transformed.quality = body.quality;
+    }
+
+    // Handle specific size settings for gpt-image-1
+    if (body.size && ["1024x1024", "1536x1024", "1024x1536", "auto"].includes(body.size)) {
+      transformed.size = body.size;
+    }
+
+    // No response_format for gpt-image-1 as it always returns b64_json
+  } else if (modelName === "dall-e-3") {
+    // DALL-E 3 specific parameters
+    transformed.size = "1024x1024"; // Default size
+    transformed.response_format = "url"; // Default format
+    transformed.quality = "standard"; // Default quality
+
+    // Handle the DALL-E 3 style parameter
+    if (body.style && ["vivid", "natural"].includes(body.style)) {
+      transformed.style = body.style;
+    } else {
+      transformed.style = "vivid"; // Default style
+    }
+
+    // Handle specific quality settings for dall-e-3
+    if (body.quality && ["standard", "hd"].includes(body.quality)) {
+      transformed.quality = body.quality;
+    }
+
+    // Handle specific size settings for dall-e-3
+    if (body.size && ["1024x1024", "1792x1024", "1024x1792"].includes(body.size)) {
+      transformed.size = body.size;
+    }
+  } else {
+    // DALL-E 2 specific parameters
+    transformed.size = "1024x1024"; // Default size
+    transformed.response_format = "url"; // Default format
+
+    // No quality parameter for dall-e-2;
+    // explicitly remove it before sending
+    delete transformed.quality;
+
+    // Handle specific size settings for dall-e-2
+    if (body.size && ["256x256", "512x512", "1024x1024"].includes(body.size)) {
+      transformed.size = body.size;
+    }
+  }
+
+  // Handle common parameters
+  if (body.n && !isNaN(parseInt(body.n))) {
+    // For dall-e-3, only n=1 is supported
+    if (modelName === "dall-e-3" && parseInt(body.n) > 1) {
+      transformed.n = 1;
+    } else {
+      transformed.n = parseInt(body.n);
+    }
+  }
+
+  // Handle response_format for non-gpt-image models
+  if (!isGptImage && body.response_format && ["url", "b64_json"].includes(body.response_format)) {
+    transformed.response_format = body.response_format;
+  }
+
+  // If this is gpt-image-1 and we have image content, add it to the transformed request
+  if (isGptImage && req.multimodalContent?.images && req.multimodalContent.images.length > 0) {
+    // For the edit endpoint, we need to format the images properly
+    transformed.image = req.multimodalContent.images.length === 1
+      ? req.multimodalContent.images[0]
+      : req.multimodalContent.images;
+
+    // Any request with images for gpt-image-1 should use the edits endpoint
+    req.log.info(`${req.multimodalContent.images.length} image(s) detected for gpt-image-1, using images/edits endpoint`);
+    if (req.path.startsWith("/v1/chat/completions")) {
+      req.url = req.url.replace("/v1/chat/completions", "/v1/images/edits");
+    }
+  }
+
+  // For dall-e-2, we need to make sure we don't introduce unsupported parameters
+  // due to default values in the schema. Bypass Zod schema validation here
+  // for dall-e-2 and only include the supported parameters.
+  if (modelName === "dall-e-2") {
+    // Only include parameters that dall-e-2 supports
+    const filteredTransformed: any = {};
+
+    // List of parameters supported by dall-e-2
+    const supportedParams = [
+      "model", "prompt", "n", "size", "response_format", "user"
+    ];
+
+    // Copy only the supported parameters
+    for (const param of supportedParams) {
+      if (transformed[param] !== undefined) {
+        filteredTransformed[param] = transformed[param];
+      }
+    }
+
+    // Log what we're sending
+    req.log.info({ params: Object.keys(filteredTransformed) }, "Filtered parameters for dall-e-2");
+
+    return filteredTransformed;
+  }
+
+  // For other models, use the schema as normal
+  return OpenAIV1ImagesGenerationSchema.parse(transformed);
 };
 
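Note: a hypothetical end-to-end result of the transform for the chat prompt "Image: a brutalist library at dawn" with model "gpt-image-1" and no other parameters set. Separately, parseInt(body.n) may need String(body.n) to type-check, since the chat schema types n as a number.

    const transformedExample = {
      model: "gpt-image-1",
      prompt: "a brutalist library at dawn",
      quality: "auto",   // gpt-image-1 default set above
      size: "1024x1024", // default square size
      moderation: "low", // always forced low for gpt-image-1
      // no response_format: gpt-image-1 always returns b64_json
    };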
@@ -13,9 +13,19 @@ export type OpenAIImageGenerationResult = {
   created: number;
   data: {
     revised_prompt?: string;
-    url: string;
-    b64_json: string;
+    url?: string; // gpt-image-1 doesn't return URLs, only b64_json
+    b64_json?: string;
   }[];
+  // Added for gpt-image-1 responses
+  usage?: {
+    total_tokens: number;
+    input_tokens: number;
+    output_tokens: number;
+    input_tokens_details?: {
+      text_tokens: number;
+      image_tokens: number;
+    };
+  };
 };
 
 async function downloadImage(url: string) {
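Note: a sample response (values invented) that satisfies the updated OpenAIImageGenerationResult, i.e. a gpt-image-1 payload with b64_json and usage but no url:

    const sample: OpenAIImageGenerationResult = {
      created: 1715000000,
      data: [{ revised_prompt: "a watercolor fox", b64_json: "iVBORw0KGgo..." }],
      usage: {
        total_tokens: 4280,
        input_tokens: 120,
        output_tokens: 4160,
        input_tokens_details: { text_tokens: 100, image_tokens: 20 },
      },
    };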
@@ -65,11 +75,16 @@ export async function mirrorGeneratedImage(
   let mirror: string;
   if (item.b64_json) {
     mirror = await saveB64Image(item.b64_json);
-  } else {
+  } else if (item.url) {
     mirror = await downloadImage(item.url);
+  } else {
+    req.log.warn("No image data found in response");
+    continue;
   }
+  // Set the URL to our mirrored version
   item.url = `${host}/user_content/${path.basename(mirror)}`;
   await createThumbnail(mirror);
+  // Add to image history with the local URL
   addToImageHistory({
     url: item.url,
     prompt,
@@ -84,6 +84,7 @@ export class AzureOpenAIKeyProvider implements KeyProvider<AzureOpenAIKey> {
       "azure-o3Tokens": 0,
       "azure-o4-miniTokens": 0,
       "azure-dall-eTokens": 0,
+      "azure-gpt-imageTokens": 0,
       modelIds: [],
     };
     this.keys.push(newKey);
@@ -124,6 +124,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
       "o3Tokens": 0,
       "o4-miniTokens": 0,
       "dall-eTokens": 0,
+      "gpt-imageTokens": 0,
       modelIds: [],
     };
     this.keys.push(newKey);
@@ -34,7 +34,8 @@ export type OpenAIModelFamily =
   | "o3-mini"
   | "o3"
   | "o4-mini"
-  | "dall-e";
+  | "dall-e"
+  | "gpt-image";
 export type AnthropicModelFamily = "claude" | "claude-opus";
 export type GoogleAIModelFamily =
   | "gemini-flash"
@@ -84,6 +85,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
   "o3",
   "o4-mini",
   "dall-e",
+  "gpt-image",
   "claude",
   "claude-opus",
   "gemini-flash",
@@ -117,6 +119,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
   "azure-o3-mini",
   "azure-o3",
   "azure-o4-mini",
+  "azure-gpt-image",
 ] as const);
 
 export const LLM_SERVICES = (<A extends readonly LLMService[]>(
@@ -154,6 +157,7 @@ export const MODEL_FAMILY_SERVICE: {
   "o3": "openai",
   "o4-mini": "openai",
   "dall-e": "openai",
+  "gpt-image": "openai",
   claude: "anthropic",
   "claude-opus": "anthropic",
   "aws-claude": "aws",
@@ -180,6 +184,7 @@ export const MODEL_FAMILY_SERVICE: {
   "azure-o3-mini": "azure",
   "azure-o3": "azure",
   "azure-o4-mini": "azure",
+  "azure-gpt-image": "azure",
   "gemini-flash": "google-ai",
   "gemini-pro": "google-ai",
   "gemini-ultra": "google-ai",
@@ -189,9 +194,10 @@ export const MODEL_FAMILY_SERVICE: {
   "mistral-large": "mistral-ai",
 };
 
-export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e"];
+export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e", "gpt-image", "azure-gpt-image"];
 
 export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
+  "^gpt-image(-\\d+)?(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt-image",
   "^gpt-4\\.5(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt45",
   "^gpt-4\\.1(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41",
   "^gpt-4\\.1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41-mini",
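Note: a quick sanity check of the new family regex (model ids below are hypothetical):

    const gptImageRe = /^gpt-image(-\d+)?(-preview)?(-\d{4}-\d{2}-\d{2})?$/;
    gptImageRe.test("gpt-image");              // true
    gptImageRe.test("gpt-image-1");            // true
    gptImageRe.test("gpt-image-1-2025-04-23"); // true
    gptImageRe.test("gpt-4.1-mini");           // false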
@@ -83,6 +83,16 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
     case "dall-e":
       cost = 0.00001;
       break;
+    case "azure-gpt-image":
+    case "gpt-image":
+      // gpt-image-1 pricing:
+      //   Text input tokens:   $5 per 1M tokens
+      //   Image input tokens:  $10 per 1M tokens
+      //   Image output tokens: $40 per 1M tokens
+      // Weighted average assuming a mix of text/image input and output;
+      // typical cost is $0.02-$0.19 per image depending on quality.
+      cost = 0.000018; // balanced estimate accounting for input/output mix
+      break;
     case "aws-claude":
     case "gcp-claude":
     case "claude":
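Note: a worked example of the flat per-token estimate above (the token count is illustrative, not a measured value):

    const costPerToken = 0.000018;  // USD, from the case above
    const tokensForOneImage = 5000; // hypothetical input + output accounting
    const estimatedCost = tokensForOneImage * costPerToken; // 0.09 USD
    // which falls inside the $0.02-$0.19 per-image range quoted in the comment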