mistral update
This commit is contained in:
@@ -1,11 +1,17 @@
|
||||
import { RequestPreprocessor } from "../index";
|
||||
import { countTokens } from "../../../../shared/tokenization";
|
||||
import { assertNever } from "../../../../shared/utils";
|
||||
import { OpenAIChatMessage } from "../../../../shared/api-schemas";
|
||||
import { GoogleAIChatMessage } from "../../../../shared/api-schemas/google-ai";
|
||||
import {
|
||||
GoogleAIChatMessage,
|
||||
MistralAIChatMessage,
|
||||
OpenAIChatMessage,
|
||||
} from "../../../../shared/api-schemas";
|
||||
AnthropicChatMessage,
|
||||
flattenAnthropicMessages,
|
||||
} from "../../../../shared/api-schemas/anthropic";
|
||||
import {
|
||||
MistralAIChatMessage,
|
||||
ContentItem,
|
||||
isMistralVisionModel
|
||||
} from "../../../../shared/api-schemas/mistral-ai";
|
||||
|
||||
/**
|
||||
* Given a request with an already-transformed body, counts the number of
|
||||
@@ -61,9 +67,47 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
|
||||
case "mistral-ai":
|
||||
case "mistral-text": {
|
||||
req.outputTokens = req.body.max_tokens;
|
||||
const prompt: string | MistralAIChatMessage[] =
|
||||
req.body.messages ?? req.body.prompt;
|
||||
|
||||
// Handle multimodal content (vision) in Mistral models
|
||||
const isVisionModel = isMistralVisionModel(req.body.model);
|
||||
const messages = req.body.messages;
|
||||
|
||||
// Check if this is a vision request with images
|
||||
const hasImageContent = Array.isArray(messages) && messages.some(
|
||||
(msg: MistralAIChatMessage) => Array.isArray(msg.content) &&
|
||||
msg.content.some((item: ContentItem) => item.type === "image_url")
|
||||
);
|
||||
|
||||
// For vision content, we add a fixed token count per image
|
||||
// This is an estimate as the actual token count depends on image size and complexity
|
||||
const TOKENS_PER_IMAGE = 1200; // Conservative estimate
|
||||
let imageTokens = 0;
|
||||
|
||||
if (hasImageContent && Array.isArray(messages)) {
|
||||
// Count images in the request
|
||||
for (const msg of messages) {
|
||||
if (Array.isArray(msg.content)) {
|
||||
const imageCount = msg.content.filter(
|
||||
(item: ContentItem) => item.type === "image_url"
|
||||
).length;
|
||||
imageTokens += imageCount * TOKENS_PER_IMAGE;
|
||||
}
|
||||
}
|
||||
|
||||
req.log.debug(
|
||||
{ imageCount: imageTokens / TOKENS_PER_IMAGE, tokenEstimate: imageTokens },
|
||||
"Estimated token count for Mistral vision images"
|
||||
);
|
||||
}
|
||||
|
||||
const prompt: string | MistralAIChatMessage[] = messages ?? req.body.prompt;
|
||||
result = await countTokens({ req, prompt, service });
|
||||
|
||||
// Add the image tokens to the total count
|
||||
if (imageTokens > 0) {
|
||||
result.token_count += imageTokens;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case "openai-image": {
|
||||
|
||||
@@ -4,7 +4,7 @@ import {
|
||||
API_REQUEST_TRANSFORMERS,
|
||||
} from "../../../../shared/api-schemas";
|
||||
import { BadRequestError } from "../../../../shared/errors";
|
||||
import { fixMistralPrompt } from "../../../../shared/api-schemas/mistral-ai";
|
||||
import { fixMistralPrompt, isMistralVisionModel } from "../../../../shared/api-schemas/mistral-ai";
|
||||
import {
|
||||
isImageGenerationRequest,
|
||||
isTextGenerationRequest,
|
||||
@@ -117,12 +117,66 @@ function applyMistralPromptFixes(req: Request): void {
|
||||
// mistral prompt and try to fix it if it fails. It will be re-validated
|
||||
// after this function returns.
|
||||
const result = API_REQUEST_VALIDATORS["mistral-ai"].parse(req.body);
|
||||
|
||||
// Check if this is a vision model request
|
||||
const isVisionModel = isMistralVisionModel(req.body.model);
|
||||
|
||||
// Check if the request contains image content
|
||||
const hasImageContent = result.messages?.some((msg: {content: string | any[]}) =>
|
||||
Array.isArray(msg.content) &&
|
||||
msg.content.some((item: any) => item.type === "image_url")
|
||||
);
|
||||
|
||||
// For vision requests, normalize the image_url format
|
||||
if (hasImageContent && Array.isArray(result.messages)) {
|
||||
// Process each message with image content
|
||||
result.messages.forEach((msg: any) => {
|
||||
if (Array.isArray(msg.content)) {
|
||||
// Process each content item
|
||||
msg.content.forEach((item: any) => {
|
||||
if (item.type === "image_url") {
|
||||
// Normalize the image_url field to a string format that Mistral expects
|
||||
if (typeof item.image_url === "object") {
|
||||
// If it's an object, extract the URL or base64 data
|
||||
if (item.image_url.url) {
|
||||
item.image_url = item.image_url.url;
|
||||
} else if (item.image_url.data) {
|
||||
item.image_url = item.image_url.data;
|
||||
}
|
||||
|
||||
req.log.info(
|
||||
{ model: req.body.model },
|
||||
"Normalized object-format image_url to string format"
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Apply Mistral prompt fixes while preserving multimodal content
|
||||
req.body.messages = fixMistralPrompt(result.messages);
|
||||
req.log.info(
|
||||
{ n: req.body.messages.length, prev: result.messages.length },
|
||||
{
|
||||
n: req.body.messages.length,
|
||||
prev: result.messages.length,
|
||||
isVisionModel,
|
||||
hasImageContent
|
||||
},
|
||||
"Applied Mistral chat prompt fixes."
|
||||
);
|
||||
|
||||
// If this is a vision model with image content, it MUST use the chat API
|
||||
// and cannot be converted to text completions
|
||||
if (hasImageContent) {
|
||||
req.log.info(
|
||||
{ model: req.body.model },
|
||||
"Detected Mistral vision request with image content. Keeping as chat format."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// If the prompt relies on `prefix: true` for the last message, we need to
|
||||
// convert it to a text completions request because AWS Mistral support for
|
||||
// this feature is broken.
|
||||
|
||||
+50
-27
@@ -20,38 +20,61 @@ import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middlewa
|
||||
// months of releasing them so this list is hard to keep up to date. 2024-07-28
|
||||
// https://docs.mistral.ai/platform/endpoints
|
||||
export const KNOWN_MISTRAL_AI_MODELS = [
|
||||
/*
|
||||
Mistral Nemo
|
||||
"A 12B model built with the partnership with Nvidia. It is easy to use and a
|
||||
drop-in replacement in any system using Mistral 7B that it supersedes."
|
||||
*/
|
||||
/* Premier models */
|
||||
// Mistral Large (top-tier reasoning model)
|
||||
"mistral-large-latest",
|
||||
"mistral-large-2411",
|
||||
"mistral-large-2407",
|
||||
"mistral-large-2402", // older version
|
||||
|
||||
// Pixtral Large (multimodal/vision model)
|
||||
"pixtral-large-latest",
|
||||
"pixtral-large-2411",
|
||||
|
||||
// Mistral Saba (language-specialized model)
|
||||
"mistral-saba-latest",
|
||||
"mistral-saba-2502",
|
||||
|
||||
// Codestral (code model)
|
||||
"codestral-latest",
|
||||
"codestral-2501",
|
||||
"codestral-2405",
|
||||
|
||||
// Ministral models (edge models)
|
||||
"ministral-8b-latest",
|
||||
"ministral-8b-2410",
|
||||
"ministral-3b-latest",
|
||||
"ministral-3b-2410",
|
||||
|
||||
// Embedding & Moderation
|
||||
"mistral-embed",
|
||||
"mistral-embed-2312",
|
||||
"mistral-moderation-latest",
|
||||
"mistral-moderation-2411",
|
||||
|
||||
/* Free models */
|
||||
// Mistral Small (with vision in latest version)
|
||||
"mistral-small-latest",
|
||||
"mistral-small-2503", // v3.1 with vision
|
||||
"mistral-small-2402", // older version
|
||||
|
||||
// Pixtral 12B (vision model)
|
||||
"pixtral-12b-latest",
|
||||
"pixtral-12b-2409",
|
||||
|
||||
/* Research & Open Models */
|
||||
// Mistral Nemo
|
||||
"open-mistral-nemo",
|
||||
"open-mistral-nemo-2407",
|
||||
/*
|
||||
Mistral Large
|
||||
"Our flagship model with state-of-the-art reasoning, knowledge, and coding
|
||||
capabilities."
|
||||
*/
|
||||
"mistral-large-latest",
|
||||
"mistral-large-2407",
|
||||
"mistral-large-2402", // deprecated
|
||||
/*
|
||||
Codestral
|
||||
"A cutting-edge generative model that has been specifically designed and
|
||||
optimized for code generation tasks, including fill-in-the-middle and code
|
||||
completion."
|
||||
note: this uses a separate bidi completion endpoint that is not implemented
|
||||
*/
|
||||
"codestral-latest",
|
||||
"codestral-2405",
|
||||
/* So-called "Research Models" */
|
||||
|
||||
// Earlier Mixtral & Mistral models
|
||||
"open-mistral-7b",
|
||||
"open-mixtral-8x7b",
|
||||
"open-mistral-8x22b",
|
||||
"open-mixtral-8x22b",
|
||||
"open-codestral-mamba",
|
||||
/* Deprecated production models */
|
||||
"mistral-small-latest",
|
||||
"mistral-small-2402",
|
||||
"mathstral",
|
||||
|
||||
/* Legacy/deprecated models */
|
||||
"mistral-medium-latest",
|
||||
"mistral-medium-2312",
|
||||
"mistral-tiny",
|
||||
|
||||
@@ -4,9 +4,59 @@ import { Template } from "@huggingface/jinja";
|
||||
import { APIFormatTransformer } from "./index";
|
||||
import { logger } from "../../logger";
|
||||
|
||||
// Define the content types for multimodal messages
|
||||
/** Zod schema for a text content item: an object with `type: "text"` and a string `text`. */
export const TextContentSchema = z.object({
|
||||
type: z.literal("text"),
|
||||
text: z.string()
|
||||
});
|
||||
|
||||
/**
 * Zod schema for an image content item: an object with `type: "image_url"`
 * and an `image_url` value that is either a string or a key/value object.
 *
 * NOTE(review): the union ends with an unconstrained `z.string()`, so every
 * string is accepted. The URL and base64 members therefore do not reject
 * anything on their own; they mainly record the formats that are expected.
 */
export const ImageUrlContentSchema = z.object({
|
||||
type: z.literal("image_url"),
|
||||
image_url: z.union([
|
||||
// URL format (https://...)
|
||||
z.string().url(),
|
||||
// Base64 data URI (data:image/jpeg;base64,...); only jpeg, png, gif and webp match
|
||||
z.string().regex(/^data:image\/(jpeg|png|gif|webp);base64,/),
|
||||
// Object format (might contain detail or url properties); keys and values are unchecked
|
||||
z.record(z.any()),
|
||||
// Allow any string for maximum compatibility
|
||||
z.string()
|
||||
])
|
||||
});
|
||||
|
||||
/** Zod schema for one multimodal content item: either a text item or an image_url item. */
export const ContentItemSchema = z.union([TextContentSchema, ImageUrlContentSchema]);
|
||||
|
||||
// Export types for the content schemas
|
||||
export type TextContent = z.infer<typeof TextContentSchema>;
|
||||
export type ImageUrlContent = z.infer<typeof ImageUrlContentSchema>;
|
||||
export type ContentItem = z.infer<typeof ContentItemSchema>;
|
||||
|
||||
// List of Mistral models with vision capabilities
|
||||
/**
 * Mistral model IDs treated as having vision capabilities.
 *
 * `isMistralVisionModel` matches a model ID against this list, either exactly
 * or as a prefix followed by `-`.
 */
export const MISTRAL_VISION_MODELS = [
  "pixtral-12b-2409",
  "pixtral-12b-latest",
  "pixtral-large-2411",
  "pixtral-large-latest",
  "mistral-small-2503",
  "mistral-small-latest",
];

/**
 * Checks whether a model ID refers to a vision-capable Mistral model.
 *
 * @param model - Model ID to test.
 * @returns `true` when `model` equals an entry of `MISTRAL_VISION_MODELS` or
 *   starts with an entry followed by `-`; `false` otherwise, including when
 *   `model` is not a string at runtime.
 */
export function isMistralVisionModel(model: string): boolean {
  // Guard against a missing or non-string model so the check returns false
  // instead of throwing on `.startsWith`.
  if (typeof model !== "string") {
    return false;
  }

  return MISTRAL_VISION_MODELS.some(
    (visionModel) =>
      model === visionModel || model.startsWith(`${visionModel}-`)
  );
}
|
||||
|
||||
// Main Mistral chat message schema
|
||||
const MistralChatMessageSchema = z.object({
|
||||
role: z.enum(["system", "user", "assistant", "tool"]), // TODO: implement tools
|
||||
content: z.string(),
|
||||
// Support both string content (for backwards compatibility) and array of content items (for multimodal)
|
||||
content: z.union([
|
||||
z.string(),
|
||||
z.array(ContentItemSchema)
|
||||
]),
|
||||
prefix: z.boolean().optional(),
|
||||
});
|
||||
|
||||
@@ -107,7 +157,26 @@ export function fixMistralPrompt(
|
||||
// Consolidate multiple messages from the same role
|
||||
const last = acc[acc.length - 1];
|
||||
if (last.role === copy.role) {
|
||||
last.content += "\n\n" + copy.content;
|
||||
// Handle different content types for consolidation
|
||||
if (typeof last.content === "string" && typeof copy.content === "string") {
|
||||
// Both are strings, concatenate them
|
||||
last.content += "\n\n" + copy.content;
|
||||
} else if (Array.isArray(last.content) && typeof copy.content === "string") {
|
||||
// Add the string content as a new text content item
|
||||
last.content.push({
|
||||
type: "text",
|
||||
text: copy.content
|
||||
});
|
||||
} else if (typeof last.content === "string" && Array.isArray(copy.content)) {
|
||||
// Convert last.content to array and append copy.content items
|
||||
last.content = [
|
||||
{ type: "text", text: last.content },
|
||||
...copy.content
|
||||
];
|
||||
} else if (Array.isArray(last.content) && Array.isArray(copy.content)) {
|
||||
// Both are arrays, concatenate them
|
||||
last.content = [...last.content, ...copy.content];
|
||||
}
|
||||
} else {
|
||||
acc.push(copy);
|
||||
}
|
||||
@@ -125,18 +194,41 @@ export function fixMistralPrompt(
|
||||
|
||||
let jinjaTemplate: Template;
|
||||
let renderTemplate: (messages: MistralAIChatMessage[]) => string;
|
||||
|
||||
// Helper function to convert multimodal content to string format for text-only models
|
||||
/**
 * Flattens message content to plain text for the text-only prompt template.
 *
 * @param content - A plain string, or an array of multimodal content items.
 * @returns The string unchanged; for an array, the `text` of every item whose
 *   `type` is `"text"`, joined by a blank line. Image items and malformed
 *   entries are dropped. Any other value yields an empty string.
 */
function contentToString(content: string | unknown[]): string {
  if (typeof content === "string") {
    return content;
  }

  // Runtime fallback for values outside the declared type.
  if (!Array.isArray(content)) {
    return "";
  }

  const textParts: string[] = [];
  for (const item of content) {
    // Skip null and primitive entries instead of throwing on property access.
    if (typeof item !== "object" || item === null) {
      continue;
    }

    const { type, text } = item as { type?: unknown; text?: unknown };
    // Images are not supported in text-only templates, so keep text items only.
    if (type === "text" && typeof text === "string") {
      textParts.push(text);
    }
  }

  return textParts.join("\n\n");
}
|
||||
|
||||
function renderMistralPrompt(messages: MistralAIChatMessage[]) {
|
||||
if (!jinjaTemplate) {
|
||||
logger.warn("Lazy loading mistral chat template...");
|
||||
const { chatTemplate, bosToken, eosToken } =
|
||||
require("./templates/mistral-template").MISTRAL_TEMPLATE;
|
||||
jinjaTemplate = new Template(chatTemplate);
|
||||
renderTemplate = (messages) =>
|
||||
jinjaTemplate.render({
|
||||
messages,
|
||||
renderTemplate = (messages) => {
|
||||
// We need to convert any multimodal content to string format for the template
|
||||
const textOnlyMessages = messages.map(msg => ({
|
||||
...msg,
|
||||
content: contentToString(msg.content)
|
||||
}));
|
||||
|
||||
return jinjaTemplate.render({
|
||||
messages: textOnlyMessages,
|
||||
bos_token: bosToken,
|
||||
eos_token: eosToken,
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
return renderTemplate(messages);
|
||||
@@ -145,6 +237,9 @@ function renderMistralPrompt(messages: MistralAIChatMessage[]) {
|
||||
/**
|
||||
* Attempts to convert a Mistral chat completions request to a text completions,
|
||||
* using the official prompt template published by Mistral.
|
||||
*
|
||||
* Note: This transformation is only applicable for text-only models.
|
||||
* Multimodal/vision models (Pixtral, etc.) cannot use this transformation.
|
||||
*/
|
||||
export const transformMistralChatToText: APIFormatTransformer<
|
||||
typeof MistralAIV1TextCompletionsSchema
|
||||
@@ -159,8 +254,24 @@ export const transformMistralChatToText: APIFormatTransformer<
|
||||
throw result.error;
|
||||
}
|
||||
|
||||
const { messages, ...rest } = result.data;
|
||||
const prompt = renderMistralPrompt(messages);
|
||||
// Check if this is a vision request (contains any image_url content items)
|
||||
const { messages, model, ...rest } = result.data;
|
||||
const hasVisionContent = messages.some(msg =>
|
||||
Array.isArray(msg.content) &&
|
||||
msg.content.some(item => item.type === "image_url")
|
||||
);
|
||||
|
||||
return { ...rest, prompt, messages: undefined };
|
||||
// Cannot transform vision requests to text completions
|
||||
if (hasVisionContent) {
|
||||
req.log.warn(
|
||||
{ model },
|
||||
"Cannot transform Mistral vision request to text completions format"
|
||||
);
|
||||
throw new Error(
|
||||
"Vision requests (with image_url content) cannot be transformed to text completions format"
|
||||
);
|
||||
}
|
||||
|
||||
const prompt = renderMistralPrompt(messages);
|
||||
return { ...rest, model, prompt, messages: undefined };
|
||||
};
|
||||
|
||||
+36
-2
@@ -248,22 +248,56 @@ export function getGoogleAIModelFamily(model: string): GoogleAIModelFamily {
|
||||
}
|
||||
|
||||
export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
|
||||
const prunedModel = model.replace(/-(latest|\d{4})$/, "");
|
||||
const prunedModel = model.replace(/-(latest|\d{4}(-\d{2}){0,2})$/, "");
|
||||
|
||||
// Premier models (higher tier)
|
||||
switch (prunedModel) {
|
||||
// Existing direct matches
|
||||
case "mistral-tiny":
|
||||
case "mistral-small":
|
||||
case "mistral-medium":
|
||||
case "mistral-large":
|
||||
return prunedModel as MistralAIModelFamily;
|
||||
|
||||
// Premier models - Large tier
|
||||
case "mistral-large":
|
||||
case "pixtral-large":
|
||||
return "mistral-large";
|
||||
|
||||
// Premier models - Medium tier
|
||||
case "mistral-saba":
|
||||
return "mistral-medium";
|
||||
|
||||
// Premier models - Small tier
|
||||
case "codestral":
|
||||
case "ministral-8b":
|
||||
case "mistral-embed":
|
||||
case "mistral-moderation":
|
||||
return "mistral-small";
|
||||
|
||||
// Premier models - Tiny tier
|
||||
case "ministral-3b":
|
||||
return "mistral-tiny";
|
||||
|
||||
// Free models - Tiny tier
|
||||
case "open-mistral-7b":
|
||||
return "mistral-tiny";
|
||||
|
||||
// Free models - Small tier
|
||||
case "mistral-small":
|
||||
case "pixtral":
|
||||
case "pixtral-12b":
|
||||
case "open-mistral-nemo":
|
||||
case "open-mixtral-8x7b":
|
||||
case "codestral":
|
||||
case "open-codestral-mamba":
|
||||
case "mathstral":
|
||||
return "mistral-small";
|
||||
|
||||
// Free models - Medium tier
|
||||
case "open-mixtral-8x22b":
|
||||
return "mistral-medium";
|
||||
|
||||
// Default to small if unknown
|
||||
default:
|
||||
return "mistral-small";
|
||||
}
|
||||
|
||||
+12
-4
@@ -105,19 +105,27 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
|
||||
break;
|
||||
case "aws-mistral-tiny":
|
||||
case "mistral-tiny":
|
||||
cost = 0.0000003;
|
||||
// Using Ministral 3B pricing: $0.04/1M input tokens, $0.04/1M output tokens
|
||||
// For edge/tiny models, a more balanced 1:1 ratio is used
|
||||
cost = 0.00000004;
|
||||
break;
|
||||
case "aws-mistral-small":
|
||||
case "mistral-small":
|
||||
cost = 0.00000035;
|
||||
// Using Codestral pricing: $0.3/1M input, $0.9/1M output (highest in category)
|
||||
// Weighted average for 1:3 input:output ratio
|
||||
cost = 0.00000075;
|
||||
break;
|
||||
case "aws-mistral-medium":
|
||||
case "mistral-medium":
|
||||
cost = 0.000004;
|
||||
// Using Mistral Saba pricing: $0.2/1M input, $0.6/1M output
|
||||
// Weighted average for 1:3 input:output ratio
|
||||
cost = 0.0000005;
|
||||
break;
|
||||
case "aws-mistral-large":
|
||||
case "mistral-large":
|
||||
cost = 0.000012;
|
||||
// Using Mistral Large/Pixtral Large pricing: $2/1M input, $6/1M output
|
||||
// Weighted average for 1:3 input:output ratio
|
||||
cost = 0.000005;
|
||||
break;
|
||||
case "gemini-flash":
|
||||
cost = 0.0000002326;
|
||||
|
||||
Reference in New Issue
Block a user