mistral update

This commit is contained in:
reanon
2025-04-30 20:03:40 +02:00
parent 80d09f470b
commit c1cb395020
6 changed files with 323 additions and 49 deletions
@@ -1,11 +1,17 @@
import { RequestPreprocessor } from "../index";
import { countTokens } from "../../../../shared/tokenization";
import { assertNever } from "../../../../shared/utils";
import { OpenAIChatMessage } from "../../../../shared/api-schemas";
import { GoogleAIChatMessage } from "../../../../shared/api-schemas/google-ai";
import {
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
} from "../../../../shared/api-schemas";
AnthropicChatMessage,
flattenAnthropicMessages,
} from "../../../../shared/api-schemas/anthropic";
import {
MistralAIChatMessage,
ContentItem,
isMistralVisionModel
} from "../../../../shared/api-schemas/mistral-ai";
/**
* Given a request with an already-transformed body, counts the number of
@@ -61,9 +67,47 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
case "mistral-ai":
case "mistral-text": {
req.outputTokens = req.body.max_tokens;
const prompt: string | MistralAIChatMessage[] =
req.body.messages ?? req.body.prompt;
// Handle multimodal content (vision) in Mistral models
const isVisionModel = isMistralVisionModel(req.body.model);
const messages = req.body.messages;
// Check if this is a vision request with images
const hasImageContent = Array.isArray(messages) && messages.some(
(msg: MistralAIChatMessage) => Array.isArray(msg.content) &&
msg.content.some((item: ContentItem) => item.type === "image_url")
);
// For vision content, we add a fixed token count per image
// This is an estimate as the actual token count depends on image size and complexity
const TOKENS_PER_IMAGE = 1200; // Conservative estimate
let imageTokens = 0;
if (hasImageContent && Array.isArray(messages)) {
// Count images in the request
for (const msg of messages) {
if (Array.isArray(msg.content)) {
const imageCount = msg.content.filter(
(item: ContentItem) => item.type === "image_url"
).length;
imageTokens += imageCount * TOKENS_PER_IMAGE;
}
}
req.log.debug(
{ imageCount: imageTokens / TOKENS_PER_IMAGE, tokenEstimate: imageTokens },
"Estimated token count for Mistral vision images"
);
}
const prompt: string | MistralAIChatMessage[] = messages ?? req.body.prompt;
result = await countTokens({ req, prompt, service });
// Add the image tokens to the total count
if (imageTokens > 0) {
result.token_count += imageTokens;
}
break;
}
case "openai-image": {
@@ -4,7 +4,7 @@ import {
API_REQUEST_TRANSFORMERS,
} from "../../../../shared/api-schemas";
import { BadRequestError } from "../../../../shared/errors";
import { fixMistralPrompt } from "../../../../shared/api-schemas/mistral-ai";
import { fixMistralPrompt, isMistralVisionModel } from "../../../../shared/api-schemas/mistral-ai";
import {
isImageGenerationRequest,
isTextGenerationRequest,
@@ -117,12 +117,66 @@ function applyMistralPromptFixes(req: Request): void {
// mistral prompt and try to fix it if it fails. It will be re-validated
// after this function returns.
const result = API_REQUEST_VALIDATORS["mistral-ai"].parse(req.body);
// Check if this is a vision model request
const isVisionModel = isMistralVisionModel(req.body.model);
// Check if the request contains image content
const hasImageContent = result.messages?.some((msg: {content: string | any[]}) =>
Array.isArray(msg.content) &&
msg.content.some((item: any) => item.type === "image_url")
);
// For vision requests, normalize the image_url format
if (hasImageContent && Array.isArray(result.messages)) {
// Process each message with image content
result.messages.forEach((msg: any) => {
if (Array.isArray(msg.content)) {
// Process each content item
msg.content.forEach((item: any) => {
if (item.type === "image_url") {
// Normalize the image_url field to a string format that Mistral expects
if (typeof item.image_url === "object") {
// If it's an object, extract the URL or base64 data
if (item.image_url.url) {
item.image_url = item.image_url.url;
} else if (item.image_url.data) {
item.image_url = item.image_url.data;
}
req.log.info(
{ model: req.body.model },
"Normalized object-format image_url to string format"
);
}
}
});
}
});
}
// Apply Mistral prompt fixes while preserving multimodal content
req.body.messages = fixMistralPrompt(result.messages);
req.log.info(
{ n: req.body.messages.length, prev: result.messages.length },
{
n: req.body.messages.length,
prev: result.messages.length,
isVisionModel,
hasImageContent
},
"Applied Mistral chat prompt fixes."
);
// If this is a vision model with image content, it MUST use the chat API
// and cannot be converted to text completions
if (hasImageContent) {
req.log.info(
{ model: req.body.model },
"Detected Mistral vision request with image content. Keeping as chat format."
);
return;
}
// If the prompt relies on `prefix: true` for the last message, we need to
// convert it to a text completions request because AWS Mistral support for
// this feature is broken.
+50 -27
View File
@@ -20,38 +20,61 @@ import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middlewa
// months of releasing them so this list is hard to keep up to date. 2024-07-28
// https://docs.mistral.ai/platform/endpoints
export const KNOWN_MISTRAL_AI_MODELS = [
/*
Mistral Nemo
"A 12B model built with the partnership with Nvidia. It is easy to use and a
drop-in replacement in any system using Mistral 7B that it supersedes."
*/
/* Premier models */
// Mistral Large (top-tier reasoning model)
"mistral-large-latest",
"mistral-large-2411",
"mistral-large-2407",
"mistral-large-2402", // older version
// Pixtral Large (multimodal/vision model)
"pixtral-large-latest",
"pixtral-large-2411",
// Mistral Saba (language-specialized model)
"mistral-saba-latest",
"mistral-saba-2502",
// Codestral (code model)
"codestral-latest",
"codestral-2501",
"codestral-2405",
// Ministral models (edge models)
"ministral-8b-latest",
"ministral-8b-2410",
"ministral-3b-latest",
"ministral-3b-2410",
// Embedding & Moderation
"mistral-embed",
"mistral-embed-2312",
"mistral-moderation-latest",
"mistral-moderation-2411",
/* Free models */
// Mistral Small (with vision in latest version)
"mistral-small-latest",
"mistral-small-2503", // v3.1 with vision
"mistral-small-2402", // older version
// Pixtral 12B (vision model)
"pixtral-12b-latest",
"pixtral-12b-2409",
/* Research & Open Models */
// Mistral Nemo
"open-mistral-nemo",
"open-mistral-nemo-2407",
/*
Mistral Large
"Our flagship model with state-of-the-art reasoning, knowledge, and coding
capabilities."
*/
"mistral-large-latest",
"mistral-large-2407",
"mistral-large-2402", // deprecated
/*
Codestral
"A cutting-edge generative model that has been specifically designed and
optimized for code generation tasks, including fill-in-the-middle and code
completion."
note: this uses a separate bidi completion endpoint that is not implemented
*/
"codestral-latest",
"codestral-2405",
/* So-called "Research Models" */
// Earlier Mixtral & Mistral models
"open-mistral-7b",
"open-mixtral-8x7b",
"open-mistral-8x22b",
"open-mixtral-8x22b",
"open-codestral-mamba",
/* Deprecated production models */
"mistral-small-latest",
"mistral-small-2402",
"mathstral",
/* Legacy/deprecated models */
"mistral-medium-latest",
"mistral-medium-2312",
"mistral-tiny",
+119 -8
View File
@@ -4,9 +4,59 @@ import { Template } from "@huggingface/jinja";
import { APIFormatTransformer } from "./index";
import { logger } from "../../logger";
// Define the content types for multimodal messages
export const TextContentSchema = z.object({
  type: z.literal("text"),
  text: z.string()
});

export const ImageUrlContentSchema = z.object({
  type: z.literal("image_url"),
  // Accepts either a plain string (an https URL or a data:image/...;base64
  // payload) or an object form ({ url, detail, ... }) for OpenAI-style
  // compatibility. The original union also listed z.string().url() and a
  // data-URI regex ahead of the bare z.string(); because z.union returns the
  // first matching option and the bare string matches every string, those
  // branches were dead — this is the behaviorally identical simplification.
  image_url: z.union([
    z.string(),
    z.record(z.any())
  ])
});

export const ContentItemSchema = z.union([TextContentSchema, ImageUrlContentSchema]);

// Export types for the content schemas
export type TextContent = z.infer<typeof TextContentSchema>;
export type ImageUrlContent = z.infer<typeof ImageUrlContentSchema>;
export type ContentItem = z.infer<typeof ContentItemSchema>;
// List of Mistral models with vision capabilities
export const MISTRAL_VISION_MODELS = [
  "pixtral-12b-2409",
  "pixtral-12b-latest",
  "pixtral-large-2411",
  "pixtral-large-latest",
  "mistral-small-2503",
  "mistral-small-latest"
];

// Helper function to check if a model supports vision.
// Matches either the exact model id or any id that extends a known vision
// model with a further "-" suffix (e.g. a dated revision).
export function isMistralVisionModel(model: string): boolean {
  for (const visionModel of MISTRAL_VISION_MODELS) {
    if (model === visionModel || model.startsWith(visionModel + "-")) {
      return true;
    }
  }
  return false;
}
// Main Mistral chat message schema
const MistralChatMessageSchema = z.object({
  role: z.enum(["system", "user", "assistant", "tool"]), // TODO: implement tools
  // Support both string content (for backwards compatibility) and an array of
  // content items (for multimodal/vision requests).
  // NOTE: the block previously contained BOTH a stale `content: z.string()`
  // line and this union — a duplicate object key left over from a merge/diff;
  // the stale line is removed here.
  content: z.union([
    z.string(),
    z.array(ContentItemSchema)
  ]),
  prefix: z.boolean().optional(),
});
@@ -107,7 +157,26 @@ export function fixMistralPrompt(
// Consolidate multiple messages from the same role
const last = acc[acc.length - 1];
if (last.role === copy.role) {
last.content += "\n\n" + copy.content;
// Handle different content types for consolidation
if (typeof last.content === "string" && typeof copy.content === "string") {
// Both are strings, concatenate them
last.content += "\n\n" + copy.content;
} else if (Array.isArray(last.content) && typeof copy.content === "string") {
// Add the string content as a new text content item
last.content.push({
type: "text",
text: copy.content
});
} else if (typeof last.content === "string" && Array.isArray(copy.content)) {
// Convert last.content to array and append copy.content items
last.content = [
{ type: "text", text: last.content },
...copy.content
];
} else if (Array.isArray(last.content) && Array.isArray(copy.content)) {
// Both are arrays, concatenate them
last.content = [...last.content, ...copy.content];
}
} else {
acc.push(copy);
}
@@ -125,18 +194,41 @@ export function fixMistralPrompt(
let jinjaTemplate: Template;
let renderTemplate: (messages: MistralAIChatMessage[]) => string;
// Helper function to convert multimodal content to string format for
// text-only models. Image items are dropped; only text parts survive,
// joined with blank lines. Non-string, non-array input yields "".
function contentToString(content: string | any[]): string {
  if (typeof content === "string") {
    return content;
  }
  if (Array.isArray(content)) {
    const textParts: string[] = [];
    for (const item of content) {
      // Images are not supported in text-only templates; keep text only.
      if (item.type === "text") {
        textParts.push((item as any).text);
      }
    }
    return textParts.join("\n\n");
  }
  return "";
}
function renderMistralPrompt(messages: MistralAIChatMessage[]) {
if (!jinjaTemplate) {
logger.warn("Lazy loading mistral chat template...");
const { chatTemplate, bosToken, eosToken } =
require("./templates/mistral-template").MISTRAL_TEMPLATE;
jinjaTemplate = new Template(chatTemplate);
renderTemplate = (messages) =>
jinjaTemplate.render({
messages,
renderTemplate = (messages) => {
// We need to convert any multimodal content to string format for the template
const textOnlyMessages = messages.map(msg => ({
...msg,
content: contentToString(msg.content)
}));
return jinjaTemplate.render({
messages: textOnlyMessages,
bos_token: bosToken,
eos_token: eosToken,
});
};
}
return renderTemplate(messages);
@@ -145,6 +237,9 @@ function renderMistralPrompt(messages: MistralAIChatMessage[]) {
/**
* Attempts to convert a Mistral chat completions request to a text completions,
* using the official prompt template published by Mistral.
*
* Note: This transformation is only applicable for text-only models.
* Multimodal/vision models (Pixtral, etc.) cannot use this transformation.
*/
export const transformMistralChatToText: APIFormatTransformer<
typeof MistralAIV1TextCompletionsSchema
@@ -159,8 +254,24 @@ export const transformMistralChatToText: APIFormatTransformer<
throw result.error;
}
const { messages, ...rest } = result.data;
const prompt = renderMistralPrompt(messages);
// Check if this is a vision request (contains any image_url content items)
const { messages, model, ...rest } = result.data;
const hasVisionContent = messages.some(msg =>
Array.isArray(msg.content) &&
msg.content.some(item => item.type === "image_url")
);
return { ...rest, prompt, messages: undefined };
// Cannot transform vision requests to text completions
if (hasVisionContent) {
req.log.warn(
{ model },
"Cannot transform Mistral vision request to text completions format"
);
throw new Error(
"Vision requests (with image_url content) cannot be transformed to text completions format"
);
}
const prompt = renderMistralPrompt(messages);
return { ...rest, model, prompt, messages: undefined };
};
+36 -2
View File
@@ -248,22 +248,56 @@ export function getGoogleAIModelFamily(model: string): GoogleAIModelFamily {
}
export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
const prunedModel = model.replace(/-(latest|\d{4})$/, "");
const prunedModel = model.replace(/-(latest|\d{4}(-\d{2}){0,2})$/, "");
// Premier models (higher tier)
switch (prunedModel) {
// Existing direct matches
case "mistral-tiny":
case "mistral-small":
case "mistral-medium":
case "mistral-large":
return prunedModel as MistralAIModelFamily;
// Premier models - Large tier
case "mistral-large":
case "pixtral-large":
return "mistral-large";
// Premier models - Medium tier
case "mistral-saba":
return "mistral-medium";
// Premier models - Small tier
case "codestral":
case "ministral-8b":
case "mistral-embed":
case "mistral-moderation":
return "mistral-small";
// Premier models - Tiny tier
case "ministral-3b":
return "mistral-tiny";
// Free models - Tiny tier
case "open-mistral-7b":
return "mistral-tiny";
// Free models - Small tier
case "mistral-small":
case "pixtral":
case "pixtral-12b":
case "open-mistral-nemo":
case "open-mixtral-8x7b":
case "codestral":
case "open-codestral-mamba":
case "mathstral":
return "mistral-small";
// Free models - Medium tier
case "open-mixtral-8x22b":
return "mistral-medium";
// Default to small if unknown
default:
return "mistral-small";
}
+12 -4
View File
@@ -105,19 +105,27 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
break;
case "aws-mistral-tiny":
case "mistral-tiny":
cost = 0.0000003;
// Using Ministral 3B pricing: $0.04/1M input tokens, $0.04/1M output tokens
// For edge/tiny models, a more balanced 1:1 ratio is used
cost = 0.00000004;
break;
case "aws-mistral-small":
case "mistral-small":
cost = 0.00000035;
// Using Codestral pricing: $0.3/1M input, $0.9/1M output (highest in category)
// Weighted average for 1:3 input:output ratio
cost = 0.00000075;
break;
case "aws-mistral-medium":
case "mistral-medium":
cost = 0.000004;
// Using Mistral Saba pricing: $0.2/1M input, $0.6/1M output
// Weighted average for 1:3 input:output ratio
cost = 0.0000005;
break;
case "aws-mistral-large":
case "mistral-large":
cost = 0.000012;
// Using Mistral Large/Pixtral Large pricing: $2/1M input, $6/1M output
// Weighted average for 1:3 input:output ratio
cost = 0.000005;
break;
case "gemini-flash":
cost = 0.0000002326;