Mistral AI support (khanon/oai-reverse-proxy!58)
@@ -34,10 +34,10 @@
# Which model types users are allowed to access.
# The following model families are recognized:
-# turbo | gpt4 | gpt4-32k | gpt4-turbo | dall-e | claude | gemini-pro | aws-claude | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo
+# turbo | gpt4 | gpt4-32k | gpt4-turbo | dall-e | claude | gemini-pro | mistral-tiny | mistral-small | mistral-medium | aws-claude | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo
# By default, all models are allowed except for 'dall-e'. To allow DALL-E image
# generation, uncomment the line below and add 'dall-e' to the list.
-# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,claude,gemini-pro,aws-claude,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo
+# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,claude,gemini-pro,mistral-tiny,mistral-small,mistral-medium,aws-claude,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo

# URLs from which requests will be blocked.
# BLOCKED_ORIGINS=reddit.com,9gag.com
@@ -26,6 +26,10 @@ type Config = {
   * same but the APIs are different. Vertex is the GCP product for enterprise.
   **/
  googleAIKey?: string;
+  /**
+   * Comma-delimited list of Mistral AI API keys.
+   */
+  mistralAIKey?: string;
  /**
   * Comma-delimited list of AWS credentials. Each credential item should be a
   * colon-delimited list of access key, secret key, and AWS region.
@@ -203,6 +207,7 @@ export const config: Config = {
  openaiKey: getEnvWithDefault("OPENAI_KEY", ""),
  anthropicKey: getEnvWithDefault("ANTHROPIC_KEY", ""),
  googleAIKey: getEnvWithDefault("GOOGLE_AI_KEY", ""),
+  mistralAIKey: getEnvWithDefault("MISTRAL_AI_KEY", ""),
  awsCredentials: getEnvWithDefault("AWS_CREDENTIALS", ""),
  azureCredentials: getEnvWithDefault("AZURE_CREDENTIALS", ""),
  proxyKey: getEnvWithDefault("PROXY_KEY", ""),
@@ -235,6 +240,9 @@ export const config: Config = {
  "gpt4-turbo",
  "claude",
  "gemini-pro",
+  "mistral-tiny",
+  "mistral-small",
+  "mistral-medium",
  "aws-claude",
  "azure-turbo",
  "azure-gpt4",
@@ -372,6 +380,7 @@ export const OMITTED_KEYS = [
  "openaiKey",
  "anthropicKey",
  "googleAIKey",
+  "mistralAIKey",
  "awsCredentials",
  "azureCredentials",
  "proxyKey",
@@ -17,6 +17,9 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
  "dall-e": "DALL-E",
  "claude": "Claude",
  "gemini-pro": "Gemini Pro",
+  "mistral-tiny": "Mistral 7B",
+  "mistral-small": "Mixtral 8x7B",
+  "mistral-medium": "Mistral prototype",
  "aws-claude": "AWS Claude",
  "azure-turbo": "Azure GPT-3.5 Turbo",
  "azure-gpt4": "Azure GPT-4",
@@ -193,6 +193,7 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
  const format = req.outboundApi;
  switch (format) {
    case "openai":
+    case "mistral-ai":
      return body.choices[0].message.content;
    case "openai-text":
      return body.choices[0].text;
@@ -222,6 +223,7 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
  switch (format) {
    case "openai":
    case "openai-text":
+    case "mistral-ai":
      return body.model;
    case "openai-image":
      return req.body.model;
@@ -40,6 +40,8 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
      );
    case "google-ai":
      throw new Error("add-key should not be used for this model.");
+    case "mistral-ai":
+      throw new Error("Mistral AI should never be translated");
    case "openai-image":
      assignedKey = keyPool.get("dall-e-3");
      break;
@@ -69,6 +71,7 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
      if (key.organizationId) {
        proxyReq.setHeader("OpenAI-Organization", key.organizationId);
      }
+    case "mistral-ai":
      proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
      break;
    case "azure":
@@ -1,7 +1,11 @@
import { RequestPreprocessor } from "../index";
import { countTokens } from "../../../../shared/tokenization";
import { assertNever } from "../../../../shared/utils";
-import type { GoogleAIChatMessage, OpenAIChatMessage } from "./transform-outbound-payload";
+import type {
+  GoogleAIChatMessage,
+  MistralAIChatMessage,
+  OpenAIChatMessage,
+} from "./transform-outbound-payload";

/**
 * Given a request with an already-transformed body, counts the number of
@@ -36,6 +40,12 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
      result = await countTokens({ req, prompt, service });
      break;
    }
+    case "mistral-ai": {
+      req.outputTokens = req.body.max_tokens;
+      const prompt: MistralAIChatMessage[] = req.body.messages;
+      result = await countTokens({ req, prompt, service });
+      break;
+    }
    case "openai-image": {
      req.outputTokens = 1;
      result = await countTokens({ req, service });
@@ -3,7 +3,10 @@ import { config } from "../../../../config";
import { assertNever } from "../../../../shared/utils";
import { RequestPreprocessor } from "../index";
import { UserInputError } from "../../../../shared/errors";
-import { OpenAIChatMessage } from "./transform-outbound-payload";
+import {
+  MistralAIChatMessage,
+  OpenAIChatMessage,
+} from "./transform-outbound-payload";

const rejectedClients = new Map<string, number>();

@@ -53,8 +56,9 @@ function getPromptFromRequest(req: Request) {
    case "anthropic":
      return body.prompt;
    case "openai":
+    case "mistral-ai":
      return body.messages
-        .map((msg: OpenAIChatMessage) => {
+        .map((msg: OpenAIChatMessage | MistralAIChatMessage) => {
          const text = Array.isArray(msg.content)
            ? msg.content
                .map((c) => {
@@ -155,12 +155,38 @@ export type GoogleAIChatMessage = z.infer<
  typeof GoogleAIV1GenerateContentSchema
>["contents"][0];

+// https://docs.mistral.ai/api#operation/createChatCompletion
+const MistralAIV1ChatCompletionsSchema = z.object({
+  model: z.string(),
+  messages: z.array(
+    z.object({
+      role: z.enum(["system", "user", "assistant"]),
+      content: z.string(),
+    })
+  ),
+  temperature: z.number().optional().default(0.7),
+  top_p: z.number().optional().default(1),
+  max_tokens: z.coerce
+    .number()
+    .int()
+    .nullish()
+    .transform((v) => Math.min(v ?? OPENAI_OUTPUT_MAX, OPENAI_OUTPUT_MAX)),
+  stream: z.boolean().optional().default(false),
+  safe_mode: z.boolean().optional().default(false),
+  random_seed: z.number().int().optional(),
+});
+
+export type MistralAIChatMessage = z.infer<
+  typeof MistralAIV1ChatCompletionsSchema
+>["messages"][0];
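As an illustration, here is a request body that satisfies the new schema. The values are invented; parse fills in the declared defaults, and because max_tokens is omitted (nullish) the transform clamps it to OPENAI_OUTPUT_MAX:

const parsed = MistralAIV1ChatCompletionsSchema.parse({
  model: "mistral-small",
  messages: [
    { role: "system", content: "You are a helpful assistant." },
    { role: "user", content: "Hello!" },
  ],
});
// parsed.temperature === 0.7, parsed.top_p === 1, parsed.stream === false,
// parsed.safe_mode === false, parsed.max_tokens === OPENAI_OUTPUT_MAX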

const VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
  anthropic: AnthropicV1CompleteSchema,
  openai: OpenAIV1ChatCompletionSchema,
  "openai-text": OpenAIV1TextCompletionSchema,
  "openai-image": OpenAIV1ImagesGenerationSchema,
  "google-ai": GoogleAIV1GenerateContentSchema,
+  "mistral-ai": MistralAIV1ChatCompletionsSchema,
};

/** Transforms an incoming request body to one that matches the target API. */
@@ -7,6 +7,7 @@ import { RequestPreprocessor } from "../index";
const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
const GOOGLE_AI_MAX_CONTEXT = 32000;
+const MISTRAL_AI_MAX_CONTEXT = 32768;

/**
 * Assigns `req.promptTokens` and `req.outputTokens` based on the request body
@@ -34,6 +35,9 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
    case "google-ai":
      proxyMax = GOOGLE_AI_MAX_CONTEXT;
      break;
+    case "mistral-ai":
+      proxyMax = MISTRAL_AI_MAX_CONTEXT;
+      break;
    case "openai-image":
      return;
    default:
@@ -64,6 +67,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
    modelMax = 200000;
  } else if (model.match(/^gemini-\d{3}$/)) {
    modelMax = GOOGLE_AI_MAX_CONTEXT;
+  } else if (model.match(/^mistral-(tiny|small|medium)$/)) {
+    modelMax = MISTRAL_AI_MAX_CONTEXT;
  } else if (model.match(/^anthropic\.claude/)) {
    // Not sure if AWS Claude has the same context limit as Anthropic Claude.
    modelMax = 100000;
@@ -292,6 +292,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
  switch (service) {
    case "openai":
    case "google-ai":
+    case "mistral-ai":
    case "azure":
      const filteredCodes = ["content_policy_violation", "content_filter"];
      if (filteredCodes.includes(errorPayload.error?.code)) {
@@ -351,6 +352,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
      handleAwsRateLimitError(req, errorPayload);
      break;
    case "azure":
+    case "mistral-ai":
      handleAzureRateLimitError(req, errorPayload);
      break;
    case "google-ai":
@@ -379,6 +381,9 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
    case "google-ai":
      errorPayload.proxy_note = `The requested Google AI model might not exist, or the key might not be provisioned for it.`;
      break;
+    case "mistral-ai":
+      errorPayload.proxy_note = `The requested Mistral AI model might not exist, or the key might not be provisioned for it.`;
+      break;
    case "aws":
      errorPayload.proxy_note = `The requested AWS resource might not exist, or the key might not have access to it.`;
      break;
@@ -9,7 +9,10 @@ import {
} from "../common";
import { ProxyResHandlerWithBody } from ".";
import { assertNever } from "../../../shared/utils";
-import { OpenAIChatMessage } from "../request/preprocessors/transform-outbound-payload";
+import {
+  MistralAIChatMessage,
+  OpenAIChatMessage,
+} from "../request/preprocessors/transform-outbound-payload";

/** If prompt logging is enabled, enqueues the prompt for logging. */
export const logPrompt: ProxyResHandlerWithBody = async (
@@ -54,12 +57,13 @@ type OaiImageResult = {
const getPromptForRequest = (
  req: Request,
  responseBody: Record<string, any>
-): string | OpenAIChatMessage[] | OaiImageResult => {
+): string | OpenAIChatMessage[] | MistralAIChatMessage[] | OaiImageResult => {
  // Since the prompt logger only runs after the request has been proxied, we
  // can assume the body has already been transformed to the target API's
  // format.
  switch (req.outboundApi) {
    case "openai":
+    case "mistral-ai":
      return req.body.messages;
    case "openai-text":
      return req.body.prompt;
@@ -81,7 +85,7 @@ const getPromptForRequest = (
};

const flattenMessages = (
-  val: string | OpenAIChatMessage[] | OaiImageResult
+  val: string | OpenAIChatMessage[] | MistralAIChatMessage[] | OaiImageResult
): string => {
  if (typeof val === "string") {
    return val.trim();
@@ -4,7 +4,7 @@ import {
  mergeEventsForAnthropic,
  mergeEventsForOpenAIChat,
  mergeEventsForOpenAIText,
-  OpenAIChatCompletionStreamEvent
+  OpenAIChatCompletionStreamEvent,
} from "./index";

/**
@@ -28,6 +28,7 @@ export class EventAggregator {
    switch (this.format) {
      case "openai":
      case "google-ai":
+      case "mistral-ai":
        return mergeEventsForOpenAIChat(this.events);
      case "openai-text":
        return mergeEventsForOpenAIText(this.events);
@@ -106,6 +106,7 @@ function getTransformer(
): StreamingCompletionTransformer {
  switch (responseApi) {
    case "openai":
+    case "mistral-ai":
      return passthroughToOpenAI;
    case "openai-text":
      return openAITextToOpenAIChat;
@@ -0,0 +1,116 @@
import { RequestHandler, Router } from "express";
import { createProxyMiddleware } from "http-proxy-middleware";
import { config } from "../config";
import { keyPool } from "../shared/key-management";
import {
  getMistralAIModelFamily,
  MistralAIModelFamily,
  ModelFamily,
} from "../shared/models";
import { logger } from "../logger";
import { createQueueMiddleware } from "./queue";
import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common";
import {
  addKey,
  createOnProxyReqHandler,
  createPreprocessorMiddleware,
  finalizeBody,
} from "./middleware/request";
import {
  createOnProxyResHandler,
  ProxyResHandlerWithBody,
} from "./middleware/response";

// https://docs.mistral.ai/platform/endpoints
export const KNOWN_MISTRAL_AI_MODELS = [
  "mistral-tiny",
  "mistral-small",
  "mistral-medium",
];

let modelsCache: any = null;
let modelsCacheTime = 0;

export function generateModelList(models = KNOWN_MISTRAL_AI_MODELS) {
  let available = new Set<MistralAIModelFamily>();
  for (const key of keyPool.list()) {
    if (key.isDisabled || key.service !== "mistral-ai") continue;
    key.modelFamilies.forEach((family) =>
      available.add(family as MistralAIModelFamily)
    );
  }
  const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
  available = new Set([...available].filter((x) => allowed.has(x)));

  return models
    .map((id) => ({
      id,
      object: "model",
      created: new Date().getTime(),
      owned_by: "mistral-ai",
    }))
    .filter((model) => available.has(getMistralAIModelFamily(model.id)));
}
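For reference, the models endpoint ultimately serves this list wrapped in an OpenAI-style envelope (see handleModelRequest below). The ids and timestamps here are illustrative; note that created comes from new Date().getTime() and is therefore in milliseconds:

const exampleModelList = {
  object: "list",
  data: [
    { id: "mistral-tiny", object: "model", created: 1704067200000, owned_by: "mistral-ai" },
    { id: "mistral-small", object: "model", created: 1704067200000, owned_by: "mistral-ai" },
  ],
};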

const handleModelRequest: RequestHandler = (_req, res) => {
  // Serve from cache if the list was generated within the last minute.
  if (new Date().getTime() - modelsCacheTime < 1000 * 60) {
    res.status(200).json(modelsCache);
    return;
  }
  const result = generateModelList();
  modelsCache = { object: "list", data: result };
  modelsCacheTime = new Date().getTime();
  res.status(200).json(modelsCache);
};

const mistralAIResponseHandler: ProxyResHandlerWithBody = async (
  _proxyRes,
  req,
  res,
  body
) => {
  if (typeof body !== "object") {
    throw new Error("Expected body to be an object");
  }

  if (config.promptLogging) {
    const host = req.get("host");
    body.proxy_note = `Prompts are logged on this proxy instance. See ${host} for more information.`;
  }

  if (req.tokenizerInfo) {
    body.proxy_tokenizer = req.tokenizerInfo;
  }

  res.status(200).json(body);
};

const mistralAIProxy = createQueueMiddleware({
  proxyMiddleware: createProxyMiddleware({
    target: "https://api.mistral.ai",
    changeOrigin: true,
    selfHandleResponse: true,
    logger,
    on: {
      proxyReq: createOnProxyReqHandler({
        pipeline: [addKey, finalizeBody],
      }),
      proxyRes: createOnProxyResHandler([mistralAIResponseHandler]),
      error: handleProxyError,
    },
  }),
});

const mistralAIRouter = Router();
mistralAIRouter.get("/v1/models", handleModelRequest);
// General chat completion endpoint.
mistralAIRouter.post(
  "/v1/chat/completions",
  ipLimiter,
  createPreprocessorMiddleware({
    inApi: "mistral-ai",
    outApi: "mistral-ai",
    service: "mistral-ai",
  }),
  mistralAIProxy
);

export const mistralAI = mistralAIRouter;
@@ -5,6 +5,7 @@ import { openai } from "./openai";
import { openaiImage } from "./openai-image";
import { anthropic } from "./anthropic";
import { googleAI } from "./google-ai";
+import { mistralAI } from "./mistral-ai";
import { aws } from "./aws";
import { azure } from "./azure";

@@ -32,6 +33,7 @@ proxyRouter.use("/openai", addV1, openai);
proxyRouter.use("/openai-image", addV1, openaiImage);
proxyRouter.use("/anthropic", addV1, anthropic);
proxyRouter.use("/google-ai", addV1, googleAI);
+proxyRouter.use("/mistral-ai", addV1, mistralAI);
proxyRouter.use("/aws/claude", addV1, aws);
proxyRouter.use("/azure/openai", addV1, azure);
// Redirect browser requests to the homepage.
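With the route registered, clients reach Mistral AI through the same gateway paths as the other services. A hypothetical end-to-end call, where the host and the /proxy mount prefix are assumptions rather than something this diff establishes:

const res = await fetch(
  "https://my-proxy.example.com/proxy/mistral-ai/v1/chat/completions",
  {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: "Bearer <proxy key or user token>",
    },
    body: JSON.stringify({
      model: "mistral-tiny",
      messages: [{ role: "user", content: "Hello!" }],
    }),
  }
);
console.log((await res.json()).choices[0].message.content);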
@@ -16,6 +16,7 @@ import {
  GoogleAIModelFamily,
  LLM_SERVICES,
  LLMService,
+  MistralAIModelFamily,
  MODEL_FAMILY_SERVICE,
  ModelFamily,
  OpenAIModelFamily,
@@ -24,6 +25,7 @@ import { getCostSuffix, getTokenCostUsd, prettyTokens } from "./shared/stats";
import { getUniqueIps } from "./proxy/rate-limit";
import { assertNever } from "./shared/utils";
import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue";
+import { MistralAIKey } from "./shared/key-management/mistral-ai/provider";

const CACHE_TTL = 2000;

@@ -36,6 +38,8 @@ const keyIsAnthropicKey = (k: KeyPoolKey): k is AnthropicKey =>
  k.service === "anthropic";
const keyIsGoogleAIKey = (k: KeyPoolKey): k is GoogleAIKey =>
  k.service === "google-ai";
+const keyIsMistralAIKey = (k: KeyPoolKey): k is MistralAIKey =>
+  k.service === "mistral-ai";
const keyIsAwsKey = (k: KeyPoolKey): k is AwsBedrockKey => k.service === "aws";

/** Stats aggregated across all keys for a given service. */
@@ -86,6 +90,7 @@ export type ServiceInfo = {
  "openai-image"?: string;
  anthropic?: string;
  "google-ai"?: string;
+  "mistral-ai"?: string;
  aws?: string;
  azure?: string;
};
@@ -99,7 +104,8 @@ export type ServiceInfo = {
  & { [f in AnthropicModelFamily]?: AnthropicInfo; }
  & { [f in AwsBedrockModelFamily]?: AwsInfo }
  & { [f in AzureOpenAIModelFamily]?: BaseFamilyInfo; }
-  & { [f in GoogleAIModelFamily]?: BaseFamilyInfo };
+  & { [f in GoogleAIModelFamily]?: BaseFamilyInfo }
+  & { [f in MistralAIModelFamily]?: BaseFamilyInfo };

// https://stackoverflow.com/a/66661477
// type DeepKeyOf<T> = (
@@ -128,6 +134,9 @@ const SERVICE_ENDPOINTS: { [s in LLMService]: Record<string, string> } = {
  "google-ai": {
    "google-ai": `%BASE%/google-ai`,
  },
+  "mistral-ai": {
+    "mistral-ai": `%BASE%/mistral-ai`,
+  },
  aws: {
    aws: `%BASE%/aws/claude`,
  },
@@ -268,6 +277,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
  increment(serviceStats, "openai__keys", k.service === "openai" ? 1 : 0);
  increment(serviceStats, "anthropic__keys", k.service === "anthropic" ? 1 : 0);
  increment(serviceStats, "google-ai__keys", k.service === "google-ai" ? 1 : 0);
+  increment(serviceStats, "mistral-ai__keys", k.service === "mistral-ai" ? 1 : 0);
  increment(serviceStats, "aws__keys", k.service === "aws" ? 1 : 0);
  increment(serviceStats, "azure__keys", k.service === "azure" ? 1 : 0);
@@ -331,6 +341,18 @@ function addKeyToAggregates(k: KeyPoolKey) {
      increment(modelStats, `${family}__tokens`, k["gemini-proTokens"]);
      break;
    }
+    case "mistral-ai": {
+      if (!keyIsMistralAIKey(k)) throw new Error("Invalid key type");
+      k.modelFamilies.forEach((f) => {
+        const tokens = k[`${f}Tokens`];
+        sumTokens += tokens;
+        sumCost += getTokenCostUsd(f, tokens);
+        increment(modelStats, `${f}__tokens`, tokens);
+        increment(modelStats, `${f}__revoked`, k.isRevoked ? 1 : 0);
+        increment(modelStats, `${f}__active`, k.isDisabled ? 0 : 1);
+      });
+      break;
+    }
    case "aws": {
      if (!keyIsAwsKey(k)) throw new Error("Invalid key type");
      const family = "aws-claude";
@@ -11,6 +11,7 @@ export type APIFormat =
  | "openai"
  | "anthropic"
  | "google-ai"
+  | "mistral-ai"
  | "openai-text"
  | "openai-image";
export type Model =
@@ -11,6 +11,7 @@ import { OpenAIKeyProvider, OpenAIKeyUpdate } from "./openai/provider";
import { GoogleAIKeyProvider } from "./google-ai/provider";
import { AwsBedrockKeyProvider } from "./aws/provider";
import { AzureOpenAIKeyProvider } from "./azure/provider";
+import { MistralAIKeyProvider } from "./mistral-ai/provider";

type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate;

@@ -24,6 +25,7 @@ export class KeyPool {
    this.keyProviders.push(new OpenAIKeyProvider());
    this.keyProviders.push(new AnthropicKeyProvider());
    this.keyProviders.push(new GoogleAIKeyProvider());
+    this.keyProviders.push(new MistralAIKeyProvider());
    this.keyProviders.push(new AwsBedrockKeyProvider());
    this.keyProviders.push(new AzureOpenAIKeyProvider());
  }
@@ -121,6 +123,9 @@ export class KeyPool {
    } else if (model.includes("gemini")) {
      // https://developers.generativeai.google.com/models/language
      return "google-ai";
+    } else if (model.includes("mistral")) {
+      // https://docs.mistral.ai/platform/endpoints
+      return "mistral-ai";
    } else if (model.startsWith("anthropic.claude")) {
      // AWS offers models from a few providers
      // https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids-arns.html
@@ -0,0 +1,112 @@
import axios, { AxiosError } from "axios";
import type { MistralAIModelFamily, OpenAIModelFamily } from "../../models";
import { KeyCheckerBase } from "../key-checker-base";
import type { MistralAIKey, MistralAIKeyProvider } from "./provider";
import { getMistralAIModelFamily, getOpenAIModelFamily } from "../../models";

const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour
const GET_MODELS_URL = "https://api.mistral.ai/v1/models";

type GetModelsResponse = {
  data: [{ id: string }];
};

type MistralAIError = {
  message: string;
  request_id: string;
};

type UpdateFn = typeof MistralAIKeyProvider.prototype.update;

export class MistralAIKeyChecker extends KeyCheckerBase<MistralAIKey> {
  constructor(keys: MistralAIKey[], updateKey: UpdateFn) {
    super(keys, {
      service: "mistral-ai",
      keyCheckPeriod: KEY_CHECK_PERIOD,
      minCheckInterval: MIN_CHECK_INTERVAL,
      recurringChecksEnabled: false,
      updateKey,
    });
  }

  protected async testKeyOrFail(key: MistralAIKey) {
    // We only need to check for provisioned models on the initial check.
    const isInitialCheck = !key.lastChecked;
    if (isInitialCheck) {
      const provisionedModels = await this.getProvisionedModels(key);
      const updates = {
        modelFamilies: provisionedModels,
      };
      this.updateKey(key.hash, updates);
    }
    this.log.info({ key: key.hash, models: key.modelFamilies }, "Checked key.");
  }

  private async getProvisionedModels(
    key: MistralAIKey
  ): Promise<MistralAIModelFamily[]> {
    const opts = { headers: MistralAIKeyChecker.getHeaders(key) };
    const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
    const models = data.data;

    const families = new Set<MistralAIModelFamily>();
    models.forEach(({ id }) => families.add(getMistralAIModelFamily(id)));

    // We want to update the key's model families here, but we don't want to
    // update its `lastChecked` timestamp because we need to let the liveness
    // check run before we can consider the key checked.

    const familiesArray = [...families];
    const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
    this.updateKey(key.hash, {
      modelFamilies: familiesArray,
      lastChecked: keyFromPool.lastChecked,
    });
    return familiesArray;
  }

  protected handleAxiosError(key: MistralAIKey, error: AxiosError) {
    if (error.response && MistralAIKeyChecker.errorIsMistralAIError(error)) {
      const { status, data } = error.response;
      if (status === 401) {
        this.log.warn(
          { key: key.hash, error: data },
          "Key is invalid or revoked. Disabling key."
        );
        this.updateKey(key.hash, {
          isDisabled: true,
          isRevoked: true,
          modelFamilies: ["mistral-tiny"],
        });
      } else {
        this.log.error(
          { key: key.hash, status, error: data },
          "Encountered unexpected error status while checking key. This may indicate a change in the API; please report this."
        );
        this.updateKey(key.hash, { lastChecked: Date.now() });
      }
      return;
    }
    this.log.error(
      { key: key.hash, error: error.message },
      "Network error while checking key; trying this key again in a minute."
    );
    const oneMinute = 60 * 1000;
    const next = Date.now() - (KEY_CHECK_PERIOD - oneMinute);
    this.updateKey(key.hash, { lastChecked: next });
  }

  static errorIsMistralAIError(
    error: AxiosError
  ): error is AxiosError<MistralAIError> {
    const data = error.response?.data as any;
    return data?.message && data?.request_id;
  }

  static getHeaders(key: MistralAIKey) {
    return {
      Authorization: `Bearer ${key.key}`,
    };
  }
}
@@ -0,0 +1,210 @@
import crypto from "crypto";
import { Key, KeyProvider, Model } from "..";
import { config } from "../../../config";
import { logger } from "../../../logger";
import { MistralAIModelFamily, getMistralAIModelFamily } from "../../models";
import { MistralAIKeyChecker } from "./checker";

export type MistralAIModel =
  | "mistral-tiny"
  | "mistral-small"
  | "mistral-medium";

export type MistralAIKeyUpdate = Omit<
  Partial<MistralAIKey>,
  | "key"
  | "hash"
  | "lastUsed"
  | "promptCount"
  | "rateLimitedAt"
  | "rateLimitedUntil"
>;

type MistralAIKeyUsage = {
  [K in MistralAIModelFamily as `${K}Tokens`]: number;
};
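The mapped type above derives one token counter per model family. Written out by hand, it is equivalent to:

type MistralAIKeyUsageExpanded = {
  "mistral-tinyTokens": number;
  "mistral-smallTokens": number;
  "mistral-mediumTokens": number;
};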

export interface MistralAIKey extends Key, MistralAIKeyUsage {
  readonly service: "mistral-ai";
  readonly modelFamilies: MistralAIModelFamily[];
  /** The time at which this key was last rate limited. */
  rateLimitedAt: number;
  /** The time until which this key is rate limited. */
  rateLimitedUntil: number;
}

/**
 * Upon being rate limited, a key will be locked out for this many milliseconds
 * while we wait for other concurrent requests to finish.
 */
const RATE_LIMIT_LOCKOUT = 2000;
/**
 * Upon assigning a key, we will wait this many milliseconds before allowing it
 * to be used again. This is to prevent the queue from flooding a key with too
 * many requests while we wait to learn whether previous ones succeeded.
 */
const KEY_REUSE_DELAY = 500;

export class MistralAIKeyProvider implements KeyProvider<MistralAIKey> {
  readonly service = "mistral-ai";

  private keys: MistralAIKey[] = [];
  private checker?: MistralAIKeyChecker;
  private log = logger.child({ module: "key-provider", service: this.service });

  constructor() {
    const keyConfig = config.mistralAIKey?.trim();
    if (!keyConfig) {
      this.log.warn(
        "MISTRAL_AI_KEY is not set. Mistral AI API will not be available."
      );
      return;
    }
    let bareKeys: string[];
    bareKeys = [...new Set(keyConfig.split(",").map((k) => k.trim()))];
    for (const key of bareKeys) {
      const newKey: MistralAIKey = {
        key,
        service: this.service,
        modelFamilies: ["mistral-tiny", "mistral-small", "mistral-medium"],
        isDisabled: false,
        isRevoked: false,
        promptCount: 0,
        lastUsed: 0,
        rateLimitedAt: 0,
        rateLimitedUntil: 0,
        hash: `mst-${crypto
          .createHash("sha256")
          .update(key)
          .digest("hex")
          .slice(0, 8)}`,
        lastChecked: 0,
        "mistral-tinyTokens": 0,
        "mistral-smallTokens": 0,
        "mistral-mediumTokens": 0,
      };
      this.keys.push(newKey);
    }
    this.log.info({ keyCount: this.keys.length }, "Loaded Mistral AI keys.");
  }

  public init() {
    if (config.checkKeys) {
      const updateFn = this.update.bind(this);
      this.checker = new MistralAIKeyChecker(this.keys, updateFn);
      this.checker.start();
    }
  }

  public list() {
    return this.keys.map((k) => Object.freeze({ ...k, key: undefined }));
  }

  public get(_model: Model) {
    const availableKeys = this.keys.filter((k) => !k.isDisabled);
    if (availableKeys.length === 0) {
      throw new Error("No Mistral AI keys available");
    }

    // (largely copied from the OpenAI provider, without trial key support)
    // Select a key, from highest priority to lowest priority:
    // 1. Keys which are not rate limited
    //    a. If all keys were rate limited recently, select the least-recently
    //       rate limited key.
    // 2. Keys which have not been used in the longest time

    const now = Date.now();

    const keysByPriority = availableKeys.sort((a, b) => {
      const aRateLimited = now - a.rateLimitedAt < RATE_LIMIT_LOCKOUT;
      const bRateLimited = now - b.rateLimitedAt < RATE_LIMIT_LOCKOUT;

      if (aRateLimited && !bRateLimited) return 1;
      if (!aRateLimited && bRateLimited) return -1;
      if (aRateLimited && bRateLimited) {
        return a.rateLimitedAt - b.rateLimitedAt;
      }

      return a.lastUsed - b.lastUsed;
    });

    const selectedKey = keysByPriority[0];
    selectedKey.lastUsed = now;
    this.throttle(selectedKey.hash);
    return { ...selectedKey };
  }
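A standalone sketch of the ordering this comparator produces, using invented timestamps (the constants mirror those defined above):

const now = 10_000;
const RATE_LIMIT_LOCKOUT = 2_000;
const keys = [
  { hash: "A", rateLimitedAt: 9_500, lastUsed: 1_000 }, // rate limited 500ms ago
  { hash: "B", rateLimitedAt: 3_000, lastUsed: 9_000 }, // lockout long expired
];
const sorted = [...keys].sort((a, b) => {
  const aLimited = now - a.rateLimitedAt < RATE_LIMIT_LOCKOUT;
  const bLimited = now - b.rateLimitedAt < RATE_LIMIT_LOCKOUT;
  if (aLimited && !bLimited) return 1;
  if (!aLimited && bLimited) return -1;
  if (aLimited && bLimited) return a.rateLimitedAt - b.rateLimitedAt;
  return a.lastUsed - b.lastUsed;
});
console.log(sorted[0].hash); // "B": a key outside its lockout window wins even if used more recently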

  public disable(key: MistralAIKey) {
    const keyFromPool = this.keys.find((k) => k.hash === key.hash);
    if (!keyFromPool || keyFromPool.isDisabled) return;
    keyFromPool.isDisabled = true;
    this.log.warn({ key: key.hash }, "Key disabled");
  }

  public update(hash: string, update: Partial<MistralAIKey>) {
    const keyFromPool = this.keys.find((k) => k.hash === hash)!;
    Object.assign(keyFromPool, { lastChecked: Date.now(), ...update });
  }

  public available() {
    return this.keys.filter((k) => !k.isDisabled).length;
  }

  public incrementUsage(hash: string, model: string, tokens: number) {
    const key = this.keys.find((k) => k.hash === hash);
    if (!key) return;
    key.promptCount++;
    const family = getMistralAIModelFamily(model);
    key[`${family}Tokens`] += tokens;
  }

  public getLockoutPeriod() {
    const activeKeys = this.keys.filter((k) => !k.isDisabled);
    // Don't lock out if there are no keys available or the queue will stall.
    // Just let it through so the add-key middleware can throw an error.
    if (activeKeys.length === 0) return 0;

    const now = Date.now();
    const rateLimitedKeys = activeKeys.filter((k) => now < k.rateLimitedUntil);
    const anyNotRateLimited = rateLimitedKeys.length < activeKeys.length;

    if (anyNotRateLimited) return 0;

    // If all keys are rate-limited, return the time until the first key is
    // ready.
    return Math.min(...activeKeys.map((k) => k.rateLimitedUntil - now));
  }

  /**
   * This is called when we receive a 429, which means there are already five
   * concurrent requests running on this key. We don't have any information on
   * when these requests will resolve, so all we can do is wait a bit and try
   * again. We will lock the key for 2 seconds after getting a 429 before
   * retrying in order to give the other requests a chance to finish.
   */
  public markRateLimited(keyHash: string) {
    this.log.debug({ key: keyHash }, "Key rate limited");
    const key = this.keys.find((k) => k.hash === keyHash)!;
    const now = Date.now();
    key.rateLimitedAt = now;
    key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
  }

  public recheck() {}

  /**
   * Applies a short artificial delay to the key upon dequeueing, in order to
   * prevent it from being immediately assigned to another request before the
   * current one can be dispatched.
   **/
  private throttle(hash: string) {
    const now = Date.now();
    const key = this.keys.find((k) => k.hash === hash)!;

    const currentRateLimit = key.rateLimitedUntil;
    const nextRateLimit = now + KEY_REUSE_DELAY;

    key.rateLimitedAt = now;
    key.rateLimitedUntil = Math.max(currentRateLimit, nextRateLimit);
  }
}
@@ -8,7 +8,13 @@ import type { Request } from "express";
 * The service that a model is hosted on. Distinct from `APIFormat` because some
 * services have interoperable APIs (eg Anthropic/AWS, OpenAI/Azure).
 */
-export type LLMService = "openai" | "anthropic" | "google-ai" | "aws" | "azure";
+export type LLMService =
+  | "openai"
+  | "anthropic"
+  | "google-ai"
+  | "mistral-ai"
+  | "aws"
+  | "azure";

export type OpenAIModelFamily =
  | "turbo"
@@ -18,6 +24,10 @@ export type OpenAIModelFamily =
  | "dall-e";
export type AnthropicModelFamily = "claude";
export type GoogleAIModelFamily = "gemini-pro";
+export type MistralAIModelFamily =
+  | "mistral-tiny"
+  | "mistral-small"
+  | "mistral-medium";
export type AwsBedrockModelFamily = "aws-claude";
export type AzureOpenAIModelFamily = `azure-${Exclude<
  OpenAIModelFamily,
@@ -27,6 +37,7 @@ export type ModelFamily =
  | OpenAIModelFamily
  | AnthropicModelFamily
  | GoogleAIModelFamily
+  | MistralAIModelFamily
  | AwsBedrockModelFamily
  | AzureOpenAIModelFamily;

@@ -40,6 +51,9 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
  "dall-e",
  "claude",
  "gemini-pro",
+  "mistral-tiny",
+  "mistral-small",
+  "mistral-medium",
  "aws-claude",
  "azure-turbo",
  "azure-gpt4",
@@ -49,7 +63,14 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(

export const LLM_SERVICES = (<A extends readonly LLMService[]>(
  arr: A & ([LLMService] extends [A[number]] ? unknown : never)
-) => arr)(["openai", "anthropic", "google-ai", "aws", "azure"] as const);
+) => arr)([
+  "openai",
+  "anthropic",
+  "google-ai",
+  "mistral-ai",
+  "aws",
+  "azure",
+] as const);

export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
  "^gpt-4-1106(-preview)?$": "gpt4-turbo",
@@ -78,6 +99,9 @@ export const MODEL_FAMILY_SERVICE: {
  "azure-gpt4-32k": "azure",
  "azure-gpt4-turbo": "azure",
  "gemini-pro": "google-ai",
+  "mistral-tiny": "mistral-ai",
+  "mistral-small": "mistral-ai",
+  "mistral-medium": "mistral-ai",
};

pino({ level: "debug" }).child({ module: "startup" });
@@ -101,6 +125,17 @@ export function getGoogleAIModelFamily(_model: string): ModelFamily {
  return "gemini-pro";
}

+export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
+  switch (model) {
+    case "mistral-tiny":
+    case "mistral-small":
+    case "mistral-medium":
+      return model;
+    default:
+      return "mistral-tiny";
+  }
+}
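The mapper's behavior in brief; "mistral-embed" here just stands in for any id outside the three known families:

getMistralAIModelFamily("mistral-medium"); // "mistral-medium"
getMistralAIModelFamily("mistral-embed");  // "mistral-tiny" (unknown ids fall back)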

export function getAwsBedrockModelFamily(_model: string): ModelFamily {
  return "aws-claude";
}
@@ -158,6 +193,9 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
    case "google-ai":
      modelFamily = getGoogleAIModelFamily(model);
      break;
+    case "mistral-ai":
+      modelFamily = getMistralAIModelFamily(model);
+      break;
    default:
      assertNever(req.outboundApi);
  }
@@ -25,6 +25,15 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
    case "claude":
      cost = 0.00001102;
      break;
+    case "mistral-tiny":
+      cost = 0.00000031;
+      break;
+    case "mistral-small":
+      cost = 0.00000132;
+      break;
+    case "mistral-medium":
+      cost = 0.0000055;
+      break;
  }
  return cost * Math.max(0, tokens);
}
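A quick sanity check on the rates above, assuming cost is assigned by the switch as shown:

getTokenCostUsd("mistral-medium", 1_000_000); // 0.0000055 * 1,000,000 = 5.5 USD
getTokenCostUsd("mistral-tiny", 1_000_000);   // 0.00000031 * 1,000,000 = 0.31 USD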
@@ -64,6 +64,7 @@ export function makeCompletionSSE({

  switch (format) {
    case "openai":
+    case "mistral-ai":
      event = {
        id: "chatcmpl-" + id,
        object: "chat.completion.chunk",
File diff suppressed because one or more lines are too long
@@ -0,0 +1,45 @@
import { MistralAIChatMessage } from "../../proxy/middleware/request/preprocessors/transform-outbound-payload.js";
import * as tokenizer from "./mistral-tokenizer-js";

export function init() {
  tokenizer.initializemistralTokenizer();
  return true;
}

export function getTokenCount(prompt: MistralAIChatMessage[] | string) {
  if (typeof prompt === "string") {
    return getTextTokenCount(prompt);
  }

  let chunks = [];
  for (const message of prompt) {
    switch (message.role) {
      case "system":
        chunks.push(message.content);
        break;
      case "assistant":
        chunks.push(message.content + "</s>");
        break;
      case "user":
        chunks.push("[INST] " + message.content + " [/INST]");
        break;
    }
  }
  return getTextTokenCount(chunks.join(" "));
}
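To make the flattening concrete, here is how a small, invented chat would be serialized before encoding:

const prompt: MistralAIChatMessage[] = [
  { role: "system", content: "Be concise." },
  { role: "user", content: "Hi" },
  { role: "assistant", content: "Hello!" },
];
// chunks.join(" ") yields:
// "Be concise. [INST] Hi [/INST] Hello!</s>"
getTokenCount(prompt); // { tokenizer: "mistral-tokenizer-js", token_count: ... }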

function getTextTokenCount(prompt: string) {
  // Don't try tokenizing if the prompt is massive to prevent DoS.
  // 500k characters should be sufficient for all supported models.
  if (prompt.length > 500000) {
    return {
      tokenizer: "length fallback",
      token_count: 100000,
    };
  }

  return {
    tokenizer: "mistral-tokenizer-js",
    token_count: tokenizer.encode(prompt.normalize("NFKC"))!.length,
  };
}
@@ -1,6 +1,7 @@
import { Request } from "express";
import type {
  GoogleAIChatMessage,
+  MistralAIChatMessage,
  OpenAIChatMessage,
} from "../../proxy/middleware/request/preprocessors/transform-outbound-payload";
import { assertNever } from "../utils";
@@ -14,11 +15,16 @@ import {
  getOpenAIImageCost,
  estimateGoogleAITokenCount,
} from "./openai";
+import {
+  init as initMistralAI,
+  getTokenCount as getMistralAITokenCount,
+} from "./mistral";
import { APIFormat } from "../key-management";

export async function init() {
  initClaude();
  initOpenAi();
+  initMistralAI();
}

/** Tagged union via `service` field of the different types of requests that can
@@ -31,6 +37,7 @@ type TokenCountRequest = { req: Request } & (
      service: "openai-text" | "anthropic" | "google-ai";
    }
  | { prompt?: GoogleAIChatMessage[]; completion?: never; service: "google-ai" }
+  | { prompt: MistralAIChatMessage[]; completion?: never; service: "mistral-ai" }
  | { prompt?: never; completion: string; service: APIFormat }
  | { prompt?: never; completion?: never; service: "openai-image" }
);
@@ -77,6 +84,11 @@ export async function countTokens({
        ...estimateGoogleAITokenCount(prompt ?? (completion || [])),
        tokenization_duration_ms: getElapsedMs(time),
      };
+    case "mistral-ai":
+      return {
+        ...getMistralAITokenCount(prompt ?? completion),
+        tokenization_duration_ms: getElapsedMs(time),
+      };
    default:
      assertNever(service);
  }
@@ -15,6 +15,7 @@ import {
  getAzureOpenAIModelFamily,
  getClaudeModelFamily,
  getGoogleAIModelFamily,
+  getMistralAIModelFamily,
  getOpenAIModelFamily,
  MODEL_FAMILIES,
  ModelFamily,
@@ -34,6 +35,9 @@ const INITIAL_TOKENS: Required<UserTokenCounts> = {
  "dall-e": 0,
  claude: 0,
  "gemini-pro": 0,
+  "mistral-tiny": 0,
+  "mistral-small": 0,
+  "mistral-medium": 0,
  "aws-claude": 0,
  "azure-turbo": 0,
  "azure-gpt4": 0,
@@ -399,6 +403,8 @@ function getModelFamilyForQuotaUsage(
      return getClaudeModelFamily(model);
    case "google-ai":
      return getGoogleAIModelFamily(model);
+    case "mistral-ai":
+      return getMistralAIModelFamily(model);
    default:
      assertNever(api);
  }