adds gemini/makersuite keychecker, native endpoint, and streaming fixes

This commit is contained in:
nai-degen
2024-08-03 21:53:32 -05:00
parent e91532f4f7
commit 2f7315379c
17 changed files with 324 additions and 58 deletions
+3 -2
View File
@@ -40,11 +40,11 @@ NODE_ENV=production
# Which model types users are allowed to access.
# The following model families are recognized:
# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-dall-e
# turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | dall-e | claude | claude-opus | gemini-flash | gemini-pro | gemini-ultra | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | aws-claude-opus | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-dall-e
# By default, all models are allowed except for 'dall-e' / 'azure-dall-e'.
# To allow DALL-E image generation, uncomment the line below and add 'dall-e' or
# 'azure-dall-e' to the list of allowed model families.
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,claude,claude-opus,gemini-flash,gemini-pro,gemini-ultra,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o
# Which services can be used to process prompts containing images via multimodal
# models. The following services are recognized:
@@ -144,6 +144,7 @@ NODE_ENV=production
# For AWS credentials, separate the access key ID, secret key, and region with a colon.
OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
ANTHROPIC_KEY=sk-ant-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
GOOGLE_AI_KEY=AIzaxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# See `docs/aws-configuration.md` for more information, there may be additional steps required to set up AWS.
AWS_CREDENTIALS=myaccesskeyid:mysecretkey:us-east-1,anotheraccesskeyid:anothersecretkey:us-west-2
# See `docs/azure-configuration.md` for more information, there may be additional steps required to set up Azure.
+2
View File
@@ -428,7 +428,9 @@ export const config: Config = {
"gpt4o",
"claude",
"claude-opus",
"gemini-flash",
"gemini-pro",
"gemini-ultra",
"mistral-tiny",
"mistral-small",
"mistral-medium",
+2
View File
@@ -20,7 +20,9 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"dall-e": "DALL-E",
claude: "Claude (Sonnet)",
"claude-opus": "Claude (Opus)",
"gemini-flash": "Gemini Flash",
"gemini-pro": "Gemini Pro",
"gemini-ultra": "Gemini Ultra",
"mistral-tiny": "Mistral 7B",
"mistral-small": "Mistral Nemo",
"mistral-medium": "Mistral Medium",
+7
View File
@@ -12,6 +12,7 @@ function getProxyAuthorizationFromRequest(req: Request): string | undefined {
// pass the _proxy_ key in this header too, instead of providing it as a
// Bearer token in the Authorization header. So we need to check both.
// Prefer the Authorization header if both are present.
// Google AI uses a key querystring parameter.
if (req.headers.authorization) {
const token = req.headers.authorization?.slice("Bearer ".length);
@@ -24,6 +25,12 @@ function getProxyAuthorizationFromRequest(req: Request): string | undefined {
delete req.headers["x-api-key"];
return token;
}
if (req.query.key) {
const token = req.query.key?.toString();
delete req.query.key;
return token;
}
return undefined;
}
+68 -10
View File
@@ -16,6 +16,7 @@ import {
ProxyResHandlerWithBody,
} from "./middleware/response";
import { addGoogleAIKey } from "./middleware/request/preprocessors/add-google-ai-key";
import { GoogleAIKey, keyPool } from "../shared/key-management";
let modelsCache: any = null;
let modelsCacheTime = 0;
@@ -30,14 +31,19 @@ const getModelsResponse = () => {
if (!config.googleAIKey) return { object: "list", data: [] };
const googleAIVariants = [
"gemini-pro",
"gemini-1.0-pro",
"gemini-1.5-pro",
"gemini-1.5-pro-latest",
];
const keys = keyPool
.list()
.filter((k) => k.service === "google-ai") as GoogleAIKey[];
if (keys.length === 0) {
modelsCache = { object: "list", data: [] };
modelsCacheTime = new Date().getTime();
return modelsCache;
}
const models = googleAIVariants.map((id) => ({
const modelIds = Array.from(
new Set(keys.map((k) => k.modelIds).flat())
).filter((id) => id.startsWith("models/gemini"));
const models = modelIds.map((id) => ({
id,
object: "model",
created: new Date().getTime(),
@@ -114,7 +120,17 @@ const googleAIProxy = createQueueMiddleware({
},
changeOrigin: true,
selfHandleResponse: true,
logger,
// Prevent logging of the API key by HPM
logger: logger.child(
{},
{
redact: {
paths: ["*"],
censor: (v) =>
typeof v === "string" ? v.replace(/key=\S+/g, "key=xxxxxxx") : v,
},
}
),
on: {
proxyReq: createOnProxyReqHandler({ pipeline: [finalizeSignedRequest] }),
proxyRes: createOnProxyResHandler([googleAIResponseHandler]),
@@ -125,6 +141,22 @@ const googleAIProxy = createQueueMiddleware({
const googleAIRouter = Router();
googleAIRouter.get("/v1/models", handleModelRequest);
// Native Google AI chat completion endpoint
googleAIRouter.post(
"/v1beta/models/:modelId:(generateContent|streamGenerateContent)",
ipLimiter,
createPreprocessorMiddleware(
{
inApi: "google-ai",
outApi: "google-ai",
service: "google-ai",
},
{ afterTransform: [maybeReassignModel, setStreamFlag] }
),
googleAIProxy
);
// OpenAI-to-Google AI compatibility endpoint.
googleAIRouter.post(
"/v1/chat/completions",
@@ -136,12 +168,38 @@ googleAIRouter.post(
googleAIProxy
);
/** Marks the request as streaming based on which native endpoint was hit. */
function setStreamFlag(req: Request) {
  // The native API signals streaming via the :streamGenerateContent action
  // in the URL rather than a body flag; normalize it onto both the body and
  // the request object so downstream middleware sees a consistent value.
  const streaming = req.url.includes("streamGenerateContent");
  req.body.stream = streaming;
  req.isStreaming = streaming;
}
/**
 * Replaces requests for non-Google AI models with gemini-pro-1.5-latest.
 * Also strips models/ from the beginning of the model IDs.
 * Note: the merged diff left two `const requested` declarations (one from the
 * removed line, one from the added line), which is a redeclaration error; this
 * is the coherent post-change version with a single declaration.
 **/
function maybeReassignModel(req: Request) {
  // Ensure model is on body as a lot of middleware will expect it. For native
  // requests the model arrives in the URL path (…/models/<id>:<action>).
  const model = req.body.model || req.url.split("/").pop()?.split(":").shift();
  if (!model) {
    throw new Error("You must specify a model with your request.");
  }
  req.body.model = model;

  const requested = model;
  if (requested.startsWith("models/")) {
    req.body.model = requested.slice("models/".length);
  }

  if (requested.includes("gemini")) {
    return;
  }

  req.log.info({ requested }, "Reassigning model to gemini-pro-1.5-latest");
  req.body.model = "gemini-pro-1.5-latest";
}
@@ -143,7 +143,7 @@ const handleTestMessage: RequestHandler = (req, res) => {
};
function isTestMessage(body: any) {
const { messages, prompt } = body;
const { messages, prompt, contents } = body;
if (messages) {
return (
@@ -151,6 +151,11 @@ function isTestMessage(body: any) {
messages[0].role === "user" &&
messages[0].content === "Hi"
);
} else if (contents) {
return (
contents.length === 1 &&
contents[0].parts[0]?.text === "Hi"
);
} else {
return (
prompt?.trim() === "Human: Hi\n\nAssistant:" ||
@@ -2,39 +2,38 @@ import { keyPool } from "../../../../shared/key-management";
import { RequestPreprocessor } from "../index";
export const addGoogleAIKey: RequestPreprocessor = (req) => {
const apisValid = req.inboundApi === "openai" && req.outboundApi === "google-ai";
const inboundValid =
req.inboundApi === "openai" || req.inboundApi === "google-ai";
const outboundValid = req.outboundApi === "google-ai";
const serviceValid = req.service === "google-ai";
if (!apisValid || !serviceValid) {
if (!inboundValid || !outboundValid || !serviceValid) {
throw new Error("addGoogleAIKey called on invalid request");
}
if (!req.body?.model) {
throw new Error("You must specify a model with your request.");
}
const model = req.body.model;
req.isStreaming = req.isStreaming || req.body.stream;
req.key = keyPool.get(model, "google-ai");
req.log.info(
{ key: req.key.hash, model },
{ key: req.key.hash, model, stream: req.isStreaming },
"Assigned Google AI API key to request"
);
// https://generativelanguage.googleapis.com/v1beta/models/$MODEL_ID:generateContent?key=$API_KEY
// https://generativelanguage.googleapis.com/v1beta/models/$MODEL_ID:streamGenerateContent?key=${API_KEY}
req.isStreaming = req.isStreaming || req.body.stream;
delete req.body.stream;
const payload = { ...req.body, stream: undefined, model: undefined };
req.signedRequest = {
method: "POST",
protocol: "https:",
hostname: "generativelanguage.googleapis.com",
path: `/v1beta/models/${model}:${req.isStreaming ? "streamGenerateContent" : "generateContent"}?key=${req.key.key}`,
path: `/v1beta/models/${model}:${
req.isStreaming ? "streamGenerateContent" : "generateContent"
}?key=${req.key.key}`,
headers: {
["host"]: `generativelanguage.googleapis.com`,
["content-type"]: "application/json",
},
body: JSON.stringify(req.body),
body: JSON.stringify(payload),
};
};
@@ -143,6 +143,8 @@ export function sendErrorToClient({
res.setHeader("x-oai-proxy-error-status", redactedOpts.statusCode || 500);
}
req.log.info({ statusCode: res.statusCode, isStreaming, format, redactedOpts, event }, "Sending error response");
if (isStreaming) {
if (!res.headersSent) {
initializeSseStream(res);
@@ -223,19 +225,16 @@ export function buildSpoofedCompletion({
// TODO: Native Google AI non-streaming responses are not supported, this
// is an untested guess at what the response should look like.
return {
id: "error-" + id,
object: "chat.completion",
created: Date.now(),
model,
candidates: [
{
content: { parts: [{ text: content }], role: "model" },
content: { parts: [{ text: content }], role: "assistant" },
finishReason: title,
index: 0,
tokenCount: null,
safetyRatings: [],
},
],
usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
};
case "openai-image":
return obj;
@@ -302,7 +301,10 @@ export function buildSpoofedSSE({
};
break;
case "google-ai":
return JSON.stringify({
// TODO: google ai supports two streaming transports, SSE and JSON.
// we currently only support SSE.
// return JSON.stringify({
event = {
candidates: [
{
content: { parts: [{ text: content }], role: "model" },
@@ -312,7 +314,8 @@ export function buildSpoofedSSE({
safetyRatings: [],
},
],
});
};
break;
case "openai-image":
return JSON.stringify(obj);
default:
+1 -1
View File
@@ -561,7 +561,7 @@ async function handleGoogleAIBadRequestError(
errorPayload.proxy_note = `Assigned API key is invalid.`;
}
} else if (status === "FAILED_PRECONDITION") {
if (message.includes(/please enable billing/i)) {
if (message.match(/please enable billing/i)) {
req.log.warn(
{ key: req.key?.hash, status, msg: error.message },
"Cannot use key due to billing restrictions."
@@ -116,7 +116,7 @@ export class SSEStreamAdapter extends Transform {
try {
const hasParts = candidates[0].content?.parts?.length > 0;
if (hasParts) {
return `data: ${JSON.stringify(data.value ?? data)}\n`;
return `data: ${JSON.stringify(data.value ?? data)}`;
} else {
this.log.error({ event: data }, "Received bad Google AI event");
return `data: ${buildSpoofedSSE({
+1 -1
View File
@@ -70,7 +70,7 @@ export { proxyRouter as proxyRouter };
function addV1(req: Request, res: Response, next: NextFunction) {
// Clients don't consistently use the /v1 prefix so we'll add it for them.
if (!req.path.startsWith("/v1/")) {
if (!req.path.startsWith("/v1/") && !req.path.startsWith("/v1beta/")) {
req.url = `/v1${req.url}`;
}
next();
+9
View File
@@ -87,6 +87,15 @@ app.use(blacklist);
app.use(checkOrigin);
app.use("/admin", adminRouter);
app.use((req, _, next) => {
// For whatever reason SillyTavern just ignores the path a user provides
// when using Google AI with reverse proxy. We'll fix it here.
if (req.path.startsWith("/v1beta/models/")) {
req.url = `${config.proxyEndpointRoute}/google-ai${req.url}`;
return next();
}
next();
});
app.use(config.proxyEndpointRoute, proxyRouter);
app.use("/user", userRouter);
if (config.staticServiceInfo) {
+9 -8
View File
@@ -5,19 +5,20 @@ import {
} from "./openai";
import { APIFormatTransformer } from "./index";
const GoogleAIV1ContentSchema = z.object({
parts: z.array(z.object({ text: z.string() })), // TODO: add other media types
role: z.enum(["user", "model"]).optional(),
});
// https://developers.generativeai.google/api/rest/generativelanguage/models/generateContent
export const GoogleAIV1GenerateContentSchema = z
.object({
model: z.string().max(100), //actually specified in path but we need it for the router
stream: z.boolean().optional().default(false), // also used for router
contents: z.array(
z.object({
parts: z.array(z.object({ text: z.string() })),
role: z.enum(["user", "model"]),
})
),
contents: z.array(GoogleAIV1ContentSchema),
tools: z.array(z.object({})).max(0).optional(),
safetySettings: z.array(z.object({})).max(0).optional(),
safetySettings: z.array(z.object({})).optional(),
systemInstruction: GoogleAIV1ContentSchema.optional(),
generationConfig: z.object({
temperature: z.number().optional(),
maxOutputTokens: z.coerce
@@ -25,7 +26,7 @@ export const GoogleAIV1GenerateContentSchema = z
.int()
.optional()
.default(16)
.transform((v) => Math.min(v, 1024)), // TODO: Add config
.transform((v) => Math.min(v, 4096)), // TODO: Add config
candidateCount: z.literal(1).optional(),
topP: z.number().optional(),
topK: z.number().optional(),
@@ -0,0 +1,155 @@
import axios, { AxiosError } from "axios";
import type { GoogleAIModelFamily } from "../../models";
import { KeyCheckerBase } from "../key-checker-base";
import type { GoogleAIKey, GoogleAIKeyProvider } from "./provider";
import { getGoogleAIModelFamily } from "../../models";
const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
const KEY_CHECK_PERIOD = 3 * 60 * 60 * 1000; // 3 hours
const LIST_MODELS_URL =
"https://generativelanguage.googleapis.com/v1beta/models";
type ListModelsResponse = {
models: {
name: string;
baseModelId: string;
version: string;
displayName: string;
description: string;
inputTokenLimit: number;
outputTokenLimit: number;
supportedGenerationMethods: string[];
temperature: number;
maxTemperature: number;
topP: number;
topK: number;
}[];
nextPageToken: string;
};
type UpdateFn = typeof GoogleAIKeyProvider.prototype.update;
/**
 * Periodically validates Google AI (Generative Language API) keys by listing
 * the models each key can access. Disables keys that are revoked, forbidden,
 * or have billing disabled; reschedules rate-limited keys.
 */
export class GoogleAIKeyChecker extends KeyCheckerBase<GoogleAIKey> {
  constructor(keys: GoogleAIKey[], updateKey: UpdateFn) {
    super(keys, {
      service: "google-ai",
      keyCheckPeriod: KEY_CHECK_PERIOD,
      minCheckInterval: MIN_CHECK_INTERVAL,
      recurringChecksEnabled: false,
      updateKey,
    });
  }

  /** Runs a single check of one key; throws on failure (handled by base). */
  protected async testKeyOrFail(key: GoogleAIKey) {
    const provisionedModels = await this.getProvisionedModels(key);
    const updates = {
      modelFamilies: provisionedModels,
    };
    this.updateKey(key.hash, updates);
    this.log.info(
      { key: key.hash, models: key.modelFamilies, ids: key.modelIds.length },
      "Checked key."
    );
  }

  /**
   * Fetches the key's model list and records both the raw model IDs and the
   * derived model families on the key. Returns the families.
   */
  private async getProvisionedModels(
    key: GoogleAIKey
  ): Promise<GoogleAIModelFamily[]> {
    const { data } = await axios.get<ListModelsResponse>(
      `${LIST_MODELS_URL}?pageSize=1000&key=${key.key}`
    );
    const models = data.models;

    const ids = new Set<string>();
    const families = new Set<GoogleAIModelFamily>();
    models.forEach(({ name }) => {
      families.add(getGoogleAIModelFamily(name));
      ids.add(name);
    });

    const familiesArray = Array.from(families);
    this.updateKey(key.hash, {
      modelFamilies: familiesArray,
      modelIds: Array.from(ids),
    });

    return familiesArray;
  }

  protected handleAxiosError(key: GoogleAIKey, error: AxiosError): void {
    // Schedule the next check one minute from now by backdating lastChecked.
    // (Previously this used 10 * 1000 — ten seconds — despite log messages
    // claiming "1 minute"; fixed to match the stated intent.)
    const oneMinute = 60 * 1000;
    const retrySoon = Date.now() - (KEY_CHECK_PERIOD - oneMinute);

    if (error.response && GoogleAIKeyChecker.errorIsGoogleAIError(error)) {
      const httpStatus = error.response.status;
      const { code, message, status, details } = error.response.data.error;

      switch (httpStatus) {
        case 400: {
          // Braces scope the lexical declaration to this case (avoids the
          // no-case-declarations hazard).
          const reason = details?.[0]?.reason;
          if (status === "INVALID_ARGUMENT" && reason === "API_KEY_INVALID") {
            this.log.warn(
              { key: key.hash, reason, details },
              "Key check returned API_KEY_INVALID error. Disabling key."
            );
            this.updateKey(key.hash, { isDisabled: true, isRevoked: true });
            return;
          } else if (
            status === "FAILED_PRECONDITION" &&
            message.match(/please enable billing/i)
          ) {
            this.log.warn(
              { key: key.hash, message, details },
              "Key check returned billing disabled error. Disabling key."
            );
            this.updateKey(key.hash, { isDisabled: true, isRevoked: true });
            return;
          }
          break;
        }
        case 401:
        case 403:
          this.log.warn(
            { key: key.hash, status, code, message, details },
            "Key check returned Forbidden/Unauthorized error. Disabling key."
          );
          this.updateKey(key.hash, { isDisabled: true, isRevoked: true });
          return;
        case 429:
          this.log.warn(
            { key: key.hash, status, code, message, details },
            "Key is rate limited. Rechecking key in 1 minute."
          );
          this.updateKey(key.hash, { lastChecked: retrySoon });
          return;
      }

      this.log.error(
        { key: key.hash, status, code, message, details },
        "Encountered unexpected error status while checking key. This may indicate a change in the API; please report this."
      );
      return this.updateKey(key.hash, { lastChecked: Date.now() });
    }

    this.log.error(
      { key: key.hash, error: error.message },
      "Network error while checking key; trying this key again in a minute."
    );
    return this.updateKey(key.hash, { lastChecked: retrySoon });
  }

  /** Type guard: true when the response body carries a Google AI error envelope. */
  static errorIsGoogleAIError(
    error: AxiosError
  ): error is AxiosError<GoogleAIError> {
    const data = error.response?.data as any;
    // Coerce to boolean — a type predicate must return boolean, not string.
    return Boolean(data?.error?.code || data?.error?.status);
  }
}
type GoogleAIError = {
error: {
code: string;
message: string;
status: string;
details: any[];
};
};
@@ -2,12 +2,13 @@ import crypto from "crypto";
import { Key, KeyProvider } from "..";
import { config } from "../../../config";
import { logger } from "../../../logger";
import type { GoogleAIModelFamily } from "../../models";
import { HttpError, PaymentRequiredError } from "../../errors";
import { getGoogleAIModelFamily, type GoogleAIModelFamily } from "../../models";
import { PaymentRequiredError } from "../../errors";
import { GoogleAIKeyChecker } from "./checker";
// Note that Google AI is not the same as Vertex AI, both are provided by Google
// but Vertex is the GCP product for enterprise. while Google AI is the
// consumer-ish product. The API is different, and keys are not compatible.
// Note that Google AI is not the same as Vertex AI, both are provided by
// Google but Vertex is the GCP product for enterprise, while Google API is a
// development/hobbyist product. They use completely different APIs and keys.
// https://ai.google.dev/docs/migrate_to_cloud
export type GoogleAIKeyUpdate = Omit<
@@ -31,6 +32,8 @@ export interface GoogleAIKey extends Key, GoogleAIKeyUsage {
rateLimitedAt: number;
/** The time until which this key is rate limited. */
rateLimitedUntil: number;
/** All detected model IDs on this key. */
modelIds: string[];
}
/**
@@ -49,6 +52,7 @@ export class GoogleAIKeyProvider implements KeyProvider<GoogleAIKey> {
readonly service = "google-ai";
private keys: GoogleAIKey[] = [];
private checker?: GoogleAIKeyChecker;
private log = logger.child({ module: "key-provider", service: this.service });
constructor() {
@@ -78,14 +82,22 @@ export class GoogleAIKeyProvider implements KeyProvider<GoogleAIKey> {
.digest("hex")
.slice(0, 8)}`,
lastChecked: 0,
"gemini-flashTokens": 0,
"gemini-proTokens": 0,
"gemini-ultraTokens": 0,
modelIds: [],
};
this.keys.push(newKey);
}
this.log.info({ keyCount: this.keys.length }, "Loaded Google AI keys.");
}
public init() {}
public init() {
if (config.checkKeys) {
this.checker = new GoogleAIKeyChecker(this.keys, this.update.bind(this));
this.checker.start();
}
}
public list() {
return this.keys.map((k) => Object.freeze({ ...k, key: undefined }));
@@ -141,11 +153,11 @@ export class GoogleAIKeyProvider implements KeyProvider<GoogleAIKey> {
return this.keys.filter((k) => !k.isDisabled).length;
}
public incrementUsage(hash: string, _model: string, tokens: number) {
public incrementUsage(hash: string, model: string, tokens: number) {
const key = this.keys.find((k) => k.hash === hash);
if (!key) return;
key.promptCount++;
key["gemini-proTokens"] += tokens;
key[`${getGoogleAIModelFamily(model)}Tokens`] += tokens;
}
public getLockoutPeriod() {
@@ -114,7 +114,8 @@ export abstract class KeyCheckerBase<TKey extends Key> {
);
// Don't check any individual key too often.
// Don't check anything at all at a rate faster than once per 3 seconds.
// Don't check anything at all more frequently than some minimum interval
// even if keys still need to be checked.
const nextCheck = Math.max(
oldestKey.lastChecked + this.keyCheckPeriod,
this.lastCheck + this.minCheckInterval
+14 -3
View File
@@ -23,7 +23,10 @@ export type OpenAIModelFamily =
| "gpt4o"
| "dall-e";
export type AnthropicModelFamily = "claude" | "claude-opus";
export type GoogleAIModelFamily = "gemini-pro";
export type GoogleAIModelFamily =
| "gemini-flash"
| "gemini-pro"
| "gemini-ultra";
export type MistralAIModelFamily =
// mistral changes their model classes frequently so these no longer
// correspond to specific models. consider them rough pricing tiers.
@@ -49,7 +52,9 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"dall-e",
"claude",
"claude-opus",
"gemini-flash",
"gemini-pro",
"gemini-ultra",
"mistral-tiny",
"mistral-small",
"mistral-medium",
@@ -94,7 +99,9 @@ export const MODEL_FAMILY_SERVICE: {
"azure-gpt4-turbo": "azure",
"azure-gpt4o": "azure",
"azure-dall-e": "azure",
"gemini-flash": "google-ai",
"gemini-pro": "google-ai",
"gemini-ultra": "google-ai",
"mistral-tiny": "mistral-ai",
"mistral-small": "mistral-ai",
"mistral-medium": "mistral-ai",
@@ -134,8 +141,12 @@ export function getClaudeModelFamily(model: string): AnthropicModelFamily {
return "claude";
}
export function getGoogleAIModelFamily(_model: string): ModelFamily {
return "gemini-pro";
export function getGoogleAIModelFamily(model: string): GoogleAIModelFamily {
return model.includes("ultra")
? "gemini-ultra"
: model.includes("flash")
? "gemini-flash"
: "gemini-pro";
}
export function getMistralAIModelFamily(model: string): MistralAIModelFamily {