mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Compare commits
40 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 14724a864c | |||
| 5e2b196c4d | |||
| e7686f60b1 | |||
| 380f666d35 | |||
| 3e277b1a35 | |||
| 9bac46ea75 | |||
| 2af4ee7dbe | |||
| 590fc0d021 | |||
| 746b0dad40 | |||
| b327da3ded | |||
| 7a818bdcd0 | |||
| c92ee2e22a | |||
| 632a4a565f | |||
| d712c275a0 | |||
| 1adff7481b | |||
| 393e19dda9 | |||
| 39c5c7c9ba | |||
| e64a5e59ef | |||
| 574c2cf0e3 | |||
| 1d3321b336 | |||
| de25e5822d | |||
| 6a904c9f37 | |||
| 30c3283572 | |||
| 10bba19079 | |||
| 713079f2f2 | |||
| 6e16e989ac | |||
| 4e89e0b1e4 | |||
| 6067c289ab | |||
| 32ebfea9cb | |||
| dec280d54d | |||
| 4823e97783 | |||
| 6a5685995f | |||
| 3b4d5691d7 | |||
| 45c09d021a | |||
| 8ef759fe0f | |||
| c06735fdd2 | |||
| cf4297a1af | |||
| 5d458d68bd | |||
| c3db077ae8 | |||
| 779b265b20 |
@@ -51,8 +51,7 @@ jobs:
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=raw,value=development,enable=${{ github.ref == 'refs/heads/main' }}
|
||||
type=raw,value=stable,enable=${{ github.ref == 'refs/heads/main-stable' }}
|
||||
type=raw,value=stable,enable=${{ github.ref == 'refs/heads/v1-stable' }}
|
||||
type=ref,event=tag # Use the tag name as a tag for tag builds
|
||||
type=semver,pattern={{version}} # Generate semantic versioning tags for tag builds
|
||||
type=sha # Just in case none of the above applies
|
||||
|
||||
@@ -11,18 +11,35 @@ Stay ahead of the curve with big-AGI. 🚀 Pros & Devs love big-AGI. 🤖
|
||||
|
||||
[](https://big-agi.com)
|
||||
|
||||
> 🚀 Big-AGI 2 is launching Q4 2024. Be the first to experience it before the public release.
|
||||
>
|
||||
> 👉 [Apply for Early Access](https://y2rjg0zillz.typeform.com/to/ZSADpr5u?utm_source=gh-stable&utm_medium=readme&utm_campaign=ea2)
|
||||
|
||||
Or fork & run on Vercel
|
||||
|
||||
[](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fenricoros%2Fbig-AGI&env=OPENAI_API_KEY&envDescription=Backend%20API%20keys%2C%20optional%20and%20may%20be%20overridden%20by%20the%20UI.&envLink=https%3A%2F%2Fgithub.com%2Fenricoros%2Fbig-AGI%2Fblob%2Fmain%2Fdocs%2Fenvironment-variables.md&project-name=big-AGI)
|
||||
|
||||
## 👉 [roadmap](https://github.com/users/enricoros/projects/4/views/2) 👉 [installation](docs/installation.md) 👉 [documentation](docs/README.md)
|
||||
### New Version
|
||||
|
||||
> Note: bigger better features (incl. Beam-2) are being cooked outside of `main`.
|
||||
This repository contains two main versions:
|
||||
|
||||
[//]: # (big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap)
|
||||
- Big-AGI 2: next-generation, bringing the most advanced AI experience
|
||||
- `v2-dev`: V2 development branch, the exciting one, future default
|
||||
- Big-AGI Stable: as deployed on big-agi.com
|
||||
- `v1-stable`: Current stable version & Docker 'latest' tag
|
||||
|
||||
### What's New in 1.16.1...1.16.3 · Jun 20, 2024 (patch releases)
|
||||
Note: After the V2 release in Q4, `v2-dev` will become the default branch and `v1-stable` will reach EOL.
|
||||
|
||||
### Quick links: 👉 [roadmap](https://github.com/users/enricoros/projects/4/views/2) 👉 [installation](docs/installation.md) 👉 [documentation](docs/README.md)
|
||||
|
||||
### What's New in 1.16.1...1.16.9 · Jan 21, 2025 (patch releases)
|
||||
|
||||
- 1.16.9: Docker Gemini fix (R1 models are supported in Big-AGI 2)
|
||||
- 1.16.8: OpenAI ChatGPT-4o Latest (o1 models are supported in Big-AGI 2)
|
||||
- 1.16.7: OpenAI support for GPT-4o 2024-08-06
|
||||
- 1.16.6: Groq support for Llama 3.1 models
|
||||
- 1.16.5: GPT-4o Mini support
|
||||
- 1.16.4: 8192 tokens support for Claude 3.5 Sonnet
|
||||
- 1.16.3: Anthropic Claude 3.5 Sonnet model support
|
||||
- 1.16.2: Improve web downloads, as text, markdown, or HTML
|
||||
- 1.16.2: Proper support for Gemini models
|
||||
|
||||
+7
-1
@@ -10,8 +10,14 @@ by release.
|
||||
- milestone: [1.17.0](https://github.com/enricoros/big-agi/milestone/17)
|
||||
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
|
||||
|
||||
### What's New in 1.16.1...1.16.3 · Jun 20, 2024 (patch releases)
|
||||
### What's New in 1.16.1...1.16.9 · Jan 21, 2025 (patch releases)
|
||||
|
||||
- 1.16.9: Docker Gemini fix (R1 models are supported in Big-AGI 2)
|
||||
- 1.16.8: OpenAI ChatGPT-4o Latest (o1 models are supported in Big-AGI 2)
|
||||
- 1.16.7: OpenAI support for GPT-4o 2024-08-06
|
||||
- 1.16.6: Groq support for Llama 3.1 models
|
||||
- 1.16.5: GPT-4o Mini support
|
||||
- 1.16.4: 8192 tokens support for Claude 3.5 Sonnet
|
||||
- 1.16.3: Anthropic Claude 3.5 Sonnet model support
|
||||
- 1.16.2: Improve web downloads, as text, markdown, or HTML
|
||||
- 1.16.2: Proper support for Gemini models
|
||||
|
||||
Generated
+1
-1
@@ -79,7 +79,7 @@
|
||||
"typescript": "^5.4.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^20.0.0 || ^18.0.0"
|
||||
"node": "^22.0.0 || ^20.0.0 || ^18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/code-frame": {
|
||||
|
||||
+1
-1
@@ -88,6 +88,6 @@
|
||||
"typescript": "^5.4.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^20.0.0 || ^18.0.0"
|
||||
"node": "^22.0.0 || ^20.0.0 || ^18.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,6 +280,7 @@ export function ChatMessage(props: {
|
||||
const wasEdited = !!messageUpdated;
|
||||
|
||||
const textSel = selText ? selText : messageText;
|
||||
// WARNING: if you get an issue here, you're downgrading from the new Big-AGI 2 data format to 1.x.
|
||||
const isSpecialT2I = textSel.startsWith('https://images.prodia.xyz/') || textSel.startsWith('/draw ') || textSel.startsWith('/imagine ') || textSel.startsWith('/img ');
|
||||
const couldDiagram = textSel.length >= 100 && !isSpecialT2I;
|
||||
const couldImagine = textSel.length >= 3 && !isSpecialT2I;
|
||||
|
||||
@@ -14,6 +14,7 @@ import { capitalizeFirstLetter } from '~/common/util/textUtils';
|
||||
|
||||
import { NewsItems } from './news.data';
|
||||
import { beamNewsCallout } from './beam.data';
|
||||
import { bigAgi2NewsCallout } from './bigAgi2.data';
|
||||
|
||||
|
||||
// number of news items to show by default, before the expander
|
||||
@@ -110,6 +111,13 @@ export function AppNews() {
|
||||
const addPadding = false; //!firstCard; // || showExpander;
|
||||
return <React.Fragment key={idx}>
|
||||
|
||||
{/* Inject the Big-AGI 2.0 item here*/}
|
||||
{idx === 0 && (
|
||||
<Box sx={{ mb: 3 }}>
|
||||
{bigAgi2NewsCallout}
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{/* Inject the Beam item here*/}
|
||||
{idx === 2 && (
|
||||
<Box sx={{ mb: 3 }}>
|
||||
|
||||
@@ -2,7 +2,6 @@ import * as React from 'react';
|
||||
|
||||
import { Button, Card, CardContent, Grid, Typography } from '@mui/joy';
|
||||
import LaunchIcon from '@mui/icons-material/Launch';
|
||||
import ThumbUpRoundedIcon from '@mui/icons-material/ThumbUpRounded';
|
||||
|
||||
import { Link } from '~/common/components/Link';
|
||||
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
import * as React from 'react';
|
||||
|
||||
import { Button, Card, CardContent, Grid, Typography } from '@mui/joy';
|
||||
import AccessTimeIcon from '@mui/icons-material/AccessTime';
|
||||
import LaunchIcon from '@mui/icons-material/Launch';
|
||||
|
||||
import { Link } from '~/common/components/Link';
|
||||
|
||||
|
||||
const bigAgi2SurveyUrl = 'https://y2rjg0zillz.typeform.com/to/ZSADpr5u?utm_source=gh-stable&utm_medium=news&utm_campaign=ea2';
|
||||
|
||||
export const bigAgi2NewsCallout =
|
||||
<Card variant='solid' invertedColors>
|
||||
<CardContent sx={{ gap: 2 }}>
|
||||
<Typography level='title-lg'>
|
||||
Big-AGI 2.0 - In Development
|
||||
</Typography>
|
||||
<Typography level='body-sm'>
|
||||
We're building the next version of Big-AGI with your needs in mind. New features, better performance, enhanced AI interactions. Help us shape it.
|
||||
</Typography>
|
||||
<Grid container spacing={1}>
|
||||
<Grid xs={12} sm={7}>
|
||||
<Button
|
||||
fullWidth variant='soft' color='primary' endDecorator={<LaunchIcon />}
|
||||
component={Link} href={bigAgi2SurveyUrl} noLinkStyle target='_blank'
|
||||
>
|
||||
Apply for Early Access
|
||||
</Button>
|
||||
</Grid>
|
||||
<Grid xs={12} sm={5} sx={{ display: 'flex', flexAlign: 'center', justifyContent: 'center' }}>
|
||||
<Button
|
||||
fullWidth variant='outlined' color='primary' startDecorator={<AccessTimeIcon />}
|
||||
disabled
|
||||
>
|
||||
Coming Fall 2024
|
||||
</Button>
|
||||
</Grid>
|
||||
</Grid>
|
||||
</CardContent>
|
||||
</Card>;
|
||||
@@ -61,7 +61,7 @@ export const NewsItems: NewsItem[] = [
|
||||
]
|
||||
}*/
|
||||
{
|
||||
versionCode: '1.16.3',
|
||||
versionCode: '1.16.9',
|
||||
versionName: 'Crystal Clear',
|
||||
versionDate: new Date('2024-06-07T05:00:00Z'),
|
||||
// versionDate: new Date('2024-05-13T19:00:00Z'),
|
||||
@@ -80,6 +80,13 @@ export const NewsItems: NewsItem[] = [
|
||||
{ text: <>1.16.1: Support for <B>OpenAI</B> <B href='https://openai.com/index/hello-gpt-4o/'>GPT-4o</B></> },
|
||||
{ text: <>1.16.2: Proper <B>Gemini</B> support, <B>HTML/Markdown</B> downloads, and latest <B>Mistral</B></> },
|
||||
{ text: <>1.16.3: Support for <B href='https://www.anthropic.com/news/claude-3-5-sonnet'>Claude 3.5 Sonnet</B> (refresh your <B>Anthropic</B> models)</> },
|
||||
{ text: <>1.16.4: <B>8192 tokens</B> support for Claude 3.5 Sonnet</> },
|
||||
{ text: <>1.16.5: OpenAI <B>GPT-4o Mini</B> support</> },
|
||||
{ text: <>1.16.6: Groq <B>Llama 3.1</B> support</> },
|
||||
{ text: <>1.16.7: Gpt-4o <B>2024-08-06</B></> },
|
||||
{ text: <>1.16.8: <B>ChatGPT-4o</B> latest</> },
|
||||
{ text: <>1.16.9: <B>Gemini</B> fixes</> },
|
||||
{ text: <>OpenAI <B>o1</B>, DeepSeek R1, and newer models require Big-AGI 2. <B href='https://y2rjg0zillz.typeform.com/to/ZSADpr5u?utm_source=gh-stable&utm_medium=news&utm_campaign=ea2'>Sign up here</B></> },
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -7,7 +7,7 @@ import { useAppStateStore } from '~/common/state/store-appstate';
|
||||
|
||||
|
||||
// update this variable every time you want to broadcast a new version to clients
|
||||
export const incrementalNewsVersion: number = 16.1; // not notifying for 16.3
|
||||
export const incrementalNewsVersion: number = 16.1; // not notifying for 1.16.9
|
||||
|
||||
|
||||
interface NewsState {
|
||||
|
||||
@@ -8,8 +8,11 @@ export function prettyBaseModel(model: string | undefined): string {
|
||||
if (!model) return '';
|
||||
if (model.includes('gpt-4-vision-preview')) return 'GPT-4 Vision';
|
||||
if (model.includes('gpt-4-1106-preview')) return 'GPT-4 Turbo';
|
||||
if (model.includes('gpt-4-32k')) return 'gpt-4-32k';
|
||||
if (model.includes('gpt-4')) return 'gpt-4';
|
||||
if (model.includes('gpt-4-32k')) return 'GPT-4-32k';
|
||||
if (model.includes('gpt-4o-mini')) return 'GPT-4o Mini';
|
||||
if (model.includes('gpt-4o')) return 'GPT-4o';
|
||||
if (model.includes('gpt-4-turbo')) return 'GPT-4 Turbo';
|
||||
if (model.includes('gpt-4')) return 'GPT-4';
|
||||
if (model.includes('gpt-3.5-turbo-instruct')) return '3.5 Turbo Instruct';
|
||||
if (model.includes('gpt-3.5-turbo-1106')) return '3.5 Turbo 16k';
|
||||
if (model.includes('gpt-3.5-turbo-16k')) return '3.5 Turbo 16k';
|
||||
|
||||
@@ -22,22 +22,29 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
// },
|
||||
// },
|
||||
{
|
||||
id: 'claude-3-5-sonnet-20240620',
|
||||
id: 'claude-3-5-sonnet-20241022',
|
||||
label: 'Claude 3.5 Sonnet',
|
||||
created: roundTime('2024-06-20 06:00'),
|
||||
description: 'The most intelligent Claude model',
|
||||
created: roundTime('2024-10-22 06:00'),
|
||||
description: 'Most intelligent Claude model to date',
|
||||
contextWindow: 200000, // Characters
|
||||
maxCompletionTokens: 4096,
|
||||
maxCompletionTokens: 8192,
|
||||
trainingDataCutoff: 'Apr 2024',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
pricing: { chatIn: 3, chatOut: 15 },
|
||||
benchmark: {
|
||||
heCode: 92.0,
|
||||
vqaMmmu: 68.3,
|
||||
// TODO: Update with official benchmarks when available
|
||||
cbaElo: 1256 - 1, // Placeholder
|
||||
cbaMmlu: 86.8 - 1, // Placeholder
|
||||
},
|
||||
benchmark: { cbaElo: 1269, cbaMmlu: 88.7 }, // moved from 3.5 Sonnet (Previous Version), TO UPDATE!!
|
||||
},
|
||||
{
|
||||
id: 'claude-3-5-sonnet-20240620',
|
||||
label: 'Claude 3.5 Sonnet (Previous)',
|
||||
created: roundTime('2024-06-20 06:00'),
|
||||
description: 'The most intelligent Claude model',
|
||||
contextWindow: 200000, // Characters
|
||||
maxCompletionTokens: 8192,
|
||||
trainingDataCutoff: 'Apr 2024',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
pricing: { chatIn: 3, chatOut: 15 },
|
||||
benchmark: { cbaElo: 1269 - 0.1, cbaMmlu: 88.7 - 0.1 },
|
||||
hidden: true,
|
||||
},
|
||||
// {
|
||||
// id: 'claude-3.5-haiku', // ...
|
||||
@@ -81,6 +88,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
pricing: { chatIn: 3, chatOut: 15 },
|
||||
benchmark: { cbaElo: 1203, cbaMmlu: 79 },
|
||||
hidden: true,
|
||||
isLegacy: true,
|
||||
},
|
||||
{
|
||||
id: 'claude-3-haiku-20240307',
|
||||
@@ -131,24 +139,4 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
|
||||
pricing: { chatIn: 0.8, chatOut: 2.4 },
|
||||
hidden: true,
|
||||
},
|
||||
// {
|
||||
// id: 'claude-instant-1.1',
|
||||
// label: 'Claude Instant 1.1',
|
||||
// created: roundTime('2023-03-14'),
|
||||
// description: 'Precise and fast',
|
||||
// contextWindow: 100000,
|
||||
// maxCompletionTokens: 2048,
|
||||
// interfaces: [LLM_IF_OAI_Chat],
|
||||
// hidden: true,
|
||||
// },
|
||||
// {
|
||||
// id: 'claude-1.3',
|
||||
// label: 'Claude 1.3',
|
||||
// created: roundTime('2023-03-14'),
|
||||
// description: 'Claude 1.3 is the latest version of Claude v1',
|
||||
// contextWindow: 100000,
|
||||
// maxCompletionTokens: 4096,
|
||||
// interfaces: [LLM_IF_OAI_Chat],
|
||||
// hidden: true,
|
||||
// },
|
||||
];
|
||||
@@ -17,7 +17,9 @@ import { hardcodedAnthropicModels } from './anthropic.models';
|
||||
// Default hosts
|
||||
const DEFAULT_API_VERSION_HEADERS = {
|
||||
'anthropic-version': '2023-06-01',
|
||||
'anthropic-beta': 'messages-2023-12-15',
|
||||
// Former Betas:
|
||||
// - messages-2023-12-15: to use the Messages API
|
||||
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
|
||||
};
|
||||
const DEFAULT_MAX_TOKENS = 2048;
|
||||
const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com';
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { GeminiModelSchema } from './gemini.wiretypes';
|
||||
import type { ModelDescriptionSchema } from '../llm.server.types';
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '../../store-llms';
|
||||
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '../../store-llms';
|
||||
|
||||
|
||||
// dev options
|
||||
@@ -12,7 +12,21 @@ const geminiChatInterfaces: GeminiModelSchema['supportedGenerationMethods'] = ['
|
||||
|
||||
// unsupported interfaces
|
||||
const filterUnallowedNames = ['Legacy'];
|
||||
const filterUnallowedInterfaces: GeminiModelSchema['supportedGenerationMethods'] = ['generateAnswer', 'embedContent', 'embedText'];
|
||||
const filterUnallowedInterfaces: GeminiModelSchema['supportedGenerationMethods'] = [
|
||||
'generateAnswer', // e.g. removes "models/aqa"
|
||||
'embedContent', // e.g. removes "models/embedding-001"
|
||||
'embedText', // e.g. removes "models/text-embedding-004"
|
||||
'predict', // e.g. removes "models/imagen-3.0-generate-002" (appeared on 2025-02-09)
|
||||
];
|
||||
const filterLyingModelNames: GeminiModelSchema['name'][] = [
|
||||
// 2025-02-27: verified, old model is no more
|
||||
'models/gemini-2.0-flash-exp', // verified, replaced by gemini-2.0-flash, which is non-free anymore
|
||||
|
||||
// 2025-02-09 update: as of now they cleared the list, so we restart
|
||||
// 2024-12-10: names of models that are not what they say they are (e.g. 1114 is actually 1121 as of this date)
|
||||
'models/gemini-1.5-flash-8b-exp-0924', // replaced by non-free
|
||||
'models/gemini-1.5-flash-8b-exp-0827', // replaced by non-free
|
||||
];
|
||||
|
||||
|
||||
/* Manual models details
|
||||
@@ -20,175 +34,405 @@ const filterUnallowedInterfaces: GeminiModelSchema['supportedGenerationMethods']
|
||||
- Latest version gemini-1.0-pro-latest <model>-<generation>-<variation>-latest
|
||||
- Latest stable version gemini-1.0-pro <model>-<generation>-<variation>
|
||||
- Stable versions gemini-1.0-pro-001 <model>-<generation>-<variation>-<version>
|
||||
|
||||
Gemini capabilities chart (updated 2024-10-01):
|
||||
- [table stakes] System instructions
|
||||
- JSON Mode, with optional JSON Schema [NOTE: JSON Schema is poorly supported?]
|
||||
- Adjustable Safety Settings
|
||||
- Caching
|
||||
- Tuning
|
||||
- [good] Function calling, with configuration
|
||||
- [great] Code execution
|
||||
*/
|
||||
|
||||
// Experimental Gemini models are Free of charge
|
||||
const geminiExpPricingFree: ModelDescriptionSchema['pricing'] = {
|
||||
// input: 'free', output: 'free',
|
||||
};
|
||||
|
||||
const gemini20FlashPricing: ModelDescriptionSchema['pricing'] = {
|
||||
chatIn: 0.10, // inputAudio: 0.70,
|
||||
chatOut: 0.40,
|
||||
};
|
||||
|
||||
const gemini20FlashLitePricing: ModelDescriptionSchema['pricing'] = {
|
||||
chatIn: 0.075,
|
||||
chatOut: 0.30,
|
||||
};
|
||||
|
||||
const gemini15FlashPricing: ModelDescriptionSchema['pricing'] = {
|
||||
chatIn: 0.075,
|
||||
chatOut: 0.30,
|
||||
};
|
||||
|
||||
const gemini15Flash8BPricing: ModelDescriptionSchema['pricing'] = {
|
||||
chatIn: 0.0375,
|
||||
chatOut: 0.15,
|
||||
};
|
||||
|
||||
const gemini15ProPricing: ModelDescriptionSchema['pricing'] = {
|
||||
chatIn: 1.25,
|
||||
chatOut: 5.00,
|
||||
};
|
||||
|
||||
|
||||
const _knownGeminiModels: ({
|
||||
id: string,
|
||||
labelOverride?: string,
|
||||
isNewest?: boolean,
|
||||
isPreview?: boolean
|
||||
symLink?: string
|
||||
} & Pick<ModelDescriptionSchema, 'interfaces' | 'pricing' | 'trainingDataCutoff' | 'hidden'>)[] = [
|
||||
isPreview?: boolean,
|
||||
symLink?: string,
|
||||
deprecated?: string, // Gemini may provide deprecation dates
|
||||
_delete?: boolean, // some gemini models are not acknowledged by Google Docs anymore, and leaving them in the list will confuse users
|
||||
} & Pick<ModelDescriptionSchema, 'interfaces' | 'pricing' | 'hidden' | 'benchmark'>)[] = [
|
||||
|
||||
// Generation 1.5
|
||||
/// Generation 2.5
|
||||
|
||||
// 2.5 Pro Experimental
|
||||
{
|
||||
id: 'models/gemini-2.5-pro-exp-03-25',
|
||||
isPreview: true,
|
||||
pricing: geminiExpPricingFree,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1443 },
|
||||
},
|
||||
|
||||
/// Generation 2.0
|
||||
|
||||
// 2.0 Experimental - Pro
|
||||
{
|
||||
hidden: true, // showing the 2.5 instead
|
||||
id: 'models/gemini-2.0-pro-exp-02-05', // Base model: Gemini 2.0 Pro
|
||||
isPreview: true,
|
||||
pricing: geminiExpPricingFree,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1380 },
|
||||
},
|
||||
{
|
||||
hidden: true, // only keeping the latest
|
||||
id: 'models/gemini-2.0-pro-exp',
|
||||
symLink: 'models/gemini-2.0-pro-exp-02-05',
|
||||
// copied from symlink
|
||||
isPreview: true,
|
||||
pricing: geminiExpPricingFree,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1380 },
|
||||
},
|
||||
{
|
||||
_delete: true, // replaced by gemini-2.0-pro-exp-02-05, 2025-02-27: verified, old model is no more
|
||||
id: 'models/gemini-exp-1206',
|
||||
labelOverride: 'Gemini 2.0 Pro Experimental 1206',
|
||||
isPreview: true,
|
||||
pricing: geminiExpPricingFree,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1373 },
|
||||
},
|
||||
|
||||
// 2.0 Experimental - Flash Thinking
|
||||
{
|
||||
hidden: true, // only keeping the latest
|
||||
id: 'models/gemini-2.0-flash-thinking-exp', // alias to the latest Flash Thinking model
|
||||
labelOverride: 'Gemini 2.0 Flash Thinking Experimental',
|
||||
symLink: 'models/gemini-2.0-flash-thinking-exp-01-21',
|
||||
// copied from symlink
|
||||
pricing: geminiExpPricingFree,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
benchmark: { cbaElo: 1385 },
|
||||
isPreview: true,
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-2.0-flash-thinking-exp-01-21',
|
||||
pricing: geminiExpPricingFree,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
benchmark: { cbaElo: 1385 },
|
||||
isPreview: true,
|
||||
},
|
||||
{
|
||||
hidden: true, // replaced by gemini-2.0-flash-thinking-exp-01-21 - 2025-02-27: seems still different on the API, hence no deletion yet
|
||||
id: 'models/gemini-2.0-flash-thinking-exp-1219',
|
||||
labelOverride: 'Gemini 2.0 Flash Thinking Experimental 12-19',
|
||||
pricing: geminiExpPricingFree,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
benchmark: { cbaElo: 1363 },
|
||||
isPreview: true,
|
||||
},
|
||||
|
||||
// 2.0 Experimental - Flash Image Generation
|
||||
{
|
||||
id: 'models/gemini-2.0-flash-exp-image-generation',
|
||||
// labelOverride: 'Gemini 2.0 Flash Native Image Generation',
|
||||
pricing: geminiExpPricingFree,
|
||||
interfaces: [
|
||||
LLM_IF_OAI_Chat, LLM_IF_OAI_Vision,
|
||||
// LLM_IF_HOTFIX_StripSys0, // This first Gemini Image Generation model does not support the developer instruction
|
||||
],
|
||||
isPreview: true,
|
||||
},
|
||||
|
||||
// 2.0 Flash
|
||||
{
|
||||
id: 'models/gemini-2.0-flash-001',
|
||||
pricing: gemini20FlashPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1358 },
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-2.0-flash',
|
||||
symLink: 'models/gemini-2.0-flash-001',
|
||||
// copied from symlink
|
||||
pricing: gemini20FlashPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1358 },
|
||||
},
|
||||
|
||||
// 2.0 Flash Lite
|
||||
{
|
||||
id: 'models/gemini-2.0-flash-lite',
|
||||
pricing: gemini20FlashLitePricing,
|
||||
symLink: 'models/gemini-2.0-flash-lite-001',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1309 },
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-2.0-flash-lite-001',
|
||||
pricing: gemini20FlashLitePricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1309 },
|
||||
},
|
||||
{
|
||||
hidden: true, // discouraged, as the official is out
|
||||
id: 'models/gemini-2.0-flash-lite-preview-02-05',
|
||||
isPreview: true,
|
||||
pricing: gemini20FlashLitePricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1309 },
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-2.0-flash-lite-preview',
|
||||
symLink: 'models/gemini-2.0-flash-lite-preview-02-05',
|
||||
// copied from symlink
|
||||
isPreview: true,
|
||||
pricing: gemini20FlashLitePricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1309 },
|
||||
},
|
||||
|
||||
|
||||
/// Generation 1.5
|
||||
|
||||
// Gemini 1.5 Flash Models
|
||||
{
|
||||
id: 'models/gemini-1.5-flash-latest', // updated regularly and might be a preview version
|
||||
isNewest: true,
|
||||
isPreview: true,
|
||||
pricing: {
|
||||
chatIn: 0.70, // 0.35 up to 128k tokens, 0.70 prompts > 128k tokens
|
||||
chatOut: 2.10, // 1.05 up to 128k tokens, 2.10 prompts > 128k tokens
|
||||
},
|
||||
trainingDataCutoff: 'May 2024',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json], // input: audio, images and text
|
||||
pricing: gemini15FlashPricing,
|
||||
// symLink: '-002 or newer',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.5-flash',
|
||||
// copied from above
|
||||
pricing: {
|
||||
chatIn: 0.70, // 0.35 up to 128k tokens, 0.70 prompts > 128k tokens
|
||||
chatOut: 2.10, // 1.05 up to 128k tokens, 2.10 prompts > 128k tokens
|
||||
},
|
||||
trainingDataCutoff: 'Apr 2024',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json], // input: audio, images and text
|
||||
// Defaults to version 002 on Oct 8, 2024
|
||||
symLink: 'models/gemini-1.5-flash-002',
|
||||
pricing: gemini15FlashPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1271 },
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.5-flash-002', // new stable version
|
||||
pricing: gemini15FlashPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1271 },
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.5-flash-001',
|
||||
// copied from above
|
||||
pricing: {
|
||||
chatIn: 0.70, // 0.35 up to 128k tokens, 0.70 prompts > 128k tokens
|
||||
chatOut: 2.10, // 1.05 up to 128k tokens, 2.10 prompts > 128k tokens
|
||||
},
|
||||
trainingDataCutoff: 'Apr 2024',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json], // input: audio, images and text
|
||||
id: 'models/gemini-1.5-flash-001', // previous stable version
|
||||
pricing: gemini15FlashPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1227 },
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.5-flash-001-tuning', // supports model tuning
|
||||
pricing: gemini15FlashPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn /* Tuning ... */],
|
||||
hidden: true,
|
||||
},
|
||||
|
||||
// Gemini 1.5 Flash-8B Models
|
||||
{
|
||||
id: 'models/gemini-1.5-pro-latest', // updated regularly and might be a preview version
|
||||
isNewest: true,
|
||||
isPreview: true,
|
||||
pricing: {
|
||||
chatIn: 7.00, // $3.50 / 1 million tokens (for prompts up to 128K tokens), $7.00 / 1 million tokens (for prompts longer than 128K)
|
||||
chatOut: 21.00, // $10.50 / 1 million tokens (128K or less), $21.00 / 1 million tokens (128K+)
|
||||
},
|
||||
trainingDataCutoff: 'May 2024',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json], // input: audio, images and text
|
||||
id: 'models/gemini-1.5-flash-8b-latest',
|
||||
isPreview: false,
|
||||
pricing: gemini15Flash8BPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.5-pro', // latest stable -> 001
|
||||
// copied from above
|
||||
pricing: {
|
||||
chatIn: 7.00, // $3.50 / 1 million tokens (for prompts up to 128K tokens), $7.00 / 1 million tokens (for prompts longer than 128K)
|
||||
chatOut: 21.00, // $10.50 / 1 million tokens (128K or less), $21.00 / 1 million tokens (128K+)
|
||||
},
|
||||
trainingDataCutoff: 'Apr 2024',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json],
|
||||
id: 'models/gemini-1.5-flash-8b',
|
||||
symLink: 'models/gemini-1.5-flash-8b-001',
|
||||
pricing: gemini15Flash8BPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1213 },
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.5-flash-8b-001',
|
||||
pricing: gemini15Flash8BPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1213 },
|
||||
hidden: true,
|
||||
},
|
||||
|
||||
// Gemini 1.5 Pro Models
|
||||
{
|
||||
id: 'models/gemini-1.5-pro-latest', // updated to latest stable version
|
||||
pricing: gemini15ProPricing,
|
||||
// symLink: '-002 or newer',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.5-pro',
|
||||
symLink: 'models/gemini-1.5-pro-002',
|
||||
pricing: gemini15ProPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1302 },
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.5-pro-002',
|
||||
pricing: gemini15ProPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1302 },
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.5-pro-001', // stable snapshot
|
||||
// copied from above
|
||||
pricing: {
|
||||
chatIn: 7.00, // $3.50 / 1 million tokens (for prompts up to 128K tokens), $7.00 / 1 million tokens (for prompts longer than 128K)
|
||||
chatOut: 21.00, // $10.50 / 1 million tokens (128K or less), $21.00 / 1 million tokens (128K+)
|
||||
},
|
||||
trainingDataCutoff: 'Apr 2024',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json],
|
||||
id: 'models/gemini-1.5-pro-001',
|
||||
pricing: gemini15ProPricing,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
|
||||
benchmark: { cbaElo: 1260 },
|
||||
hidden: true,
|
||||
},
|
||||
|
||||
|
||||
// Generation 1.0
|
||||
{
|
||||
id: 'models/gemini-1.0-pro-latest',
|
||||
pricing: {
|
||||
chatIn: 0.50,
|
||||
chatOut: 1.50,
|
||||
},
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.0-pro',
|
||||
pricing: {
|
||||
chatIn: 0.50,
|
||||
chatOut: 1.50,
|
||||
},
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-1.0-pro-001',
|
||||
pricing: {
|
||||
chatIn: 0.50,
|
||||
chatOut: 1.50,
|
||||
},
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
hidden: true,
|
||||
},
|
||||
/// Generation 1.0
|
||||
|
||||
// Generation 1.0 + Vision
|
||||
// Gemini 1.0 Pro Vision Model
|
||||
{
|
||||
id: 'models/gemini-1.0-pro-vision-latest',
|
||||
pricing: {
|
||||
chatIn: 0.50,
|
||||
chatOut: 1.50,
|
||||
},
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision], // Text and Images
|
||||
hidden: true,
|
||||
},
|
||||
|
||||
// Older symlinks
|
||||
{
|
||||
id: 'models/gemini-pro',
|
||||
symLink: 'models/gemini-1.0-pro',
|
||||
// copied from symlinked
|
||||
pricing: {
|
||||
chatIn: 0.50,
|
||||
chatOut: 1.50,
|
||||
},
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
hidden: true,
|
||||
_delete: true, // confusing
|
||||
},
|
||||
{
|
||||
id: 'models/gemini-pro-vision',
|
||||
// copied from symlinked
|
||||
symLink: 'models/gemini-1.0-pro-vision',
|
||||
pricing: {
|
||||
chatIn: 0.50,
|
||||
chatOut: 1.50,
|
||||
},
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision], // Text and Images
|
||||
hidden: true,
|
||||
_delete: true, // confusing
|
||||
},
|
||||
|
||||
|
||||
/// Experimental
|
||||
|
||||
// LearnLM Experimental Model
|
||||
{
|
||||
id: 'models/learnlm-1.5-pro-experimental',
|
||||
isPreview: true,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
pricing: geminiExpPricingFree,
|
||||
// hidden: true,
|
||||
// _delete: true,
|
||||
},
|
||||
|
||||
{
|
||||
id: 'models/gemma-3-27b-it',
|
||||
isPreview: true,
|
||||
interfaces: [
|
||||
LLM_IF_OAI_Chat,
|
||||
// LLM_IF_HOTFIX_StripImages, /* "Image input modality is not enabled for models/gemma-3-27b-it" */
|
||||
// LLM_IF_HOTFIX_Sys0ToUsr0, /* "Developer instruction is not enabled for models/gemma-3-27b-it" */
|
||||
],
|
||||
// pricing: geminiExpPricingFree,
|
||||
// hidden: true,
|
||||
// _delete: true,
|
||||
},
|
||||
|
||||
];
|
||||
|
||||
|
||||
/**
 * Predicate used to drop unwanted entries from the Gemini model list.
 *
 * A model is kept only when all three checks pass:
 *  - its `displayName` contains none of the `filterUnallowedNames` substrings,
 *  - its `supportedGenerationMethods` include none of the `filterUnallowedInterfaces`,
 *  - its `name` is not listed in `filterLyingModelNames`.
 *
 * @param geminiModel a model entry as returned by the Gemini list API
 * @returns true when the model should be kept
 */
export function geminiFilterModels(geminiModel: GeminiModelSchema): boolean {
  const isAllowed = !filterUnallowedNames.some(name => geminiModel.displayName.includes(name));
  const isSupported = !filterUnallowedInterfaces.some(iface => geminiModel.supportedGenerationMethods.includes(iface));
  const isWhatItSaysItIs = !filterLyingModelNames.includes(geminiModel.name);
  // single exit: the name-based check must take part in the result (an earlier two-condition return made it unreachable)
  return isAllowed && isSupported && isWhatItSaysItIs;
}
|
||||
|
||||
|
||||
/**
 * Display order of the Gemini model families, expressed as id prefixes: an earlier entry sorts higher.
 *
 * Some entries extend others (e.g. 'models/gemini-2.0-flash-lite' extends 'models/gemini-2.0-flash'),
 * so a lookup must pick the most specific (longest) matching prefix - see _geminiSortRank.
 * Declared readonly, as required for an `as const` literal.
 */
const _sortOderIdPrefix: readonly string[] = [
  'models/gemini-exp',
  'models/gemini-2.5-pro',
  'models/gemini-2.0-pro',
  'models/gemini-2.0-flash-exp-image-generation',
  'models/gemini-2.0-flash-thinking',
  'models/gemini-2.0-flash-0',
  'models/gemini-2.0-flash',
  'models/gemini-2.0-flash-lite',
  'models/gemini-1.5-pro',
  'models/gemini-1.5-flash',
  'models/gemini-1.5-flash-8b',
  'models/gemini-1.0-pro',
  'models/gemini-pro',
  'models/gemma',
  'models/learnlm',
] as const;

/**
 * Position within _sortOderIdPrefix of the longest prefix that `modelId` starts with,
 * or -1 when no prefix matches.
 */
function _geminiSortRank(modelId: string): number {
  let rank = -1;
  let matchedLength = -1;
  _sortOderIdPrefix.forEach((prefix, index) => {
    // longest match wins, so '...-flash-lite' is not captured by the shorter '...-flash'
    if (prefix.length > matchedLength && modelId.startsWith(prefix)) {
      rank = index;
      matchedLength = prefix.length;
    }
  });
  return rank;
}

/**
 * Comparator (for Array.prototype.sort) over Gemini model descriptions.
 *
 * Ordering rules, in priority:
 *  1. symlinked models (label starting with '🔗') go to the bottom;
 *  2. when both ids match a known family prefix, the family listed first in _sortOderIdPrefix wins;
 *  3. otherwise (same family, or at least one id matching no prefix) labels are compared, descending.
 *
 * @returns a negative number when `a` sorts before `b`, a positive number when after, 0 when equivalent
 */
export function geminiSortModels(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number {
  // links to the bottom
  const aIsLink = a.label.startsWith('🔗');
  const bIsLink = b.label.startsWith('🔗');
  if (aIsLink && !bIsLink) return 1;
  if (!aIsLink && bIsLink) return -1;

  // hidden to the bottom (disabled: hidden models are not pushed down)
  // if (a.hidden && !b.hidden) return 1;
  // if (!a.hidden && b.hidden) return -1;

  // models beginning with 'gemini-' to the top (disabled)
  // const aGemini = a.label.startsWith('Gemini');
  // const bGemini = b.label.startsWith('Gemini');
  // if (aGemini && !bGemini) return -1;
  // if (!aGemini && bGemini) return 1;

  // sort by family rank, only when both models belong to a known family
  const aSortIdx = _geminiSortRank(a.id);
  const bSortIdx = _geminiSortRank(b.id);
  if (aSortIdx !== -1 && bSortIdx !== -1 && aSortIdx !== bSortIdx)
    return aSortIdx < bSortIdx ? -1 : 1;

  // sort by label descending
  return b.label.localeCompare(a.label);
}
|
||||
|
||||
export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): ModelDescriptionSchema {
|
||||
|
||||
export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): ModelDescriptionSchema | null {
|
||||
const { description, displayName, name: modelId, supportedGenerationMethods } = geminiModel;
|
||||
|
||||
if (DEV_DEBUG_GEMINI_MODELS)
|
||||
console.log('geminiModelToModelDescription', geminiModel);
|
||||
// if (DEV_DEBUG_GEMINI_MODELS)
|
||||
// console.log('geminiModelToModelDescription', geminiModel);
|
||||
|
||||
// find known manual mapping
|
||||
const knownModel = _knownGeminiModels.find(m => m.id === modelId);
|
||||
if (!knownModel && DEV_DEBUG_GEMINI_MODELS)
|
||||
console.warn('geminiModelToModelDescription: unknown model', modelId, geminiModel);
|
||||
|
||||
// handle _delete
|
||||
if (knownModel?._delete)
|
||||
return null;
|
||||
|
||||
// handle symlinks
|
||||
const label = knownModel?.symLink
|
||||
? `🔗 ${displayName.replace('1.0', '')} → ${knownModel.symLink}`
|
||||
: displayName;
|
||||
let label = knownModel?.symLink
|
||||
? `🔗 ${knownModel?.labelOverride || displayName} → ${knownModel.symLink}`
|
||||
: knownModel?.labelOverride || displayName;
|
||||
|
||||
// FIX: the Gemini 1114 model now returns 1121 as the version.. highlight the issue
|
||||
// if (geminiModel.name.endsWith('1114') && label.endsWith('1121'))
|
||||
// label += ' (really: 1114)';
|
||||
|
||||
// handle hidden models
|
||||
const hasChatInterfaces = supportedGenerationMethods.some(iface => geminiChatInterfaces.includes(iface));
|
||||
@@ -200,14 +444,13 @@ export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): M
|
||||
|
||||
// description
|
||||
const { version, topK, topP, temperature } = geminiModel;
|
||||
const descriptionLong = description + ` (Version: ${version}, Defaults: temperature=${temperature}, topP=${topP}, topK=${topK}, interfaces=[${supportedGenerationMethods.join(',')}])`;
|
||||
const descriptionLong = (description || 'No description.') + ` (Version: ${version}, Defaults: temperature=${temperature}, topP=${topP}, topK=${topK}, interfaces=[${supportedGenerationMethods.join(',')}])`;
|
||||
|
||||
// use known interfaces, or add chat if this is a generateContent model
|
||||
const interfaces: ModelDescriptionSchema['interfaces'] = knownModel?.interfaces || [];
|
||||
if (!interfaces.length && hasChatInterfaces) {
|
||||
interfaces.push(LLM_IF_OAI_Chat);
|
||||
// if (geminiVisionNames.some(name => modelId.includes(name)))
|
||||
// interfaces.push(LLM_IF_OAI_Vision);
|
||||
// newer models get good capabilities by default
|
||||
interfaces.push(LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision, LLM_IF_OAI_Json);
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -218,11 +461,13 @@ export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): M
|
||||
description: descriptionLong,
|
||||
contextWindow: contextWindow,
|
||||
maxCompletionTokens: outputTokenLimit,
|
||||
trainingDataCutoff: knownModel?.trainingDataCutoff,
|
||||
// trainingDataCutoff: knownModel?.trainingDataCutoff, // disabled as we don't get this from Gemini
|
||||
interfaces,
|
||||
// parameterSpecs: knownModel?.parameterSpecs,
|
||||
// rateLimits: isGeminiPro ? { reqPerMinute: 60 } : undefined,
|
||||
// benchmarks: ...
|
||||
pricing: knownModel?.pricing, // TODO: needs <>128k, and per-character and per-image pricing
|
||||
benchmark: knownModel?.benchmark,
|
||||
pricing: knownModel?.pricing,
|
||||
hidden,
|
||||
// deprecated: knownModel?.deprecated,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server';
|
||||
import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
|
||||
|
||||
import { fixupHost } from '~/common/util/urlUtils';
|
||||
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema } from '../llm.server.types';
|
||||
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, type ModelDescriptionSchema } from '../llm.server.types';
|
||||
|
||||
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
|
||||
|
||||
@@ -148,9 +148,10 @@ export const llmGeminiRouter = createTRPCRouter({
|
||||
// as the List API already has all the info on all the models
|
||||
|
||||
// map to our output schema
|
||||
const models = detailedModels
|
||||
const models = (detailedModels
|
||||
.filter(geminiFilterModels)
|
||||
.map(geminiModel => geminiModelToModelDescription(geminiModel))
|
||||
.filter(model => !!model) as ModelDescriptionSchema[])
|
||||
.sort(geminiSortModels);
|
||||
|
||||
return {
|
||||
|
||||
@@ -9,28 +9,30 @@ export const geminiModelsStreamGenerateContentPath = '/v1beta/{model=models/*}:s
|
||||
|
||||
|
||||
// models.list = /v1beta/models
|
||||
// Generation method names known to be reported in `supportedGenerationMethods`.
// Kept as a literal tuple so the enum below is derived from a single list.
const _geminiKnownMethods = [
  'bidiGenerateContent', // appeared on 2024-12, see https://github.com/enricoros/big-AGI/issues/700
  'createCachedContent', // appeared on 2024-06-10, see https://github.com/enricoros/big-AGI/issues/565
  'countMessageTokens',
  'countTextTokens',
  'countTokens',
  'createTunedModel',
  'createTunedTextModel',
  'embedContent',
  'embedText',
  'generateAnswer',
  'generateContent',
  'generateMessage',
  'generateText',
] as const;

// Zod enum over the known method names
const Methods_enum = z.enum(_geminiKnownMethods);
|
||||
|
||||
const geminiModelSchema = z.object({
|
||||
name: z.string(),
|
||||
version: z.string(),
|
||||
displayName: z.string(),
|
||||
description: z.string(),
|
||||
description: z.string().optional(),
|
||||
inputTokenLimit: z.number().int().min(1),
|
||||
outputTokenLimit: z.number().int().min(1),
|
||||
supportedGenerationMethods: z.array(z.enum([
|
||||
'createCachedContent', // appeared on 2024-06-10, see https://github.com/enricoros/big-AGI/issues/565
|
||||
'countMessageTokens',
|
||||
'countTextTokens',
|
||||
'countTokens',
|
||||
'createTunedModel',
|
||||
'createTunedTextModel',
|
||||
'embedContent',
|
||||
'embedText',
|
||||
'generateAnswer',
|
||||
'generateContent',
|
||||
'generateMessage',
|
||||
'generateText',
|
||||
])),
|
||||
supportedGenerationMethods: z.array(z.union([Methods_enum, z.string()])), // relaxed with z.union to not break on expansion
|
||||
temperature: z.number().optional(),
|
||||
topP: z.number().optional(),
|
||||
topK: z.number().optional(),
|
||||
@@ -172,7 +174,7 @@ export const geminiGeneratedContentResponseSchema = z.object({
|
||||
// either all requested candidates are returned or no candidates at all
|
||||
// no candidates are returned only if there was something wrong with the prompt (see promptFeedback)
|
||||
candidates: z.array(z.object({
|
||||
index: z.number(),
|
||||
index: z.number().optional(),
|
||||
content: geminiContentSchema.optional(), // this can be missing if the finishReason is not 'MAX_TOKENS'
|
||||
finishReason: geminiFinishReasonSchema.optional(),
|
||||
safetyRatings: z.array(geminiSafetyRatingSchema).optional(), // undefined when finishReason is 'RECITATION'
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
// here for reference only - for future mapping of CBA scores to the model IDs
|
||||
// const modelIdToPrefixMap: { [key: string]: string } = {
|
||||
// // Anthropic models
|
||||
// 'Claude 3.5 Sonnet': 'claude-3-5-sonnet-20240620',
|
||||
// 'Claude 3 Opus': 'claude-3-opus-20240229',
|
||||
// 'Claude 3 Sonnet': 'claude-3-sonnet-20240229',
|
||||
// 'Claude 3 Haiku': 'claude-3-haiku-20240307',
|
||||
// 'Claude-2.1': 'claude-2.1',
|
||||
// 'Claude-2.0': 'claude-2.0',
|
||||
// 'Claude-1': '', // No exact match
|
||||
// 'Claude-Instant-1': 'claude-instant-1.2', // Closest match
|
||||
//
|
||||
// // Gemini models
|
||||
// 'Gemini-1.5-Pro-Exp-0801': 'models/gemini-1.5-pro-latest', // Closest match
|
||||
// 'Gemini Advanced App (2024-05-14)': '', // No exact match
|
||||
// 'Gemini-1.5-Pro-001': 'models/gemini-1.5-pro-001',
|
||||
// 'Gemini-1.5-Pro-Preview-0409': 'models/gemini-1.5-pro-latest', // Closest match
|
||||
// 'Gemini-1.5-Flash-001': 'models/gemini-1.5-flash-001',
|
||||
// 'Gemini App (2024-01-24)': '', // No exact match
|
||||
// 'Gemini-1.0-Pro-001': 'models/gemini-1.0-pro-001',
|
||||
// 'Gemini Pro': 'models/gemini-pro',
|
||||
//
|
||||
// // OpenAI models (from the previous file)
|
||||
// 'GPT-4o-2024-05-13': 'gpt-4o-2024-05-13',
|
||||
// 'GPT-4o-mini-2024-07-18': 'gpt-4o-mini-2024-07-18',
|
||||
// 'GPT-4-Turbo-2024-04-09': 'gpt-4-turbo-2024-04-09',
|
||||
// 'GPT-4-1106-preview': 'gpt-4-1106-preview',
|
||||
// 'GPT-4-0125-preview': 'gpt-4-0125-preview',
|
||||
// 'GPT-4-0314': 'gpt-4-0314',
|
||||
// 'GPT-4-0613': 'gpt-4-0613',
|
||||
// 'GPT-3.5-Turbo-0613': 'gpt-3.5-turbo-0613',
|
||||
// 'GPT-3.5-Turbo-0314': 'gpt-3.5-turbo-0314',
|
||||
// 'GPT-3.5-Turbo-0125': 'gpt-3.5-turbo-0125',
|
||||
//
|
||||
// // Mistral models (from the previous file)
|
||||
// 'Mistral-Large-2402': 'mistral-large-2402',
|
||||
// 'Mixtral-8x7b-Instruct-v0.1': 'mistralai/Mixtral-8x7B-Instruct-v0.1',
|
||||
//
|
||||
// // Other models without matches
|
||||
// 'Gemini-1.5-Pro-Exp-0801': '',
|
||||
// 'Meta-Llama-3.1-405b-Instruct': '',
|
||||
// 'Gemini-1.5-Pro-001': '',
|
||||
// 'Meta-Llama-3.1-70b-Instruct': '',
|
||||
// 'Yi-Large-preview': '',
|
||||
// 'Deepseek-v2-API-0628': '',
|
||||
// 'Gemma-2-27b-it': '',
|
||||
// 'Yi-Large': '',
|
||||
// 'Nemotron-4-340B-Instruct': '',
|
||||
// 'GLM-4-0520': '',
|
||||
// 'Llama-3-70b-Instruct': '',
|
||||
// 'Reka-Core-20240501': '',
|
||||
// 'Command R+': '',
|
||||
// 'Gemma-2-9b-it': '',
|
||||
// 'Qwen2-72B-Instruct': '',
|
||||
// 'GLM-4-0116': '',
|
||||
// 'Qwen-Max-0428': '',
|
||||
// 'DeepSeek-Coder-V2-Instruct': '',
|
||||
// 'Reka-Flash-Preview-20240611': '',
|
||||
// 'Meta-Llama-3.1-8b-Instruct': '',
|
||||
// 'Qwen1.5-110B-Chat': '',
|
||||
// 'Yi-1.5-34B-Chat': '',
|
||||
// 'Reka-Flash-21B-online': '',
|
||||
// 'Llama-3-8b-Instruct': '',
|
||||
// 'Command R': '',
|
||||
// 'Reka-Flash-21B': '',
|
||||
// 'Qwen1.5-72B-Chat': '',
|
||||
// 'Mixtral-8x22b-Instruct-v0.1': '',
|
||||
// 'Zephyr-ORPO-141b-A35b-v0.1': '',
|
||||
// 'Qwen1.5-32B-Chat': '',
|
||||
// 'Mistral-Next': '',
|
||||
// 'Phi-3-Medium-4k-Instruct': '',
|
||||
// 'Starling-LM-7B-beta': '',
|
||||
// 'Yi-34B-Chat': '',
|
||||
// 'Qwen1.5-14B-Chat': '',
|
||||
// 'WizardLM-70B-v1.0': '',
|
||||
// 'Tulu-2-DPO-70B': '',
|
||||
// 'DBRX-Instruct-Preview': '',
|
||||
// 'Phi-3-Small-8k-Instruct': '',
|
||||
// 'Llama-2-70b-chat': '',
|
||||
// 'OpenChat-3.5-0106': '',
|
||||
// 'Vicuna-33B': '',
|
||||
// 'Snowflake Arctic Instruct': '',
|
||||
// 'Starling-LM-7B-alpha': '',
|
||||
// };
|
||||
@@ -9,34 +9,139 @@ import { wireTogetherAIListOutputSchema } from './togetherai.wiretypes';
|
||||
|
||||
|
||||
// [Azure] / [OpenAI]
|
||||
// https://platform.openai.com/docs/models
|
||||
const _knownOpenAIChatModels: ManualMappings = [
|
||||
|
||||
// GPT-4o -> 2024-05-13
|
||||
// GPT-4o -> 2024-05-13 (Starting October 2nd, 2024, gpt-4o will point to the gpt-4o-2024-08-06 snapshot)
|
||||
{
|
||||
idPrefix: 'gpt-4o',
|
||||
label: 'GPT-4o',
|
||||
description: 'Currently points to gpt-4o-2024-05-13.',
|
||||
symLink: 'gpt-4o-2024-05-13',
|
||||
description: 'Points to gpt-4o-2024-08-06 starting on Oct 2, 2024.',
|
||||
symLink: 'gpt-4o-2024-08-06',
|
||||
hidden: true,
|
||||
// copied from symlinked
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 4096,
|
||||
maxCompletionTokens: 16384,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 5, chatOut: 15 },
|
||||
benchmark: { cbaElo: 1310 },
|
||||
pricing: { chatIn: 2.5, chatOut: 10 },
|
||||
benchmark: { cbaElo: 1286 + 1 },
|
||||
},
|
||||
{
|
||||
isLatest: true,
|
||||
idPrefix: 'gpt-4o-2024-08-06',
|
||||
label: 'GPT-4o (2024-08-06)',
|
||||
description: 'Latest snapshot that supports Structured Outputs',
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 16384,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json], // + Structured Outputs?
|
||||
pricing: { chatIn: 2.5, chatOut: 10 },
|
||||
benchmark: { cbaElo: 1286 + 1 },
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4o-2024-05-13',
|
||||
label: 'GPT-4o (2024-05-13)',
|
||||
description: 'Advanced, multimodal flagship model that’s cheaper and faster than GPT-4 Turbo.',
|
||||
description: 'Advanced, multimodal flagship model that\'s cheaper and faster than GPT-4 Turbo.',
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 4096,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 5, chatOut: 15 },
|
||||
benchmark: { cbaElo: 1310 },
|
||||
benchmark: { cbaElo: 1286 },
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
idPrefix: 'chatgpt-4o-latest',
|
||||
label: 'ChatGPT-4o Latest',
|
||||
description: 'Intended for research and evaluation. Dynamic model continuously updated to the current version of GPT-4o in ChatGPT.',
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 16384,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 5, chatOut: 15 },
|
||||
},
|
||||
|
||||
// GPT-4o mini
|
||||
{
|
||||
idPrefix: 'gpt-4o-mini',
|
||||
label: 'GPT-4o mini',
|
||||
description: 'Currently points to gpt-4o-mini-2024-07-18.',
|
||||
symLink: 'gpt-4o-mini-2024-07-18',
|
||||
hidden: true,
|
||||
// copied from symlinked
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 16384,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 0.15, chatOut: 0.60 },
|
||||
benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4o-mini-2024-07-18',
|
||||
label: 'GPT-4o Mini (2024-07-18)',
|
||||
description: 'Affordable model for fast, lightweight tasks. GPT-4o mini is cheaper and more capable than GPT-3.5 Turbo.',
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 16384,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 0.15, chatOut: 0.60 },
|
||||
},
|
||||
|
||||
// o1-preview
|
||||
{
|
||||
idPrefix: 'o1-preview',
|
||||
label: 'o1 Preview',
|
||||
description: 'Supported in Big-AGI 2. Points to the most recent snapshot of the o1 model: o1-preview-2024-09-12',
|
||||
symLink: 'o1-preview-2024-09-12',
|
||||
hidden: true,
|
||||
// copied from symlinked
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 32768,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
pricing: { chatIn: 15, chatOut: 60 },
|
||||
isPreview: true,
|
||||
},
|
||||
{
|
||||
hidden: true, // we can't support it in Big-AGI 1
|
||||
idPrefix: 'o1-preview-2024-09-12',
|
||||
label: 'o1 Preview (2024-09-12)',
|
||||
description: 'Supported in Big-AGI 2. New reasoning model for complex tasks that require broad general knowledge.',
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 32768,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
pricing: { chatIn: 15, chatOut: 60 },
|
||||
isPreview: true,
|
||||
},
|
||||
|
||||
// o1-mini
|
||||
{
|
||||
idPrefix: 'o1-mini',
|
||||
label: 'o1 Mini',
|
||||
description: 'Supported in Big-AGI 2. Points to the most recent o1-mini snapshot: o1-mini-2024-09-12',
|
||||
symLink: 'o1-mini-2024-09-12',
|
||||
hidden: true,
|
||||
// copied from symlinked
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 65536,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
pricing: { chatIn: 3, chatOut: 12 },
|
||||
isPreview: true,
|
||||
},
|
||||
{
|
||||
hidden: true, // we can't support it in Big-AGI 1
|
||||
idPrefix: 'o1-mini-2024-09-12',
|
||||
label: 'o1 Mini (2024-09-12)',
|
||||
description: 'Supported in Big-AGI 2. Fast, cost-efficient reasoning model tailored to coding, math, and science use cases.',
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 65536,
|
||||
trainingDataCutoff: 'Oct 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
|
||||
pricing: { chatIn: 3, chatOut: 12 },
|
||||
isPreview: true,
|
||||
},
|
||||
|
||||
// GPT4 Turbo with Vision -> 2024-04-09
|
||||
@@ -52,7 +157,7 @@ const _knownOpenAIChatModels: ManualMappings = [
|
||||
trainingDataCutoff: 'Dec 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 10, chatOut: 30 },
|
||||
benchmark: { cbaElo: 1261 },
|
||||
benchmark: { cbaElo: 1257 },
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4-turbo-2024-04-09',
|
||||
@@ -63,12 +168,12 @@ const _knownOpenAIChatModels: ManualMappings = [
|
||||
trainingDataCutoff: 'Dec 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 10, chatOut: 30 },
|
||||
benchmark: { cbaElo: 1261 },
|
||||
benchmark: { cbaElo: 1257 },
|
||||
},
|
||||
|
||||
// GPT4 Turbo Previews
|
||||
{
|
||||
idPrefix: 'gpt-4-turbo-preview', // GPT-4 Turbo preview model -> 0125
|
||||
idPrefix: 'gpt-4-turbo-preview',
|
||||
label: 'GPT-4 Preview Turbo',
|
||||
description: 'GPT-4 Turbo preview model. Currently points to gpt-4-0125-preview.',
|
||||
symLink: 'gpt-4-0125-preview',
|
||||
@@ -80,63 +185,33 @@ const _knownOpenAIChatModels: ManualMappings = [
|
||||
trainingDataCutoff: 'Dec 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 10, chatOut: 30 },
|
||||
benchmark: { cbaElo: 1251 },
|
||||
benchmark: { cbaElo: 1245 },
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4-0125-preview', // GPT-4 Turbo preview model
|
||||
idPrefix: 'gpt-4-0125-preview',
|
||||
label: 'GPT-4 Turbo (0125)',
|
||||
description: 'GPT-4 Turbo preview model intended to reduce cases of "laziness" where the model doesn\'t complete a task. Returns a maximum of 4,096 output tokens.',
|
||||
isPreview: true,
|
||||
description: 'GPT-4 Turbo preview model intended to reduce cases of "laziness" where the model doesn\'t complete a task.',
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 4096,
|
||||
trainingDataCutoff: 'Dec 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 10, chatOut: 30 },
|
||||
benchmark: { cbaElo: 1251 },
|
||||
benchmark: { cbaElo: 1245 },
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4-1106-preview', // GPT-4 Turbo preview model
|
||||
label: 'GPT-4 Turbo (1106)',
|
||||
description: 'GPT-4 Turbo preview model featuring improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Returns a maximum of 4,096 output tokens.',
|
||||
isPreview: true,
|
||||
description: 'GPT-4 Turbo preview model featuring improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.',
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 4096,
|
||||
trainingDataCutoff: 'Apr 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
|
||||
pricing: { chatIn: 10, chatOut: 30 },
|
||||
benchmark: { cbaElo: 1255 },
|
||||
benchmark: { cbaElo: 1251 },
|
||||
hidden: true,
|
||||
},
|
||||
|
||||
// GPT4 Vision Previews
|
||||
{
|
||||
idPrefix: 'gpt-4-vision-preview', // GPT-4 Turbo vision preview
|
||||
label: 'GPT-4 Preview Vision',
|
||||
description: 'GPT-4 model with the ability to understand images, in addition to all other GPT-4 Turbo capabilities. This is a preview model, we recommend developers to now use gpt-4-turbo which includes vision capabilities. Currently points to gpt-4-1106-vision-preview.',
|
||||
symLink: 'gpt-4-1106-vision-preview',
|
||||
// copied from symlinked
|
||||
isPreview: true,
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 4096,
|
||||
trainingDataCutoff: 'Apr 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
|
||||
pricing: { chatIn: 10, chatOut: 30 },
|
||||
hidden: true, // Deprecated in favor of gpt-4-turbo
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4-1106-vision-preview',
|
||||
label: 'GPT-4 Preview Vision (1106)',
|
||||
description: 'GPT-4 model with the ability to understand images, in addition to all other GPT-4 Turbo capabilities. This is a preview model, we recommend developers to now use gpt-4-turbo which includes vision capabilities. Returns a maximum of 4,096 output tokens.',
|
||||
isPreview: true,
|
||||
contextWindow: 128000,
|
||||
maxCompletionTokens: 4096,
|
||||
trainingDataCutoff: 'Apr 2023',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
|
||||
pricing: { chatIn: 10, chatOut: 30 },
|
||||
hidden: true, // Deprecated in favor of gpt-4-turbo
|
||||
},
|
||||
|
||||
|
||||
// GPT4-32k's
|
||||
{
|
||||
@@ -182,7 +257,7 @@ const _knownOpenAIChatModels: ManualMappings = [
|
||||
trainingDataCutoff: 'Sep 2021',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
pricing: { chatIn: 30, chatOut: 60 },
|
||||
benchmark: { cbaElo: 1164 },
|
||||
benchmark: { cbaElo: 1161 },
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-4-0314',
|
||||
@@ -192,7 +267,7 @@ const _knownOpenAIChatModels: ManualMappings = [
|
||||
trainingDataCutoff: 'Sep 2021',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
pricing: { chatIn: 30, chatOut: 60 },
|
||||
benchmark: { cbaElo: 1189 },
|
||||
benchmark: { cbaElo: 1186 },
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
@@ -206,39 +281,27 @@ const _knownOpenAIChatModels: ManualMappings = [
|
||||
trainingDataCutoff: 'Sep 2021',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
pricing: { chatIn: 30, chatOut: 60 },
|
||||
benchmark: { cbaElo: 1164 },
|
||||
benchmark: { cbaElo: 1161 },
|
||||
isLegacy: true,
|
||||
},
|
||||
|
||||
|
||||
// 3.5-Turbo-Instruct (Not for Chat)
|
||||
{
|
||||
idPrefix: 'gpt-3.5-turbo-instruct',
|
||||
label: '3.5-Turbo Instruct',
|
||||
description: 'Similar capabilities as GPT-3 era models. Compatible with legacy Completions endpoint and not Chat Completions.',
|
||||
contextWindow: 4097,
|
||||
trainingDataCutoff: 'Sep 2021',
|
||||
interfaces: [/* NO: LLM_IF_OAI_Chat,*/ LLM_IF_OAI_Complete],
|
||||
pricing: { chatIn: 1.5, chatOut: 2 },
|
||||
hidden: true,
|
||||
},
|
||||
|
||||
|
||||
// 3.5-Turbo-16k's
|
||||
// 3.5-Turbo
|
||||
// As of July 2024, gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.
|
||||
{
|
||||
idPrefix: 'gpt-3.5-turbo-0125',
|
||||
label: '3.5-Turbo (0125)',
|
||||
description: 'The latest GPT-3.5 Turbo model with higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls. Returns a maximum of 4,096 output tokens.',
|
||||
description: 'The latest GPT-3.5 Turbo model with higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls.',
|
||||
contextWindow: 16385,
|
||||
maxCompletionTokens: 4096,
|
||||
trainingDataCutoff: 'Sep 2021',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
pricing: { chatIn: 0.5, chatOut: 1.5 },
|
||||
benchmark: { cbaElo: 1104 },
|
||||
benchmark: { cbaElo: 1105 },
|
||||
},
|
||||
{
|
||||
idPrefix: 'gpt-3.5-turbo-1106',
|
||||
label: '3.5-Turbo (1106)',
|
||||
description: 'The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.',
|
||||
description: 'GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.',
|
||||
contextWindow: 16385,
|
||||
maxCompletionTokens: 4096,
|
||||
trainingDataCutoff: 'Sep 2021',
|
||||
@@ -250,7 +313,7 @@ const _knownOpenAIChatModels: ManualMappings = [
|
||||
{
|
||||
idPrefix: 'gpt-3.5-turbo',
|
||||
label: '3.5-Turbo',
|
||||
description: 'Currently points to gpt-3.5-turbo-0125.',
|
||||
description: 'Currently points to gpt-3.5-turbo-0125. As of July 2024, gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.',
|
||||
symLink: 'gpt-3.5-turbo-0125',
|
||||
hidden: true,
|
||||
// copied
|
||||
@@ -259,7 +322,19 @@ const _knownOpenAIChatModels: ManualMappings = [
|
||||
trainingDataCutoff: 'Sep 2021',
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
pricing: { chatIn: 0.5, chatOut: 1.5 },
|
||||
benchmark: { cbaElo: 1104 },
|
||||
benchmark: { cbaElo: 1105 },
|
||||
},
|
||||
|
||||
// 3.5-Turbo-Instruct (Not for Chat)
|
||||
{
|
||||
idPrefix: 'gpt-3.5-turbo-instruct',
|
||||
label: '3.5-Turbo Instruct',
|
||||
description: 'Similar capabilities as GPT-3 era models. Compatible with legacy Completions endpoint and not Chat Completions.',
|
||||
contextWindow: 4097,
|
||||
trainingDataCutoff: 'Sep 2021',
|
||||
interfaces: [/* NO: LLM_IF_OAI_Chat,*/ LLM_IF_OAI_Complete],
|
||||
pricing: { chatIn: 1.5, chatOut: 2 },
|
||||
hidden: true,
|
||||
},
|
||||
|
||||
|
||||
@@ -667,10 +742,14 @@ export function openRouterModelFamilySortFn(a: { id: string }, b: { id: string }
|
||||
return aPrefixIndex !== -1 ? -1 : 1;
|
||||
}
|
||||
|
||||
export function openRouterModelToModelDescription(wireModel: object): ModelDescriptionSchema {
|
||||
export function openRouterModelToModelDescription(wireModel: object): ModelDescriptionSchema | null {
|
||||
|
||||
// parse the model
|
||||
const model = wireOpenrouterModelsListOutputSchema.parse(wireModel);
|
||||
const { data: model, error } = wireOpenrouterModelsListOutputSchema.safeParse(wireModel);
|
||||
if (error) {
|
||||
console.warn(`openrouterModelToModelDescription: Failed to parse model: ${error}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
// parse pricing
|
||||
const pricing: ModelDescriptionSchema['pricing'] = {
|
||||
@@ -859,41 +938,84 @@ export function perplexityAIModelSort(a: ModelDescriptionSchema, b: ModelDescrip
|
||||
const _knownGroqModels: ManualMappings = [
|
||||
{
|
||||
isLatest: true,
|
||||
idPrefix: 'llama-3.1-405b-reasoning',
|
||||
label: 'Llama 3.1 · 405B',
|
||||
description: 'LLaMA 3.1 405B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
|
||||
contextWindow: 131072,
|
||||
maxCompletionTokens: 8000,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
isLatest: true,
|
||||
idPrefix: 'llama-3.1-70b-versatile',
|
||||
label: 'Llama 3.1 · 70B',
|
||||
description: 'LLaMA 3.1 70B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
|
||||
contextWindow: 131072,
|
||||
maxCompletionTokens: 8000,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
isLatest: true,
|
||||
idPrefix: 'llama-3.1-8b-instant',
|
||||
label: 'Llama 3.1 · 8B',
|
||||
description: 'LLaMA 3.1 8B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
|
||||
contextWindow: 131072,
|
||||
maxCompletionTokens: 8000,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
idPrefix: 'llama3-groq-70b-8192-tool-use-preview',
|
||||
label: 'Llama 3 Groq · 70B Tool Use',
|
||||
description: 'LLaMA 3 70B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
idPrefix: 'llama3-groq-8b-8192-tool-use-preview',
|
||||
label: 'Llama 3 Groq · 8B Tool Use',
|
||||
description: 'LLaMA 3 8B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
idPrefix: 'llama3-70b-8192',
|
||||
label: 'Llama 3 · 70B',
|
||||
description: 'LLaMA3 70b developed by Meta with a context window of 8,192 tokens.',
|
||||
description: 'LLaMA3 70B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
// isLegacy: true,
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
// isLatest: true,
|
||||
idPrefix: 'llama3-8b-8192',
|
||||
label: 'Llama 3 · 8B',
|
||||
description: 'LLaMA3 8b developed by Meta with a context window of 8,192 tokens.',
|
||||
description: 'LLaMA3 8B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
},
|
||||
{
|
||||
idPrefix: 'llama2-70b-4096',
|
||||
label: 'Llama 2 · 70B',
|
||||
description: 'LLaMA2 70b developed by Meta with a context window of 4,096 tokens.',
|
||||
contextWindow: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
// isLegacy: true,
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
idPrefix: 'mixtral-8x7b-32768',
|
||||
label: 'Mixtral 8x7B',
|
||||
description: 'Mixtral 8x7b developed by Mistral with a context window of 32,768 tokens.',
|
||||
description: 'Mixtral 8x7B developed by Mistral with a context window of 32,768 tokens. Supports tool use.',
|
||||
contextWindow: 32768,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
idPrefix: 'gemma2-9b-it',
|
||||
label: 'Gemma 2 · 9B Instruct',
|
||||
description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Supports tool use.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
|
||||
},
|
||||
{
|
||||
idPrefix: 'gemma-7b-it',
|
||||
label: 'Gemma 1.1 · 7B Instruct',
|
||||
description: 'Gemma 7b developed by Google with a context window of 8,192 tokens.',
|
||||
description: 'Gemma 7B developed by Google with a context window of 8,192 tokens. Supports tool use.',
|
||||
contextWindow: 8192,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
hidden: true,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -910,6 +1032,11 @@ export function groqModelToModelDescription(_model: unknown): ModelDescriptionSc
|
||||
}
|
||||
|
||||
export function groqModelSortFn(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number {
|
||||
// sort hidden at the end
|
||||
if (a.hidden && !b.hidden)
|
||||
return 1;
|
||||
if (!a.hidden && b.hidden)
|
||||
return -1;
|
||||
// sort as per their order in the known models
|
||||
const aIndex = _knownGroqModels.findIndex(base => a.id.startsWith(base.idPrefix));
|
||||
const bIndex = _knownGroqModels.findIndex(base => b.id.startsWith(base.idPrefix));
|
||||
|
||||
@@ -256,7 +256,8 @@ export const llmOpenAIRouter = createTRPCRouter({
|
||||
case 'openrouter':
|
||||
models = openAIModels
|
||||
.sort(openRouterModelFamilySortFn)
|
||||
.map(openRouterModelToModelDescription);
|
||||
.map(openRouterModelToModelDescription)
|
||||
.filter(desc => !!desc) as ModelDescriptionSchema[];
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
@@ -5,16 +5,20 @@ export const wireOpenrouterModelsListOutputSchema = z.object({
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
description: z.string(),
|
||||
// NOTE: for 'openrouter/auto', this is: {
|
||||
// "prompt": "-1",
|
||||
// "completion": "-1"
|
||||
// }
|
||||
pricing: z.object({
|
||||
prompt: z.string(),
|
||||
completion: z.string(),
|
||||
image: z.string(),
|
||||
request: z.string(),
|
||||
image: z.string().optional(),
|
||||
request: z.string().optional(),
|
||||
}),
|
||||
context_length: z.number(),
|
||||
architecture: z.object({
|
||||
modality: z.string(), // z.enum(['text', 'multimodal']),
|
||||
tokenizer: z.string(), // e.g. 'Mistral'
|
||||
modality: z.string(), // z.enum(['text', 'multimodal', 'text+image->text]),
|
||||
tokenizer: z.string(), // e.g. 'Mistral', 'Claude'
|
||||
instruct_type: z.string().nullable(),
|
||||
}),
|
||||
top_provider: z.object({
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"functions": {
|
||||
"app/api/trpc-node/**/*": {
|
||||
"maxDuration": 25
|
||||
"api/trpc-node/**/*": {
|
||||
"maxDuration": 30
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user