Compare commits

...

47 Commits

Author SHA1 Message Date
Enrico Ros 14724a864c OpenRouter: compensate for older typescript 2025-04-05 10:30:15 -07:00
Enrico Ros 5e2b196c4d OpenRouter: models list: prevent schema changes from breaking working models. Fixes #787 2025-04-05 10:26:44 -07:00
Enrico Ros e7686f60b1 OpenRouter: models list: ignore missing fields on 'openrouter/auto'. Fixes #787 2025-04-05 10:25:35 -07:00
Enrico Ros 380f666d35 Roll Gemini descriptions. Fixes #783 2025-03-29 12:35:29 -07:00
Enrico Ros 3e277b1a35 Optional desc. #783 2025-03-29 12:35:05 -07:00
Enrico Ros 9bac46ea75 1.16.9 Release 2025-01-21 18:09:37 -08:00
Enrico Ros 2af4ee7dbe Remove v1-dev, fully absorbed into v2-dev. 2025-01-21 18:05:18 -08:00
Enrico Ros 590fc0d021 Gemini: relax parser - Fixes #700 2024-12-19 01:09:40 -08:00
Enrico Ros 746b0dad40 Update Node to 22 2024-12-19 01:08:41 -08:00
Enrico Ros b327da3ded Fix #675 (pre-v2) 2024-11-06 16:37:18 -08:00
Enrico Ros 7a818bdcd0 Update branch names 2024-10-28 20:09:53 -07:00
Enrico Ros c92ee2e22a v1: document branch names 2024-10-28 20:02:18 -07:00
Enrico Ros 632a4a565f [stable] OpenAI: update models 2024-10-25 10:13:13 -07:00
Enrico Ros d712c275a0 [stable] Anthropic: update models 2024-10-25 10:06:42 -07:00
Enrico Ros 1adff7481b Dev survey for Big-AGI 2. 2024-10-11 21:55:46 -07:00
Enrico Ros 393e19dda9 Vercel: fix timeout 2024-10-03 12:37:21 -07:00
Enrico Ros 39c5c7c9ba Call out to Big-AGI 2 2024-09-13 14:06:12 -07:00
Enrico Ros e64a5e59ef 1.16.8 Release 2024-09-13 13:50:00 -07:00
Enrico Ros 574c2cf0e3 Call out to Big-AGI 2 2024-09-13 13:49:11 -07:00
Enrico Ros 1d3321b336 OpenAI: o1 support label 2024-09-13 11:02:33 -07:00
Enrico Ros de25e5822d OpenAI: o1 relabel 2024-09-13 10:59:40 -07:00
Enrico Ros 6a904c9f37 OpenAI: 3.5 non legacy 2024-09-13 10:59:32 -07:00
Enrico Ros 30c3283572 OpenAI: add o1 2024-09-13 10:53:42 -07:00
Enrico Ros 10bba19079 OpenAI: add ChatGPT-4o-latest 2024-09-13 10:53:32 -07:00
Enrico Ros 713079f2f2 OpenAI: bits 2024-09-13 10:53:20 -07:00
Enrico Ros 6e16e989ac OpenAI: move 4o-mini 2024-09-13 10:53:09 -07:00
Enrico Ros 4e89e0b1e4 OpenAI: clean IDs 2024-09-13 10:52:19 -07:00
Enrico Ros 6067c289ab OpenAI: remove vision previews 2024-09-13 10:52:00 -07:00
Enrico Ros 32ebfea9cb OpenAI: reorder 2024-09-13 10:20:52 -07:00
Enrico Ros dec280d54d 1.16.7 Release
(cherry picked from commit 22b32d571d)
2024-08-07 02:51:59 -07:00
Enrico Ros 4823e97783 Mapping doc, for the future.
(cherry picked from commit a416cafc4e)
2024-08-07 02:51:59 -07:00
Enrico Ros 6a5685995f OpenAI: update models
(cherry picked from commit 5f5efe6133)
2024-08-07 02:51:59 -07:00
Enrico Ros 3b4d5691d7 1.16.6: Release. Fixes #604 2024-07-24 21:31:57 -07:00
Enrico Ros 45c09d021a Groq: update output tokens (max 8,000 for 3.1) 2024-07-24 21:27:20 -07:00
Enrico Ros 8ef759fe0f Groq: update Models 2024-07-24 21:27:12 -07:00
Enrico Ros c06735fdd2 1.16.5: Release 2024-07-18 16:15:53 -07:00
Enrico Ros cf4297a1af OpenAI: support 4o Mini (16384 token output) 2024-07-18 16:15:37 -07:00
Enrico Ros 5d458d68bd Warn devs. 2024-07-18 16:12:17 -07:00
Enrico Ros c3db077ae8 1.16.4: release 2024-07-15 14:13:36 -07:00
Enrico Ros 779b265b20 Anthropic: 8192 tokens 2024-07-15 14:08:02 -07:00
Enrico Ros 7d6d7e619b Anthropic: hardcode date 2024-06-20 12:42:10 -07:00
Enrico Ros 34caa16e39 1.16.3: release 2024-06-20 12:27:42 -07:00
Enrico Ros 976426dbd3 Anthropic: support Claude 3.5 Sonnet 2024-06-20 12:27:26 -07:00
Enrico Ros b4d8e39d56 Gemini: acknowledge the new capability to createCachedContent. Fixes #565 2024-06-10 23:56:02 -07:00
Enrico Ros 11c41e7381 Function call: increase debug verbosity 2024-06-07 14:18:01 -07:00
Enrico Ros 358d8a54ff Increase llms alignment before function calling. 2024-06-07 14:11:36 -07:00
Enrico Ros 3c8fedce68 Highlight issues with chatGenerateWithFunctions 2024-06-07 12:38:21 -07:00
42 changed files with 1038 additions and 379 deletions
+1 -2
@@ -51,8 +51,7 @@ jobs:
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=raw,value=development,enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=stable,enable=${{ github.ref == 'refs/heads/main-stable' }}
type=raw,value=stable,enable=${{ github.ref == 'refs/heads/v1-stable' }}
type=ref,event=tag # Use the tag name as a tag for tag builds
type=semver,pattern={{version}} # Generate semantic versioning tags for tag builds
type=sha # Just in case none of the above applies
+26 -11
@@ -11,27 +11,42 @@ Stay ahead of the curve with big-AGI. 🚀 Pros & Devs love big-AGI. 🤖
[![Official Website](https://img.shields.io/badge/BIG--AGI.com-%23096bde?style=for-the-badge&logo=vercel&label=launch)](https://big-agi.com)
> 🚀 Big-AGI 2 is launching Q4 2024. Be the first to experience it before the public release.
>
> 👉 [Apply for Early Access](https://y2rjg0zillz.typeform.com/to/ZSADpr5u?utm_source=gh-stable&utm_medium=readme&utm_campaign=ea2)
Or fork & run on Vercel
[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fenricoros%2Fbig-AGI&env=OPENAI_API_KEY&envDescription=Backend%20API%20keys%2C%20optional%20and%20may%20be%20overridden%20by%20the%20UI.&envLink=https%3A%2F%2Fgithub.com%2Fenricoros%2Fbig-AGI%2Fblob%2Fmain%2Fdocs%2Fenvironment-variables.md&project-name=big-AGI)
## 👉 [roadmap](https://github.com/users/enricoros/projects/4/views/2) 👉 [installation](docs/installation.md) 👉 [documentation](docs/README.md)
### New Version
> Note: bigger better features (incl. Beam-2) are being cooked outside of `main`.
This repository contains two main versions:
[//]: # (big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap)
- Big-AGI 2: next-generation, bringing the most advanced AI experience
- `v2-dev`: V2 development branch, the exciting one, future default
- Big-AGI Stable: as deployed on big-agi.com
- `v1-stable`: Current stable version & Docker 'latest' tag
### What's New in 1.16.2 · Jun 7, 2024 (minor release)
Note: After the V2 release in Q4, `v2-dev` will become the default branch and `v1-dev` will reach EOL.
- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam
### Quick links: 👉 [roadmap](https://github.com/users/enricoros/projects/4/views/2) 👉 [installation](docs/installation.md) 👉 [documentation](docs/README.md)
### What's New in 1.16.1 · May 13, 2024 (minor release)
### What's New in 1.16.1...1.16.9 · Jan 21, 2025 (patch releases)
- Support for the new OpenAI GPT-4o 2024-05-13 model
- 1.16.9: Docker Gemini fix (R1 models are supported in Big-AGI 2)
- 1.16.8: OpenAI ChatGPT-4o Latest (o1 models are supported in Big-AGI 2)
- 1.16.7: OpenAI support for GPT-4o 2024-08-06
- 1.16.6: Groq support for Llama 3.1 models
- 1.16.5: GPT-4o Mini support
- 1.16.4: 8192 tokens support for Claude 3.5 Sonnet
- 1.16.3: Anthropic Claude 3.5 Sonnet model support
- 1.16.2: Improve web downloads, as text, markdown, or HTML
- 1.16.2: Proper support for Gemini models
- 1.16.2: Added the latest Mistral model
- 1.16.2: Tokenizer support for gpt-4o
- 1.16.2: Updates to Beam
- 1.16.1: Support for the new OpenAI GPT-4o 2024-05-13 model
### What's New in 1.16.0 · May 9, 2024 · Crystal Clear
+14 -10
@@ -10,17 +10,21 @@ by release.
- milestone: [1.17.0](https://github.com/enricoros/big-agi/milestone/17)
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
### What's New in 1.16.2 · Jun 7, 2024 (minor release)
### What's New in 1.16.1...1.16.9 · Jan 21, 2025 (patch releases)
- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam
### What's New in 1.16.1 · May 13, 2024 (minor release)
- Support for the new OpenAI GPT-4o 2024-05-13 model
- 1.16.9: Docker Gemini fix (R1 models are supported in Big-AGI 2)
- 1.16.8: OpenAI ChatGPT-4o Latest (o1 models are supported in Big-AGI 2)
- 1.16.7: OpenAI support for GPT-4o 2024-08-06
- 1.16.6: Groq support for Llama 3.1 models
- 1.16.5: GPT-4o Mini support
- 1.16.4: 8192 tokens support for Claude 3.5 Sonnet
- 1.16.3: Anthropic Claude 3.5 Sonnet model support
- 1.16.2: Improve web downloads, as text, markdown, or HTML
- 1.16.2: Proper support for Gemini models
- 1.16.2: Added the latest Mistral model
- 1.16.2: Tokenizer support for gpt-4o
- 1.16.2: Updates to Beam
- 1.16.1: Support for the new OpenAI GPT-4o 2024-05-13 model
### What's New in 1.16.0 · May 9, 2024 · Crystal Clear
+1 -1
@@ -79,7 +79,7 @@
"typescript": "^5.4.5"
},
"engines": {
"node": "^20.0.0 || ^18.0.0"
"node": "^22.0.0 || ^20.0.0 || ^18.0.0"
}
},
"node_modules/@babel/code-frame": {
+1 -1
@@ -88,6 +88,6 @@
"typescript": "^5.4.5"
},
"engines": {
"node": "^20.0.0 || ^18.0.0"
"node": "^22.0.0 || ^20.0.0 || ^18.0.0"
}
}
+1 -1
@@ -277,7 +277,7 @@ export function AppChat() {
const conversation = getConversation(conversationId);
if (!conversation)
return;
const imaginedPrompt = await imaginePromptFromText(messageText) || 'An error sign.';
const imaginedPrompt = await imaginePromptFromText(messageText, conversationId) || 'An error sign.';
await handleExecuteAndOutcome('generate-image', conversationId, [
...conversation.messages,
createDMessage('user', imaginedPrompt),
@@ -280,6 +280,7 @@ export function ChatMessage(props: {
const wasEdited = !!messageUpdated;
const textSel = selText ? selText : messageText;
// WARNING: if you get an issue here, you're downgrading from the new Big-AGI 2 data format to 1.x.
const isSpecialT2I = textSel.startsWith('https://images.prodia.xyz/') || textSel.startsWith('/draw ') || textSel.startsWith('/imagine ') || textSel.startsWith('/img ');
const couldDiagram = textSel.length >= 100 && !isSpecialT2I;
const couldImagine = textSel.length >= 3 && !isSpecialT2I;
+2 -2
@@ -2,7 +2,7 @@ import type { DLLMId } from '~/modules/llms/store-llms';
import type { StreamingClientUpdate } from '~/modules/llms/vendors/unifiedStreamingClient';
import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
import { conversationAutoTitle } from '~/modules/aifn/autotitle/autoTitle';
import { llmStreamingChatGenerate, VChatContextRef, VChatContextName, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
import type { DMessage } from '~/common/state/store-chats';
@@ -63,7 +63,7 @@ type StreamMessageStatus = { outcome: StreamMessageOutcome, errorMessage?: strin
export async function streamAssistantMessage(
llmId: DLLMId,
messagesHistory: VChatMessageIn[],
contextName: VChatContextName,
contextName: VChatStreamContextName,
contextRef: VChatContextRef,
throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce the message frequency with the square root
autoSpeak: ChatAutoSpeakType,
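One plausible reading of the throttle comment above, as a sketch (the formula is an assumption; this diff does not show the implementation):

// Hypothetical interpretation of throttleUnits (assumption, not shown in this diff):
// 0 disables throttling, 1 keeps the 12Hz default, larger values divide by the square root.
function assumedUpdateFrequencyHz(throttleUnits: number): number | null {
  if (throttleUnits === 0) return null; // throttling disabled
  return 12 / Math.sqrt(throttleUnits); // 1 -> 12Hz, 4 -> 6Hz, 9 -> 4Hz
}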
+8
@@ -14,6 +14,7 @@ import { capitalizeFirstLetter } from '~/common/util/textUtils';
import { NewsItems } from './news.data';
import { beamNewsCallout } from './beam.data';
import { bigAgi2NewsCallout } from './bigAgi2.data';
// number of news items to show by default, before the expander
@@ -110,6 +111,13 @@ export function AppNews() {
const addPadding = false; //!firstCard; // || showExpander;
return <React.Fragment key={idx}>
{/* Inject the Big-AGI 2.0 item here*/}
{idx === 0 && (
<Box sx={{ mb: 3 }}>
{bigAgi2NewsCallout}
</Box>
)}
{/* Inject the Beam item here*/}
{idx === 2 && (
<Box sx={{ mb: 3 }}>
-1
@@ -2,7 +2,6 @@ import * as React from 'react';
import { Button, Card, CardContent, Grid, Typography } from '@mui/joy';
import LaunchIcon from '@mui/icons-material/Launch';
import ThumbUpRoundedIcon from '@mui/icons-material/ThumbUpRounded';
import { Link } from '~/common/components/Link';
+40
@@ -0,0 +1,40 @@
import * as React from 'react';
import { Button, Card, CardContent, Grid, Typography } from '@mui/joy';
import AccessTimeIcon from '@mui/icons-material/AccessTime';
import LaunchIcon from '@mui/icons-material/Launch';
import { Link } from '~/common/components/Link';
const bigAgi2SurveyUrl = 'https://y2rjg0zillz.typeform.com/to/ZSADpr5u?utm_source=gh-stable&utm_medium=news&utm_campaign=ea2';
export const bigAgi2NewsCallout =
<Card variant='solid' invertedColors>
<CardContent sx={{ gap: 2 }}>
<Typography level='title-lg'>
Big-AGI 2.0 - In Development
</Typography>
<Typography level='body-sm'>
We&apos;re building the next version of Big-AGI with your needs in mind. New features, better performance, enhanced AI interactions. Help us shape it.
</Typography>
<Grid container spacing={1}>
<Grid xs={12} sm={7}>
<Button
fullWidth variant='soft' color='primary' endDecorator={<LaunchIcon />}
component={Link} href={bigAgi2SurveyUrl} noLinkStyle target='_blank'
>
Apply for Early Access
</Button>
</Grid>
<Grid xs={12} sm={5} sx={{ display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
<Button
fullWidth variant='outlined' color='primary' startDecorator={<AccessTimeIcon />}
disabled
>
Coming Fall 2024
</Button>
</Grid>
</Grid>
</CardContent>
</Card>;
+10 -2
@@ -61,7 +61,7 @@ export const NewsItems: NewsItem[] = [
]
}*/
{
versionCode: '1.16.2',
versionCode: '1.16.9',
versionName: 'Crystal Clear',
versionDate: new Date('2024-06-07T05:00:00Z'),
// versionDate: new Date('2024-05-13T19:00:00Z'),
@@ -77,8 +77,16 @@ export const NewsItems: NewsItem[] = [
{ text: <>More: <B issue={517}>code soft-wrap</B>, selection toolbar, <B issue={507}>3x faster</B> on Apple silicon</>, issue: 507 },
{ text: <>Updated <B>Anthropic</B>*, <B>Groq</B>, <B>Ollama</B>, <B>OpenAI</B>*, <B>OpenRouter</B>*, and <B>Perplexity</B></> },
{ text: <>Developers: update LLMs data structures</>, dev: true },
{ text: <>1.16.1: Support for <B>OpenAI</B> <B href='https://openai.com/index/hello-gpt-4o/'>GPT-4o</B> (refresh your OpenAI models)</> },
{ text: <>1.16.1: Support for <B>OpenAI</B> <B href='https://openai.com/index/hello-gpt-4o/'>GPT-4o</B></> },
{ text: <>1.16.2: Proper <B>Gemini</B> support, <B>HTML/Markdown</B> downloads, and latest <B>Mistral</B></> },
{ text: <>1.16.3: Support for <B href='https://www.anthropic.com/news/claude-3-5-sonnet'>Claude 3.5 Sonnet</B> (refresh your <B>Anthropic</B> models)</> },
{ text: <>1.16.4: <B>8192 tokens</B> support for Claude 3.5 Sonnet</> },
{ text: <>1.16.5: OpenAI <B>GPT-4o Mini</B> support</> },
{ text: <>1.16.6: Groq <B>Llama 3.1</B> support</> },
{ text: <>1.16.7: Gpt-4o <B>2024-08-06</B></> },
{ text: <>1.16.8: <B>ChatGPT-4o</B> latest</> },
{ text: <>1.16.9: <B>Gemini</B> fixes</> },
{ text: <>OpenAI <B>o1</B>, DeepSeek R1, and newer models require Big-AGI 2. <B href='https://y2rjg0zillz.typeform.com/to/ZSADpr5u?utm_source=gh-stable&utm_medium=news&utm_campaign=ea2'>Sign up here</B></> },
],
},
{
+1 -1
@@ -7,7 +7,7 @@ import { useAppStateStore } from '~/common/state/store-appstate';
// update this variable every time you want to broadcast a new version to clients
export const incrementalNewsVersion: number = 16.1; // not notifying for 16.2
export const incrementalNewsVersion: number = 16.1; // not notifying for 1.16.9
interface NewsState {
+5 -2
@@ -8,8 +8,11 @@ export function prettyBaseModel(model: string | undefined): string {
if (!model) return '';
if (model.includes('gpt-4-vision-preview')) return 'GPT-4 Vision';
if (model.includes('gpt-4-1106-preview')) return 'GPT-4 Turbo';
if (model.includes('gpt-4-32k')) return 'gpt-4-32k';
if (model.includes('gpt-4')) return 'gpt-4';
if (model.includes('gpt-4-32k')) return 'GPT-4-32k';
if (model.includes('gpt-4o-mini')) return 'GPT-4o Mini';
if (model.includes('gpt-4o')) return 'GPT-4o';
if (model.includes('gpt-4-turbo')) return 'GPT-4 Turbo';
if (model.includes('gpt-4')) return 'GPT-4';
if (model.includes('gpt-3.5-turbo-instruct')) return '3.5 Turbo Instruct';
if (model.includes('gpt-3.5-turbo-1106')) return '3.5 Turbo 16k';
if (model.includes('gpt-3.5-turbo-16k')) return '3.5 Turbo 16k';
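A quick illustration of why the match order above matters; the outputs follow directly from the branches shown (sketch):

// More specific substrings are tested before broader ones:
prettyBaseModel('gpt-4o-mini-2024-07-18'); // 'GPT-4o Mini' (matched before the broader 'gpt-4o')
prettyBaseModel('gpt-4o-2024-08-06');      // 'GPT-4o'
prettyBaseModel('gpt-4-turbo-2024-04-09'); // 'GPT-4 Turbo' (falls through to the 'gpt-4-turbo' branch)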
@@ -1,4 +1,4 @@
import { llmChatGenerateOrThrow, VChatFunctionIn } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatFunctionIn, VChatMessageIn } from '~/modules/llms/llm.client';
import { useModelsStore } from '~/modules/llms/store-llms';
import { useChatStore } from '~/common/state/store-chats';
@@ -83,13 +83,18 @@ export function autoSuggestions(conversationId: string, assistantMessageId: stri
// Follow-up: Auto-Diagrams
if (suggestDiagrams) {
llmChatGenerateOrThrow(funcLLMId, [
{ role: 'system', content: systemMessage.text },
{ role: 'user', content: userMessage.text },
{ role: 'assistant', content: assistantMessageText },
], [suggestPlantUMLFn], 'draw_plantuml_diagram',
const instructions: VChatMessageIn[] = [
{ role: 'system', content: systemMessage.text },
{ role: 'user', content: userMessage.text },
{ role: 'assistant', content: assistantMessageText },
];
llmChatGenerateOrThrow(
funcLLMId,
instructions,
'chat-followup-diagram', conversationId,
[suggestPlantUMLFn], 'draw_plantuml_diagram',
).then(chatResponse => {
// cheap way to check if the function was supported
if (!('function_arguments' in chatResponse))
return;
+16 -14
@@ -1,5 +1,5 @@
import { getFastLLMId } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
import { useChatStore } from '~/common/state/store-chats';
@@ -34,21 +34,23 @@ export async function conversationAutoTitle(conversationId: string, forceReplace
try {
// LLM chat-generate call
const instructions: VChatMessageIn[] = [
{ role: 'system', content: `You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.` },
{
role: 'user', content:
'Analyze the given short conversation (every line is truncated) and extract a concise chat title that ' +
'summarizes the conversation in as little as a couple of words.\n' +
'Only respond with the lowercase short title and nothing else.\n' +
'\n' +
'```\n' +
historyLines.join('\n') +
'```\n',
},
];
const chatResponse = await llmChatGenerateOrThrow(
fastLLMId,
[
{ role: 'system', content: `You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.` },
{
role: 'user', content:
'Analyze the given short conversation (every line is truncated) and extract a concise chat title that ' +
'summarizes the conversation in as little as a couple of words.\n' +
'Only respond with the lowercase short title and nothing else.\n' +
'\n' +
'```\n' +
historyLines.join('\n') +
'```\n',
},
],
instructions,
'chat-ai-title', conversationId,
null, null,
);
@@ -1,5 +1,5 @@
import { getFastLLMId } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
const simpleImagineSystemPrompt =
@@ -10,14 +10,15 @@ Provide output as a lowercase prompt and nothing else.`;
/**
* Creates a caption for a drawing or photo given some description - used to elevate the quality of the imaging
*/
export async function imaginePromptFromText(messageText: string): Promise<string | null> {
export async function imaginePromptFromText(messageText: string, contextRef: string): Promise<string | null> {
const fastLLMId = getFastLLMId();
if (!fastLLMId) return null;
try {
const chatResponse = await llmChatGenerateOrThrow(fastLLMId, [
const instructions: VChatMessageIn[] = [
{ role: 'system', content: simpleImagineSystemPrompt },
{ role: 'user', content: 'Write a prompt, based on the following input.\n\n```\n' + messageText.slice(0, 1000) + '\n```\n' },
], null, null);
];
const chatResponse = await llmChatGenerateOrThrow(fastLLMId, instructions, 'draw-expand-prompt', contextRef, null, null);
return chatResponse.content?.trim() ?? null;
} catch (error: any) {
console.error('imaginePromptFromText: fetch request error:', error);
+1 -1
@@ -132,7 +132,7 @@ export class Agent {
S.messages.push({ role: 'user', content: prompt });
let content: string;
try {
content = (await llmChatGenerateOrThrow(llmId, S.messages, null, null, 500)).content;
content = (await llmChatGenerateOrThrow(llmId, S.messages, 'chat-react-turn', null, null, null, 500)).content;
} catch (error: any) {
content = `Error in llmChatGenerateOrThrow: ${error}`;
}
+4 -3
@@ -1,5 +1,5 @@
import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
// prompt to be tried when doing recursive summarization.
@@ -80,10 +80,11 @@ async function cleanUpContent(chunk: string, llmId: DLLMId, _ignored_was_targetW
const autoResponseTokensSize = contextTokens ? Math.floor(contextTokens * outputTokenShare) : null;
try {
const chatResponse = await llmChatGenerateOrThrow(llmId, [
const instructions: VChatMessageIn[] = [
{ role: 'system', content: cleanupPrompt },
{ role: 'user', content: chunk },
], null, null, autoResponseTokensSize ?? undefined);
];
const chatResponse = await llmChatGenerateOrThrow(llmId, instructions, 'chat-ai-summarize', null, null, null, autoResponseTokensSize ?? undefined);
return chatResponse?.content ?? '';
} catch (error: any) {
return '';
+2 -2
@@ -1,7 +1,7 @@
import * as React from 'react';
import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
// set to true to log to the console
@@ -20,7 +20,7 @@ export interface LLMChainStep {
/**
* React hook to manage a chain of LLM transformations.
*/
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatContextName, contextRef: VChatContextRef) {
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatStreamContextName, contextRef: VChatContextRef) {
// state
const [chain, setChain] = React.useState<ChainState | null>(null);
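A hypothetical call site for the updated hook signature (sketch only; steps, llmId, inputText, onDone, and conversationId are assumed names):

// 'persona-extract' is one of the streaming context names defined in llm.server.types:
useLLMChain(steps, llmId, inputText, onDone, 'persona-extract', conversationId);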
+2 -2
@@ -1,7 +1,7 @@
import * as React from 'react';
import type { DLLMId } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
export function useStreamChatText() {
@@ -13,7 +13,7 @@ export function useStreamChatText() {
const abortControllerRef = React.useRef<AbortController | null>(null);
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatContextName, contextRef: VChatContextRef) => {
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatStreamContextName, contextRef: VChatContextRef) => {
setStreamError(null);
setPartialText(null);
setText(null);
+9 -12
@@ -2,7 +2,7 @@ import { sendGAEvent } from '@next/third-parties/google';
import { hasGoogleAnalytics } from '~/common/components/GoogleAnalytics';
import type { ModelDescriptionSchema } from './server/llm.server.types';
import type { GenerateContextNameSchema, ModelDescriptionSchema, StreamingContextNameSchema } from './server/llm.server.types';
import type { OpenAIWire } from './server/openai/openai.wiretypes';
import type { StreamingClientUpdate } from './vendors/unifiedStreamingClient';
import { DLLM, DLLMId, DModelSource, DModelSourceId, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, useModelsStore } from './store-llms';
@@ -21,14 +21,8 @@ export interface VChatMessageIn {
export type VChatFunctionIn = OpenAIWire.ChatCompletion.RequestFunctionDef;
export type VChatContextName =
| 'conversation'
| 'ai-diagram'
| 'ai-flattener'
| 'beam-scatter'
| 'beam-gather'
| 'call'
| 'persona-extract';
export type VChatStreamContextName = StreamingContextNameSchema;
export type VChatGenerateContextName = GenerateContextNameSchema;
export type VChatContextRef = string;
export interface VChatMessageOut {
@@ -122,7 +116,10 @@ function modelDescriptionToDLLMOpenAIOptions<TSourceSetup, TLLMOptions>(model: M
export async function llmChatGenerateOrThrow<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown>(
llmId: DLLMId,
messages: VChatMessageIn[],
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
contextName: VChatGenerateContextName,
contextRef: VChatContextRef | null,
functions: VChatFunctionIn[] | null,
forceFunctionName: string | null,
maxTokens?: number,
): Promise<VChatMessageOut | VChatMessageOrFunctionCallOut> {
@@ -146,14 +143,14 @@ export async function llmChatGenerateOrThrow<TSourceSetup = unknown, TAccess = u
await new Promise(resolve => setTimeout(resolve, delay));
// execute via the vendor
return await vendor.rpcChatGenerateOrThrow(access, options, messages, functions, forceFunctionName, maxTokens);
return await vendor.rpcChatGenerateOrThrow(access, options, messages, contextName, contextRef, functions, forceFunctionName, maxTokens);
}
export async function llmStreamingChatGenerate<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown>(
llmId: DLLMId,
messages: VChatMessageIn[],
contextName: VChatContextName,
contextName: VChatStreamContextName,
contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null,
forceFunctionName: string | null,
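For reference, a minimal sketch of a call with the reordered non-streaming signature, mirroring the call sites updated elsewhere in this diff:

const chatResponse = await llmChatGenerateOrThrow(
  fastLLMId,       // DLLMId
  instructions,    // VChatMessageIn[]
  'chat-ai-title', // contextName: VChatGenerateContextName
  conversationId,  // contextRef: VChatContextRef | null
  null, null,      // functions, forceFunctionName
);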
@@ -1,4 +1,5 @@
import * as React from 'react';
import TimeAgo from 'react-timeago';
import { shallow } from 'zustand/shallow';
import { Box, Button, ButtonGroup, Divider, FormControl, Input, Switch, Tooltip, Typography } from '@mui/joy';
@@ -132,10 +133,10 @@ export function LLMOptionsModal(props: { id: DLLMId, onClose: () => void }) {
llm id: {llm.id}<br />
context tokens: <b>{llm.contextTokens ? llm.contextTokens.toLocaleString() : 'not provided'}</b>{` · `}
max output tokens: <b>{llm.maxOutputTokens ? llm.maxOutputTokens.toLocaleString() : 'not provided'}</b><br />
{!!llm.created && <>created: {(new Date(llm.created * 1000)).toLocaleString()}<br /></>}
{!!llm.created && <>created: <TimeAgo date={new Date(llm.created * 1000)} /><br /></>}
{/*· tags: {llm.tags.join(', ')}*/}
{!!llm.pricing && <>pricing: $<b>{llm.pricing.chatIn || '(unk) '}</b>/M in, $<b>{llm.pricing.chatOut || '(unk) '}</b>/M out<br /></>}
{!!llm.benchmark && <>benchmark: <b>{llm.benchmark.cbaElo?.toLocaleString() || '(unk) '}</b> CBA Elo<br /></>}
{/*{!!llm.benchmark && <>benchmark: <b>{llm.benchmark.cbaElo?.toLocaleString() || '(unk) '}</b> CBA Elo<br /></>}*/}
config: {JSON.stringify(llm.options)}
</Typography>
</Box>}
@@ -4,14 +4,71 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Vision } from '../../store-llms';
const roundTime = (date: string) => Math.round(new Date(date).getTime() / 1000);
export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: boolean })[] = [
// Claude 3.5 models - https://docs.anthropic.com/en/docs/about-claude/models
// {
// id: 'claude-3.5-opus', // ...
// label: 'Claude 3.5 Opus',
// created: roundTime(?),
// description: ?,
// contextWindow: 200000 ?, // Characters
// maxCompletionTokens: 4096 ?,
// trainingDataCutoff: ?,
// interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
// pricing: { chatIn: 15, chatOut: 75 },
// benchmark: {
// cbaElo: 1256, // Placeholder
// cbaMmlu: 86.8, // Placeholder
// },
// },
{
id: 'claude-3-5-sonnet-20241022',
label: 'Claude 3.5 Sonnet',
created: roundTime('2024-10-22 06:00'),
description: 'Most intelligent Claude model to date',
contextWindow: 200000, // Characters
maxCompletionTokens: 8192,
trainingDataCutoff: 'Apr 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 3, chatOut: 15 },
benchmark: { cbaElo: 1269, cbaMmlu: 88.7 }, // moved from 3.5 Sonnet (Previous Version), TO UPDATE!!
},
{
id: 'claude-3-5-sonnet-20240620',
label: 'Claude 3.5 Sonnet (Previous)',
created: roundTime('2024-06-20 06:00'),
description: 'The most intelligent Claude model',
contextWindow: 200000, // Characters
maxCompletionTokens: 8192,
trainingDataCutoff: 'Apr 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 3, chatOut: 15 },
benchmark: { cbaElo: 1269 - 0.1, cbaMmlu: 88.7 - 0.1 },
hidden: true,
},
// {
// id: 'claude-3.5-haiku', // ...
// label: 'Claude 3.5 Haiku',
// created: roundTime(?),
// description: ?,
// contextWindow: 200000 ?, // Characters
// maxCompletionTokens: 4096 ?,
// trainingDataCutoff: ?,
// interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
// pricing: { chatIn: 0.25, chatOut: 1.25 },
// benchmark: {
// cbaElo: 1181, // Placeholder
// cbaMmlu: 75.2, // Placeholder
// },
// },
// Claude-3 models - https://docs.anthropic.com/claude/docs/models-overview#model-comparison
// Claude 3 models
{
id: 'claude-3-opus-20240229',
label: 'Claude 3 Opus',
created: roundTime('2024-02-29'),
description: 'Most powerful model for highly complex tasks',
description: 'Powerful model for complex tasks',
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
@@ -23,19 +80,21 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
id: 'claude-3-sonnet-20240229',
label: 'Claude 3 Sonnet',
created: roundTime('2024-02-29'),
description: 'Ideal balance of intelligence and speed for enterprise workloads',
description: 'Balance of speed, cost, and performance',
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 3, chatOut: 15 },
benchmark: { cbaElo: 1203, cbaMmlu: 79 },
hidden: true,
isLegacy: true,
},
{
id: 'claude-3-haiku-20240307',
label: 'Claude 3 Haiku',
created: roundTime('2024-03-07'),
description: 'Fastest and most compact model for near-instant responsiveness',
description: 'Fastest, most cost-effective model',
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
@@ -55,6 +114,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
interfaces: [LLM_IF_OAI_Chat],
pricing: { chatIn: 8, chatOut: 24 },
benchmark: { cbaElo: 1119 },
hidden: true,
},
{
id: 'claude-2.0',
@@ -77,25 +137,6 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
maxCompletionTokens: 4096,
interfaces: [LLM_IF_OAI_Chat],
pricing: { chatIn: 0.8, chatOut: 2.4 },
},
{
id: 'claude-instant-1.1',
label: 'Claude Instant 1.1',
created: roundTime('2023-03-14'),
description: 'Precise and fast',
contextWindow: 100000,
maxCompletionTokens: 2048,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
{
id: 'claude-1.3',
label: 'Claude 1.3',
created: roundTime('2023-03-14'),
description: 'Claude 1.3 is the latest version of Claude v1',
contextWindow: 100000,
maxCompletionTokens: 4096,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
];
@@ -8,7 +8,7 @@ import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { AnthropicWireMessagesRequest, anthropicWireMessagesRequestSchema, AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic.wiretypes';
import { hardcodedAnthropicModels } from './anthropic.models';
@@ -17,7 +17,9 @@ import { hardcodedAnthropicModels } from './anthropic.models';
// Default hosts
const DEFAULT_API_VERSION_HEADERS = {
'anthropic-version': '2023-06-01',
'anthropic-beta': 'messages-2023-12-15',
// Former Betas:
// - messages-2023-12-15: to use the Messages API
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
};
const DEFAULT_MAX_TOKENS = 2048;
const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com';
@@ -158,7 +160,11 @@ const listModelsInputSchema = z.object({
const chatGenerateInputSchema = z.object({
access: anthropicAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
model: openAIModelSchema,
history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(),
// forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
+371 -119
@@ -1,6 +1,10 @@
import type { GeminiModelSchema } from './gemini.wiretypes';
import type { ModelDescriptionSchema } from '../llm.server.types';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '../../store-llms';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '../../store-llms';
// dev options
const DEV_DEBUG_GEMINI_MODELS = false;
// supported interfaces
@@ -8,7 +12,21 @@ const geminiChatInterfaces: GeminiModelSchema['supportedGenerationMethods'] = ['
// unsupported interfaces
const filterUnallowedNames = ['Legacy'];
const filterUnallowedInterfaces: GeminiModelSchema['supportedGenerationMethods'] = ['generateAnswer', 'embedContent', 'embedText'];
const filterUnallowedInterfaces: GeminiModelSchema['supportedGenerationMethods'] = [
'generateAnswer', // e.g. removes "models/aqa"
'embedContent', // e.g. removes "models/embedding-001"
'embedText', // e.g. removes "models/text-embedding-004"
'predict', // e.g. removes "models/imagen-3.0-generate-002" (appeared on 2025-02-09)
];
const filterLyingModelNames: GeminiModelSchema['name'][] = [
// 2025-02-27: verified, old model is no more
'models/gemini-2.0-flash-exp', // verified, replaced by gemini-2.0-flash, which is non-free anymore
// 2025-02-09 update: as of now they cleared the list, so we restart
// 2024-12-10: name of models that are not what they say they are (e.g. 1114 is actually 1121 as of )
'models/gemini-1.5-flash-8b-exp-0924', // replaced by non-free
'models/gemini-1.5-flash-8b-exp-0827', // replaced by non-free
];
/* Manual models details
@@ -16,172 +34,405 @@ const filterUnallowedInterfaces: GeminiModelSchema['supportedGenerationMethods']
- Latest version gemini-1.0-pro-latest <model>-<generation>-<variation>-latest
- Latest stable version gemini-1.0-pro <model>-<generation>-<variation>
- Stable versions gemini-1.0-pro-001 <model>-<generation>-<variation>-<version>
Gemini capabilities chart (updated 2024-10-01):
- [table stakes] System instructions
- JSON Mode, with optional JSON Schema [NOTE: JSON Schema is poorly supported?]
- Adjustable Safety Settings
- Caching
- Tuning
- [good] Function calling, with configuration
- [great] Code execution
*/
// Experimental Gemini models are Free of charge
const geminiExpPricingFree: ModelDescriptionSchema['pricing'] = {
// input: 'free', output: 'free',
};
const gemini20FlashPricing: ModelDescriptionSchema['pricing'] = {
chatIn: 0.10, // inputAudio: 0.70,
chatOut: 0.40,
};
const gemini20FlashLitePricing: ModelDescriptionSchema['pricing'] = {
chatIn: 0.075,
chatOut: 0.30,
};
const gemini15FlashPricing: ModelDescriptionSchema['pricing'] = {
chatIn: 0.075,
chatOut: 0.30,
};
const gemini15Flash8BPricing: ModelDescriptionSchema['pricing'] = {
chatIn: 0.0375,
chatOut: 0.15,
};
const gemini15ProPricing: ModelDescriptionSchema['pricing'] = {
chatIn: 1.25,
chatOut: 5.00,
};
const _knownGeminiModels: ({
id: string,
labelOverride?: string,
isNewest?: boolean,
isPreview?: boolean
symLink?: string
} & Pick<ModelDescriptionSchema, 'interfaces' | 'pricing' | 'trainingDataCutoff' | 'hidden'>)[] = [
isPreview?: boolean,
symLink?: string,
deprecated?: string, // Gemini may provide deprecation dates
_delete?: boolean, // some gemini models are not acknowledged by Google Docs anymore, and leaving them in the list will confuse users
} & Pick<ModelDescriptionSchema, 'interfaces' | 'pricing' | 'hidden' | 'benchmark'>)[] = [
// Generation 1.5
/// Generation 2.5
// 2.5 Pro Experimental
{
id: 'models/gemini-2.5-pro-exp-03-25',
isPreview: true,
pricing: geminiExpPricingFree,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1443 },
},
/// Generation 2.0
// 2.0 Experimental - Pro
{
hidden: true, // showing the 2.5 instead
id: 'models/gemini-2.0-pro-exp-02-05', // Base model: Gemini 2.0 Pro
isPreview: true,
pricing: geminiExpPricingFree,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1380 },
},
{
hidden: true, // only keeping the latest
id: 'models/gemini-2.0-pro-exp',
symLink: 'models/gemini-2.0-pro-exp-02-05',
// copied from symlink
isPreview: true,
pricing: geminiExpPricingFree,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1380 },
},
{
_delete: true, // replaced by gemini-2.0-pro-exp-02-05, 2025-02-27: verified, old model is no more
id: 'models/gemini-exp-1206',
labelOverride: 'Gemini 2.0 Pro Experimental 1206',
isPreview: true,
pricing: geminiExpPricingFree,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1373 },
},
// 2.0 Experimental - Flash Thinking
{
hidden: true, // only keeping the latest
id: 'models/gemini-2.0-flash-thinking-exp', // alias to the latest Flash Thinking model
labelOverride: 'Gemini 2.0 Flash Thinking Experimental',
symLink: 'models/gemini-2.0-flash-thinking-exp-01-21',
// copied from symlink
pricing: geminiExpPricingFree,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
benchmark: { cbaElo: 1385 },
isPreview: true,
},
{
id: 'models/gemini-2.0-flash-thinking-exp-01-21',
pricing: geminiExpPricingFree,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
benchmark: { cbaElo: 1385 },
isPreview: true,
},
{
hidden: true, // replaced by gemini-2.0-flash-thinking-exp-01-21 - 2025-02-27: seems still different on the API, hence no deletion yet
id: 'models/gemini-2.0-flash-thinking-exp-1219',
labelOverride: 'Gemini 2.0 Flash Thinking Experimental 12-19',
pricing: geminiExpPricingFree,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
benchmark: { cbaElo: 1363 },
isPreview: true,
},
// 2.0 Experimental - Flash Image Generation
{
id: 'models/gemini-2.0-flash-exp-image-generation',
// labelOverride: 'Gemini 2.0 Flash Native Image Generation',
pricing: geminiExpPricingFree,
interfaces: [
LLM_IF_OAI_Chat, LLM_IF_OAI_Vision,
// LLM_IF_HOTFIX_StripSys0, // This first Gemini Image Generation model does not support the developer instruction
],
isPreview: true,
},
// 2.0 Flash
{
id: 'models/gemini-2.0-flash-001',
pricing: gemini20FlashPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1358 },
},
{
id: 'models/gemini-2.0-flash',
symLink: 'models/gemini-2.0-flash-001',
// copied from symlink
pricing: gemini20FlashPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1358 },
},
// 2.0 Flash Lite
{
id: 'models/gemini-2.0-flash-lite',
pricing: gemini20FlashLitePricing,
symLink: 'models/gemini-2.0-flash-lite-001',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1309 },
},
{
id: 'models/gemini-2.0-flash-lite-001',
pricing: gemini20FlashLitePricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1309 },
},
{
hidden: true, // discouraged, as the official is out
id: 'models/gemini-2.0-flash-lite-preview-02-05',
isPreview: true,
pricing: gemini20FlashLitePricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1309 },
},
{
id: 'models/gemini-2.0-flash-lite-preview',
symLink: 'models/gemini-2.0-flash-lite-preview-02-05',
// copied from symlink
isPreview: true,
pricing: gemini20FlashLitePricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1309 },
},
/// Generation 1.5
// Gemini 1.5 Flash Models
{
id: 'models/gemini-1.5-flash-latest', // updated regularly and might be a preview version
isNewest: true,
isPreview: true,
pricing: {
chatIn: 0.70, // 0.35 up to 128k tokens, 0.70 prompts > 128k tokens
chatOut: 2.10, // 1.05 up to 128k tokens, 2.10 prompts > 128k tokens
},
trainingDataCutoff: 'May 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json], // input: audio, images and text
pricing: gemini15FlashPricing,
// symLink: '-002 or newer',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
},
{
id: 'models/gemini-1.5-flash',
// copied from above
pricing: {
chatIn: 0.70, // 0.35 up to 128k tokens, 0.70 prompts > 128k tokens
chatOut: 2.10, // 1.05 up to 128k tokens, 2.10 prompts > 128k tokens
},
trainingDataCutoff: 'Apr 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json], // input: audio, images and text
// Defaults to version 002 on Oct 8, 2024
symLink: 'models/gemini-1.5-flash-002',
pricing: gemini15FlashPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1271 },
},
{
id: 'models/gemini-1.5-flash-002', // new stable version
pricing: gemini15FlashPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1271 },
hidden: true,
},
{
id: 'models/gemini-1.5-flash-001',
// copied from above
pricing: {
chatIn: 0.70, // 0.35 up to 128k tokens, 0.70 prompts > 128k tokens
chatOut: 2.10, // 1.05 up to 128k tokens, 2.10 prompts > 128k tokens
},
trainingDataCutoff: 'Apr 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json], // input: audio, images and text
id: 'models/gemini-1.5-flash-001', // previous stable version
pricing: gemini15FlashPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1227 },
hidden: true,
},
{
id: 'models/gemini-1.5-flash-001-tuning', // supports model tuning
pricing: gemini15FlashPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn /* Tuning ... */],
hidden: true,
},
// Gemini 1.5 Flash-8B Models
{
id: 'models/gemini-1.5-pro-latest', // updated regularly and might be a preview version
isNewest: true,
isPreview: true,
pricing: {
chatIn: 7.00, // $3.50 / 1 million tokens (for prompts up to 128K tokens), $7.00 / 1 million tokens (for prompts longer than 128K)
chatOut: 21.00, // $10.50 / 1 million tokens (128K or less), $21.00 / 1 million tokens (128K+)
},
trainingDataCutoff: 'May 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json], // input: audio, images and text
id: 'models/gemini-1.5-flash-8b-latest',
isPreview: false,
pricing: gemini15Flash8BPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
},
{
id: 'models/gemini-1.5-pro', // latest stable -> 001
// copied from above
pricing: {
chatIn: 7.00, // $3.50 / 1 million tokens (for prompts up to 128K tokens), $7.00 / 1 million tokens (for prompts longer than 128K)
chatOut: 21.00, // $10.50 / 1 million tokens (128K or less), $21.00 / 1 million tokens (128K+)
},
trainingDataCutoff: 'Apr 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json],
id: 'models/gemini-1.5-flash-8b',
symLink: 'models/gemini-1.5-flash-8b-001',
pricing: gemini15Flash8BPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1213 },
},
{
id: 'models/gemini-1.5-flash-8b-001',
pricing: gemini15Flash8BPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1213 },
hidden: true,
},
// Gemini 1.5 Pro Models
{
id: 'models/gemini-1.5-pro-latest', // updated to latest stable version
pricing: gemini15ProPricing,
// symLink: '-002 or newer',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
},
{
id: 'models/gemini-1.5-pro',
symLink: 'models/gemini-1.5-pro-002',
pricing: gemini15ProPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1302 },
},
{
id: 'models/gemini-1.5-pro-002',
pricing: gemini15ProPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1302 },
hidden: true,
},
{
id: 'models/gemini-1.5-pro-001', // stable snapshot
// copied from above
pricing: {
chatIn: 7.00, // $3.50 / 1 million tokens (for prompts up to 128K tokens), $7.00 / 1 million tokens (for prompts longer than 128K)
chatOut: 21.00, // $10.50 / 1 million tokens (128K or less), $21.00 / 1 million tokens (128K+)
},
trainingDataCutoff: 'Apr 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json],
id: 'models/gemini-1.5-pro-001',
pricing: gemini15ProPricing,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json, LLM_IF_OAI_Fn],
benchmark: { cbaElo: 1260 },
hidden: true,
},
// Generation 1.0
{
id: 'models/gemini-1.0-pro-latest',
pricing: {
chatIn: 0.50,
chatOut: 1.50,
},
interfaces: [LLM_IF_OAI_Chat],
},
{
id: 'models/gemini-1.0-pro',
pricing: {
chatIn: 0.50,
chatOut: 1.50,
},
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
{
id: 'models/gemini-1.0-pro-001',
pricing: {
chatIn: 0.50,
chatOut: 1.50,
},
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
/// Generation 1.0
// Generation 1.0 + Vision
// Gemini 1.0 Pro Vision Model
{
id: 'models/gemini-1.0-pro-vision-latest',
pricing: {
chatIn: 0.50,
chatOut: 1.50,
},
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision], // Text and Images
hidden: true,
},
// Older symlinks
{
id: 'models/gemini-pro',
symLink: 'models/gemini-1.0-pro',
// copied from symlinked
pricing: {
chatIn: 0.50,
chatOut: 1.50,
},
interfaces: [LLM_IF_OAI_Chat],
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
hidden: true,
_delete: true, // confusing
},
{
id: 'models/gemini-pro-vision',
// copied from symlinked
symLink: 'models/gemini-1.0-pro-vision',
pricing: {
chatIn: 0.50,
chatOut: 1.50,
},
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision], // Text and Images
hidden: true,
_delete: true, // confusing
},
/// Experimental
// LearnLM Experimental Model
{
id: 'models/learnlm-1.5-pro-experimental',
isPreview: true,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: geminiExpPricingFree,
// hidden: true,
// _delete: true,
},
{
id: 'models/gemma-3-27b-it',
isPreview: true,
interfaces: [
LLM_IF_OAI_Chat,
// LLM_IF_HOTFIX_StripImages, /* "Image input modality is not enabled for models/gemma-3-27b-it" */
// LLM_IF_HOTFIX_Sys0ToUsr0, /* "Developer instruction is not enabled for models/gemma-3-27b-it" */
],
// pricing: geminiExpPricingFree,
// hidden: true,
// _delete: true,
},
];
export function geminiFilterModels(geminiModel: GeminiModelSchema): boolean {
const isAllowed = !filterUnallowedNames.some(name => geminiModel.displayName.includes(name));
const isSupported = !filterUnallowedInterfaces.some(iface => geminiModel.supportedGenerationMethods.includes(iface));
return isAllowed && isSupported;
const isWhatItSaysItIs = !filterLyingModelNames.includes(geminiModel.name);
return isAllowed && isSupported && isWhatItSaysItIs;
}
const _sortOderIdPrefix: string[] = [
'models/gemini-exp',
'models/gemini-2.5-pro',
'models/gemini-2.0-pro',
'models/gemini-2.0-flash-exp-image-generation',
'models/gemini-2.0-flash-thinking',
'models/gemini-2.0-flash-0',
'models/gemini-2.0-flash',
'models/gemini-2.0-flash-lite',
'models/gemini-1.5-pro',
'models/gemini-1.5-flash',
'models/gemini-1.5-flash-8b',
'models/gemini-1.0-pro',
'models/gemini-pro',
'models/gemma',
'models/learnlm',
] as const;
export function geminiSortModels(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number {
// links to the bottom
const aIsLink = a.label.startsWith('🔗');
const bIsLink = b.label.startsWith('🔗');
if (aIsLink && !bIsLink) return 1;
if (!aIsLink && bIsLink) return -1;
// hidden to the bottom, then names descending
if (a.hidden && !b.hidden) return 1;
if (!a.hidden && b.hidden) return -1;
// if (a.hidden && !b.hidden) return 1;
// if (!a.hidden && b.hidden) return -1;
// models beginning with 'gemini-' to the top
// const aGemini = a.label.startsWith('Gemini');
// const bGemini = b.label.startsWith('Gemini');
// if (aGemini && !bGemini) return -1;
// if (!aGemini && bGemini) return 1;
// sort by sortOrderIdPrefix
const aSortIdx = _sortOderIdPrefix.findIndex(p => a.id.startsWith(p));
const bSortIdx = _sortOderIdPrefix.findIndex(p => b.id.startsWith(p));
if (aSortIdx !== -1 && bSortIdx !== -1) {
if (aSortIdx < bSortIdx) return -1;
if (aSortIdx > bSortIdx) return 1;
}
// sort by label descending
return b.label.localeCompare(a.label);
}
export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): ModelDescriptionSchema {
export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): ModelDescriptionSchema | null {
const { description, displayName, name: modelId, supportedGenerationMethods } = geminiModel;
// if (DEV_DEBUG_GEMINI_MODELS)
// console.log('geminiModelToModelDescription', geminiModel);
// find known manual mapping
const knownModel = _knownGeminiModels.find(m => m.id === modelId);
if (!knownModel && DEV_DEBUG_GEMINI_MODELS)
console.warn('geminiModelToModelDescription: unknown model', modelId, geminiModel);
// handle _delete
if (knownModel?._delete)
return null;
// handle symlinks
const label = knownModel?.symLink
? `🔗 ${displayName.replace('1.0', '')} → ${knownModel.symLink}`
: displayName;
let label = knownModel?.symLink
? `🔗 ${knownModel?.labelOverride || displayName} → ${knownModel.symLink}`
: knownModel?.labelOverride || displayName;
// FIX: the Gemini 1114 model now returns 1121 as the version.. highlight the issue
// if (geminiModel.name.endsWith('1114') && label.endsWith('1121'))
// label += ' (really: 1114)';
// handle hidden models
const hasChatInterfaces = supportedGenerationMethods.some(iface => geminiChatInterfaces.includes(iface));
@@ -193,14 +444,13 @@ export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): M
// description
const { version, topK, topP, temperature } = geminiModel;
const descriptionLong = description + ` (Version: ${version}, Defaults: temperature=${temperature}, topP=${topP}, topK=${topK}, interfaces=[${supportedGenerationMethods.join(',')}])`;
const descriptionLong = (description || 'No description.') + ` (Version: ${version}, Defaults: temperature=${temperature}, topP=${topP}, topK=${topK}, interfaces=[${supportedGenerationMethods.join(',')}])`;
// use known interfaces, or add chat if this is a generateContent model
const interfaces: ModelDescriptionSchema['interfaces'] = knownModel?.interfaces || [];
if (!interfaces.length && hasChatInterfaces) {
interfaces.push(LLM_IF_OAI_Chat);
// if (geminiVisionNames.some(name => modelId.includes(name)))
// interfaces.push(LLM_IF_OAI_Vision);
// newer models get good capabilities by default
interfaces.push(LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Vision, LLM_IF_OAI_Json);
}
return {
@@ -211,11 +461,13 @@ export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): M
description: descriptionLong,
contextWindow: contextWindow,
maxCompletionTokens: outputTokenLimit,
trainingDataCutoff: knownModel?.trainingDataCutoff,
// trainingDataCutoff: knownModel?.trainingDataCutoff, // disabled as we don't get this from Gemini
interfaces,
// parameterSpecs: knownModel?.parameterSpecs,
// rateLimits: isGeminiPro ? { reqPerMinute: 60 } : undefined,
// benchmarks: ...
pricing: knownModel?.pricing, // TODO: needs <>128k, and per-character and per-image pricing
benchmark: knownModel?.benchmark,
pricing: knownModel?.pricing,
hidden,
// deprecated: knownModel?.deprecated,
};
}
@@ -8,7 +8,7 @@ import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server';
import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
import { fixupHost } from '~/common/util/urlUtils';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, type ModelDescriptionSchema } from '../llm.server.types';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
@@ -120,8 +120,11 @@ const accessOnlySchema = z.object({
const chatGenerateInputSchema = z.object({
access: geminiAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
model: openAIModelSchema,
history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(),
// forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
@@ -145,9 +148,10 @@ export const llmGeminiRouter = createTRPCRouter({
// as the List API already has all the info on all the models
// map to our output schema
const models = detailedModels
const models = (detailedModels
.filter(geminiFilterModels)
.map(geminiModel => geminiModelToModelDescription(geminiModel))
.filter(model => !!model) as ModelDescriptionSchema[])
.sort(geminiSortModels);
return {
@@ -9,27 +9,30 @@ export const geminiModelsStreamGenerateContentPath = '/v1beta/{model=models/*}:s
// models.list = /v1beta/models
const Methods_enum = z.enum([
'bidiGenerateContent', // appeared on 2024-12, see https://github.com/enricoros/big-AGI/issues/700
'createCachedContent', // appeared on 2024-06-10, see https://github.com/enricoros/big-AGI/issues/565
'countMessageTokens',
'countTextTokens',
'countTokens',
'createTunedModel',
'createTunedTextModel',
'embedContent',
'embedText',
'generateAnswer',
'generateContent',
'generateMessage',
'generateText',
]);
const geminiModelSchema = z.object({
name: z.string(),
version: z.string(),
displayName: z.string(),
description: z.string(),
description: z.string().optional(),
inputTokenLimit: z.number().int().min(1),
outputTokenLimit: z.number().int().min(1),
supportedGenerationMethods: z.array(z.enum([
'countMessageTokens',
'countTextTokens',
'countTokens',
'createTunedModel',
'createTunedTextModel',
'embedContent',
'embedText',
'generateAnswer',
'generateContent',
'generateMessage',
'generateText',
])),
supportedGenerationMethods: z.array(z.union([Methods_enum, z.string()])), // relaxed with z.union to not break on expansion
temperature: z.number().optional(),
topP: z.number().optional(),
topK: z.number().optional(),
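The z.union above is what keeps model listing from breaking when Google ships a new generation method; a sketch, assuming the fields shown are the only required ones:

// An unknown method like 'someFutureMethod' now falls through to z.string() instead of failing:
geminiModelSchema.parse({
  name: 'models/gemini-future',
  version: '001',
  displayName: 'Gemini Future',
  inputTokenLimit: 1048576,
  outputTokenLimit: 8192,
  supportedGenerationMethods: ['generateContent', 'someFutureMethod'],
});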
@@ -171,7 +174,7 @@ export const geminiGeneratedContentResponseSchema = z.object({
// either all requested candidates are returned or no candidates at all
// no candidates are returned only if there was something wrong with the prompt (see promptFeedback)
candidates: z.array(z.object({
index: z.number(),
index: z.number().optional(),
content: geminiContentSchema.optional(), // this can be missing if the finishReason is not 'MAX_TOKENS'
finishReason: geminiFinishReasonSchema.optional(),
safetyRatings: z.array(geminiSafetyRatingSchema).optional(), // undefined when finishReason is 'RECITATION'
@@ -22,6 +22,9 @@ import type { OpenAIWire } from './openai/openai.wiretypes';
import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, openAIHistorySchema, openAIModelSchema } from './openai/openai.router';
import { llmsStreamingContextSchema } from './llm.server.types';
// configuration
const USER_SYMBOL_MAX_TOKENS = '🧱';
const USER_SYMBOL_PROMPT_BLOCKED = '🚫';
@@ -46,17 +49,14 @@ type MuxingFormat = 'sse' | 'json-nl';
*/
type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean };
const streamingContextSchema = z.object({
method: z.literal('chat-stream'),
name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
ref: z.string(),
});
const chatStreamingInputSchema = z.object({
access: z.union([anthropicAccessSchema, geminiAccessSchema, ollamaAccessSchema, openAIAccessSchema]),
model: openAIModelSchema,
history: openAIHistorySchema,
context: streamingContextSchema,
// NOTE: made it optional for now as we have some old requests without it
// 2024-07-07: remove .optional()
context: llmsStreamingContextSchema.optional(),
});
export type ChatStreamingInputSchema = z.infer<typeof chatStreamingInputSchema>;
@@ -12,6 +12,8 @@ const pricingSchema = z.object({
const benchmarkSchema = z.object({
cbaElo: z.number().optional(),
cbaMmlu: z.number().optional(),
heCode: z.number().optional(), // HumanEval, code, 0-shot
vqaMmmu: z.number().optional(), // Visual Question Answering, MMMU, 0-shot
});
// const rateLimitsSchema = z.object({
@@ -46,6 +48,25 @@ export const llmsListModelsOutputSchema = z.object({
});
// Chat Generation Input (some parts of)
const generateContextNameSchema = z.enum(['chat-ai-title', 'chat-ai-summarize', 'chat-followup-diagram', 'chat-react-turn', 'draw-expand-prompt']);
export type GenerateContextNameSchema = z.infer<typeof generateContextNameSchema>;
export const llmsGenerateContextSchema = z.object({
method: z.literal('chat-generate'),
name: generateContextNameSchema,
ref: z.string(),
});
const streamingContextNameSchema = z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']);
export type StreamingContextNameSchema = z.infer<typeof streamingContextNameSchema>;
export const llmsStreamingContextSchema = z.object({
method: z.literal('chat-stream'),
name: streamingContextNameSchema,
ref: z.string(),
});
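Since the two context schemas differ only in their `method` literal and name enum, a consumer receiving either can dispatch on `method`. A sketch (not in the source) using zod's discriminated union, with the name enums reduced to strings for brevity:

```ts
import { z } from 'zod';

// reduced mirrors of llmsGenerateContextSchema and llmsStreamingContextSchema
const generateCtx = z.object({ method: z.literal('chat-generate'), name: z.string(), ref: z.string() });
const streamCtx = z.object({ method: z.literal('chat-stream'), name: z.string(), ref: z.string() });

// one schema accepting either context, keyed on the 'method' literal
const anyContext = z.discriminatedUnion('method', [generateCtx, streamCtx]);

const ctx = anyContext.parse({ method: 'chat-stream', name: 'conversation', ref: 'conv-1' });
if (ctx.method === 'chat-stream') {
  // TypeScript narrows ctx to the streaming branch here
}
```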
// (non-streaming) Chat Generation Output
export const llmsChatGenerateOutputSchema = z.object({
@@ -11,7 +11,7 @@ import { capitalizeFirstLetter } from '~/common/util/textUtils';
import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models';
import { WireOllamaChatCompletionInput, wireOllamaChunkedOutputSchema, wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama.wiretypes';
@@ -117,8 +117,11 @@ const adminPullModelSchema = z.object({
const chatGenerateInputSchema = z.object({
access: ollamaAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
model: openAIModelSchema,
history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(),
// forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
const listPullableOutputSchema = z.object({
@@ -0,0 +1,84 @@
// here for reference only - for future mapping of CBA scores to the model IDs
// const modelIdToPrefixMap: { [key: string]: string } = {
// // Anthropic models
// 'Claude 3.5 Sonnet': 'claude-3-5-sonnet-20240620',
// 'Claude 3 Opus': 'claude-3-opus-20240229',
// 'Claude 3 Sonnet': 'claude-3-sonnet-20240229',
// 'Claude 3 Haiku': 'claude-3-haiku-20240307',
// 'Claude-2.1': 'claude-2.1',
// 'Claude-2.0': 'claude-2.0',
// 'Claude-1': '', // No exact match
// 'Claude-Instant-1': 'claude-instant-1.2', // Closest match
//
// // Gemini models
// 'Gemini-1.5-Pro-Exp-0801': 'models/gemini-1.5-pro-latest', // Closest match
// 'Gemini Advanced App (2024-05-14)': '', // No exact match
// 'Gemini-1.5-Pro-001': 'models/gemini-1.5-pro-001',
// 'Gemini-1.5-Pro-Preview-0409': 'models/gemini-1.5-pro-latest', // Closest match
// 'Gemini-1.5-Flash-001': 'models/gemini-1.5-flash-001',
// 'Gemini App (2024-01-24)': '', // No exact match
// 'Gemini-1.0-Pro-001': 'models/gemini-1.0-pro-001',
// 'Gemini Pro': 'models/gemini-pro',
//
// // OpenAI models (from the previous file)
// 'GPT-4o-2024-05-13': 'gpt-4o-2024-05-13',
// 'GPT-4o-mini-2024-07-18': 'gpt-4o-mini-2024-07-18',
// 'GPT-4-Turbo-2024-04-09': 'gpt-4-turbo-2024-04-09',
// 'GPT-4-1106-preview': 'gpt-4-1106-preview',
// 'GPT-4-0125-preview': 'gpt-4-0125-preview',
// 'GPT-4-0314': 'gpt-4-0314',
// 'GPT-4-0613': 'gpt-4-0613',
// 'GPT-3.5-Turbo-0613': 'gpt-3.5-turbo-0613',
// 'GPT-3.5-Turbo-0314': 'gpt-3.5-turbo-0314',
// 'GPT-3.5-Turbo-0125': 'gpt-3.5-turbo-0125',
//
// // Mistral models (from the previous file)
// 'Mistral-Large-2402': 'mistral-large-2402',
// 'Mixtral-8x7b-Instruct-v0.1': 'mistralai/Mixtral-8x7B-Instruct-v0.1',
//
// // Other models without matches
// 'Gemini-1.5-Pro-Exp-0801': '',
// 'Meta-Llama-3.1-405b-Instruct': '',
// 'Gemini-1.5-Pro-001': '',
// 'Meta-Llama-3.1-70b-Instruct': '',
// 'Yi-Large-preview': '',
// 'Deepseek-v2-API-0628': '',
// 'Gemma-2-27b-it': '',
// 'Yi-Large': '',
// 'Nemotron-4-340B-Instruct': '',
// 'GLM-4-0520': '',
// 'Llama-3-70b-Instruct': '',
// 'Reka-Core-20240501': '',
// 'Command R+': '',
// 'Gemma-2-9b-it': '',
// 'Qwen2-72B-Instruct': '',
// 'GLM-4-0116': '',
// 'Qwen-Max-0428': '',
// 'DeepSeek-Coder-V2-Instruct': '',
// 'Reka-Flash-Preview-20240611': '',
// 'Meta-Llama-3.1-8b-Instruct': '',
// 'Qwen1.5-110B-Chat': '',
// 'Yi-1.5-34B-Chat': '',
// 'Reka-Flash-21B-online': '',
// 'Llama-3-8b-Instruct': '',
// 'Command R': '',
// 'Reka-Flash-21B': '',
// 'Qwen1.5-72B-Chat': '',
// 'Mixtral-8x22b-Instruct-v0.1': '',
// 'Zephyr-ORPO-141b-A35b-v0.1': '',
// 'Qwen1.5-32B-Chat': '',
// 'Mistral-Next': '',
// 'Phi-3-Medium-4k-Instruct': '',
// 'Starling-LM-7B-beta': '',
// 'Yi-34B-Chat': '',
// 'Qwen1.5-14B-Chat': '',
// 'WizardLM-70B-v1.0': '',
// 'Tulu-2-DPO-70B': '',
// 'DBRX-Instruct-Preview': '',
// 'Phi-3-Small-8k-Instruct': '',
// 'Llama-2-70b-chat': '',
// 'OpenChat-3.5-0106': '',
// 'Vicuna-33B': '',
// 'Snowflake Arctic Instruct': '',
// 'Starling-LM-7B-alpha': '',
// };
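If the commented-out table above is ever activated, the intended use is presumably a lookup from a CBA leaderboard display name to a model id prefix. A hypothetical sketch of that wiring (the function and its behavior are assumptions, not code from this diff):

```ts
// hypothetical use of the reference table: CBA display name -> model id prefix
const modelIdToPrefixMap: { [key: string]: string } = {
  'Claude 3.5 Sonnet': 'claude-3-5-sonnet-20240620',
  'GPT-4o-2024-05-13': 'gpt-4o-2024-05-13',
  'Claude-1': '', // '' means "no exact match", as in the table above
};

function cbaNameToIdPrefix(cbaName: string): string | null {
  const prefix = modelIdToPrefixMap[cbaName];
  return prefix ? prefix : null; // treat missing and '' entries the same way
}
```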
@@ -9,34 +9,139 @@ import { wireTogetherAIListOutputSchema } from './togetherai.wiretypes';
// [Azure] / [OpenAI]
// https://platform.openai.com/docs/models
const _knownOpenAIChatModels: ManualMappings = [
// GPT-4o -> 2024-05-13
// GPT-4o -> 2024-05-13 (Starting October 2nd, 2024, gpt-4o will point to the gpt-4o-2024-08-06 snapshot)
{
idPrefix: 'gpt-4o',
label: 'GPT-4o',
description: 'Currently points to gpt-4o-2024-05-13.',
symLink: 'gpt-4o-2024-05-13',
description: 'Points to gpt-4o-2024-08-06 starting on Oct 2, 2024.',
symLink: 'gpt-4o-2024-08-06',
hidden: true,
// copied from symlinked
contextWindow: 128000,
maxCompletionTokens: 4096,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 5, chatOut: 15 },
benchmark: { cbaElo: 1310 },
pricing: { chatIn: 2.5, chatOut: 10 },
benchmark: { cbaElo: 1286 + 1 },
},
{
isLatest: true,
idPrefix: 'gpt-4o-2024-08-06',
label: 'GPT-4o (2024-08-06)',
description: 'Latest snapshot that supports Structured Outputs',
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json], // + Structured Outputs?
pricing: { chatIn: 2.5, chatOut: 10 },
benchmark: { cbaElo: 1286 + 1 },
},
{
idPrefix: 'gpt-4o-2024-05-13',
label: 'GPT-4o (2024-05-13)',
description: 'Advanced, multimodal flagship model thats cheaper and faster than GPT-4 Turbo.',
description: 'Advanced, multimodal flagship model that\'s cheaper and faster than GPT-4 Turbo.',
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 5, chatOut: 15 },
benchmark: { cbaElo: 1310 },
benchmark: { cbaElo: 1286 },
hidden: true,
},
{
idPrefix: 'chatgpt-4o-latest',
label: 'ChatGPT-4o Latest',
description: 'Intended for research and evaluation. Dynamic model continuously updated to the current version of GPT-4o in ChatGPT.',
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 5, chatOut: 15 },
},
// GPT-4o mini
{
idPrefix: 'gpt-4o-mini',
label: 'GPT-4o mini',
description: 'Currently points to gpt-4o-mini-2024-07-18.',
symLink: 'gpt-4o-mini-2024-07-18',
hidden: true,
// copied from symlinked
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 0.15, chatOut: 0.60 },
benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
},
{
idPrefix: 'gpt-4o-mini-2024-07-18',
label: 'GPT-4o Mini (2024-07-18)',
description: 'Affordable model for fast, lightweight tasks. GPT-4o mini is cheaper and more capable than GPT-3.5 Turbo.',
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 0.15, chatOut: 0.60 },
},
// o1-preview
{
idPrefix: 'o1-preview',
label: 'o1 Preview',
description: 'Supported in Big-AGI 2. Points to the most recent snapshot of the o1 model: o1-preview-2024-09-12',
symLink: 'o1-preview-2024-09-12',
hidden: true,
// copied from symlinked
contextWindow: 128000,
maxCompletionTokens: 32768,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 15, chatOut: 60 },
isPreview: true,
},
{
hidden: true, // we can't support it in Big-AGI 1
idPrefix: 'o1-preview-2024-09-12',
label: 'o1 Preview (2024-09-12)',
description: 'Supported in Big-AGI 2. New reasoning model for complex tasks that require broad general knowledge.',
contextWindow: 128000,
maxCompletionTokens: 32768,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 15, chatOut: 60 },
isPreview: true,
},
// o1-mini
{
idPrefix: 'o1-mini',
label: 'o1 Mini',
description: 'Supported in Big-AGI 2. Points to the most recent o1-mini snapshot: o1-mini-2024-09-12',
symLink: 'o1-mini-2024-09-12',
hidden: true,
// copied from symlinked
contextWindow: 128000,
maxCompletionTokens: 65536,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 3, chatOut: 12 },
isPreview: true,
},
{
hidden: true, // we can't support it in Big-AGI 1
idPrefix: 'o1-mini-2024-09-12',
label: 'o1 Mini (2024-09-12)',
description: 'Supported in Big-AGI 2. Fast, cost-efficient reasoning model tailored to coding, math, and science use cases.',
contextWindow: 128000,
maxCompletionTokens: 65536,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 3, chatOut: 12 },
isPreview: true,
},
// GPT4 Turbo with Vision -> 2024-04-09
@@ -52,7 +157,7 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Dec 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1261 },
benchmark: { cbaElo: 1257 },
},
{
idPrefix: 'gpt-4-turbo-2024-04-09',
@@ -63,12 +168,12 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Dec 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1261 },
benchmark: { cbaElo: 1257 },
},
// GPT4 Turbo Previews
{
idPrefix: 'gpt-4-turbo-preview', // GPT-4 Turbo preview model -> 0125
idPrefix: 'gpt-4-turbo-preview',
label: 'GPT-4 Preview Turbo',
description: 'GPT-4 Turbo preview model. Currently points to gpt-4-0125-preview.',
symLink: 'gpt-4-0125-preview',
@@ -80,63 +185,33 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Dec 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1251 },
benchmark: { cbaElo: 1245 },
},
{
idPrefix: 'gpt-4-0125-preview', // GPT-4 Turbo preview model
idPrefix: 'gpt-4-0125-preview',
label: 'GPT-4 Turbo (0125)',
description: 'GPT-4 Turbo preview model intended to reduce cases of "laziness" where the model doesn\'t complete a task. Returns a maximum of 4,096 output tokens.',
isPreview: true,
description: 'GPT-4 Turbo preview model intended to reduce cases of "laziness" where the model doesn\'t complete a task.',
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Dec 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1251 },
benchmark: { cbaElo: 1245 },
hidden: true,
},
{
idPrefix: 'gpt-4-1106-preview', // GPT-4 Turbo preview model
label: 'GPT-4 Turbo (1106)',
description: 'GPT-4 Turbo preview model featuring improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Returns a maximum of 4,096 output tokens.',
isPreview: true,
description: 'GPT-4 Turbo preview model featuring improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.',
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Apr 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1255 },
benchmark: { cbaElo: 1251 },
hidden: true,
},
// GPT4 Vision Previews
{
idPrefix: 'gpt-4-vision-preview', // GPT-4 Turbo vision preview
label: 'GPT-4 Preview Vision',
description: 'GPT-4 model with the ability to understand images, in addition to all other GPT-4 Turbo capabilities. This is a preview model, we recommend developers to now use gpt-4-turbo which includes vision capabilities. Currently points to gpt-4-1106-vision-preview.',
symLink: 'gpt-4-1106-vision-preview',
// copied from symlinked
isPreview: true,
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Apr 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
pricing: { chatIn: 10, chatOut: 30 },
hidden: true, // Deprecated in favor of gpt-4-turbo
},
{
idPrefix: 'gpt-4-1106-vision-preview',
label: 'GPT-4 Preview Vision (1106)',
description: 'GPT-4 model with the ability to understand images, in addition to all other GPT-4 Turbo capabilities. This is a preview model, we recommend developers to now use gpt-4-turbo which includes vision capabilities. Returns a maximum of 4,096 output tokens.',
isPreview: true,
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Apr 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn],
pricing: { chatIn: 10, chatOut: 30 },
hidden: true, // Deprecated in favor of gpt-4-turbo
},
// GPT4-32k's
{
@@ -182,7 +257,7 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 30, chatOut: 60 },
benchmark: { cbaElo: 1164 },
benchmark: { cbaElo: 1161 },
},
{
idPrefix: 'gpt-4-0314',
@@ -192,7 +267,7 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 30, chatOut: 60 },
benchmark: { cbaElo: 1189 },
benchmark: { cbaElo: 1186 },
hidden: true,
},
{
@@ -206,39 +281,27 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 30, chatOut: 60 },
benchmark: { cbaElo: 1164 },
benchmark: { cbaElo: 1161 },
isLegacy: true,
},
// 3.5-Turbo-Instruct (Not for Chat)
{
idPrefix: 'gpt-3.5-turbo-instruct',
label: '3.5-Turbo Instruct',
description: 'Similar capabilities as GPT-3 era models. Compatible with legacy Completions endpoint and not Chat Completions.',
contextWindow: 4097,
trainingDataCutoff: 'Sep 2021',
interfaces: [/* NO: LLM_IF_OAI_Chat,*/ LLM_IF_OAI_Complete],
pricing: { chatIn: 1.5, chatOut: 2 },
hidden: true,
},
// 3.5-Turbo-16k's
// 3.5-Turbo
// As of July 2024, gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.
{
idPrefix: 'gpt-3.5-turbo-0125',
label: '3.5-Turbo (0125)',
description: 'The latest GPT-3.5 Turbo model with higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls. Returns a maximum of 4,096 output tokens.',
description: 'The latest GPT-3.5 Turbo model with higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls.',
contextWindow: 16385,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 0.5, chatOut: 1.5 },
benchmark: { cbaElo: 1104 },
benchmark: { cbaElo: 1105 },
},
{
idPrefix: 'gpt-3.5-turbo-1106',
label: '3.5-Turbo (1106)',
description: 'The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.',
description: 'GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.',
contextWindow: 16385,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Sep 2021',
@@ -250,7 +313,7 @@ const _knownOpenAIChatModels: ManualMappings = [
{
idPrefix: 'gpt-3.5-turbo',
label: '3.5-Turbo',
description: 'Currently points to gpt-3.5-turbo-0125.',
description: 'Currently points to gpt-3.5-turbo-0125. As of July 2024, gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.',
symLink: 'gpt-3.5-turbo-0125',
hidden: true,
// copied
@@ -259,7 +322,19 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 0.5, chatOut: 1.5 },
benchmark: { cbaElo: 1104 },
benchmark: { cbaElo: 1105 },
},
// 3.5-Turbo-Instruct (Not for Chat)
{
idPrefix: 'gpt-3.5-turbo-instruct',
label: '3.5-Turbo Instruct',
description: 'Similar capabilities as GPT-3 era models. Compatible with legacy Completions endpoint and not Chat Completions.',
contextWindow: 4097,
trainingDataCutoff: 'Sep 2021',
interfaces: [/* NO: LLM_IF_OAI_Chat,*/ LLM_IF_OAI_Complete],
pricing: { chatIn: 1.5, chatOut: 2 },
hidden: true,
},
@@ -667,10 +742,14 @@ export function openRouterModelFamilySortFn(a: { id: string }, b: { id: string }
return aPrefixIndex !== -1 ? -1 : 1;
}
export function openRouterModelToModelDescription(wireModel: object): ModelDescriptionSchema {
export function openRouterModelToModelDescription(wireModel: object): ModelDescriptionSchema | null {
// parse the model
const model = wireOpenrouterModelsListOutputSchema.parse(wireModel);
const { data: model, error } = wireOpenrouterModelsListOutputSchema.safeParse(wireModel);
if (error) {
console.warn(`openrouterModelToModelDescription: Failed to parse model: ${error}`);
return null;
}
// parse pricing
const pricing: ModelDescriptionSchema['pricing'] = {
@@ -859,41 +938,84 @@ export function perplexityAIModelSort(a: ModelDescriptionSchema, b: ModelDescrip
const _knownGroqModels: ManualMappings = [
{
isLatest: true,
idPrefix: 'llama-3.1-405b-reasoning',
label: 'Llama 3.1 · 405B',
description: 'LLaMA 3.1 405B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
isLatest: true,
idPrefix: 'llama-3.1-70b-versatile',
label: 'Llama 3.1 · 70B',
description: 'LLaMA 3.1 70B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
isLatest: true,
idPrefix: 'llama-3.1-8b-instant',
label: 'Llama 3.1 · 8B',
description: 'LLaMA 3.1 8B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-groq-70b-8192-tool-use-preview',
label: 'Llama 3 Groq · 70B Tool Use',
description: 'LLaMA 3 70B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-groq-8b-8192-tool-use-preview',
label: 'Llama 3 Groq · 8B Tool Use',
description: 'LLaMA 3 8B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-70b-8192',
label: 'Llama 3 · 70B',
description: 'LLaMA3 70b developed by Meta with a context window of 8,192 tokens.',
description: 'LLaMA3 70B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat],
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
// isLegacy: true,
hidden: true,
},
{
// isLatest: true,
idPrefix: 'llama3-8b-8192',
label: 'Llama 3 · 8B',
description: 'LLaMA3 8b developed by Meta with a context window of 8,192 tokens.',
description: 'LLaMA3 8B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat],
},
{
idPrefix: 'llama2-70b-4096',
label: 'Llama 2 · 70B',
description: 'LLaMA2 70b developed by Meta with a context window of 4,096 tokens.',
contextWindow: 4096,
interfaces: [LLM_IF_OAI_Chat],
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
// isLegacy: true,
hidden: true,
},
{
idPrefix: 'mixtral-8x7b-32768',
label: 'Mixtral 8x7B',
description: 'Mixtral 8x7b developed by Mistral with a context window of 32,768 tokens.',
description: 'Mixtral 8x7B developed by Mistral with a context window of 32,768 tokens. Supports tool use.',
contextWindow: 32768,
interfaces: [LLM_IF_OAI_Chat],
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'gemma2-9b-it',
label: 'Gemma 2 · 9B Instruct',
description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'gemma-7b-it',
label: 'Gemma 1.1 · 7B Instruct',
description: 'Gemma 7b developed by Google with a context window of 8,192 tokens.',
description: 'Gemma 7B developed by Google with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
];
@@ -910,6 +1032,11 @@ export function groqModelToModelDescription(_model: unknown): ModelDescriptionSc
}
export function groqModelSortFn(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number {
// sort hidden at the end
if (a.hidden && !b.hidden)
return 1;
if (!a.hidden && b.hidden)
return -1;
// sort as per their order in the known models
const aIndex = _knownGroqModels.findIndex(base => a.id.startsWith(base.idPrefix));
const bIndex = _knownGroqModels.findIndex(base => b.id.startsWith(base.idPrefix));
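The hunk cuts off after the two `findIndex` lookups. A typical way such a comparator ends (an assumption; the actual continuation is not shown in this diff) is to keep the curated order for known models and push unknown ones last:

```ts
// hypothetical tail for a findIndex-based comparator; the real code is not in this hunk
function compareByKnownOrder(aIndex: number, bIndex: number): number {
  if (aIndex !== -1 && bIndex !== -1) return aIndex - bIndex; // both known: curated order
  if (aIndex !== -1) return -1; // known models sort before unknown ones
  if (bIndex !== -1) return 1;
  return 0;                     // neither known: leave the relative order unchanged
}
```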
@@ -12,7 +12,7 @@ import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIWire, WireOpenAICreateImageOutput, wireOpenAICreateImageOutputSchema, WireOpenAICreateImageRequest } from './openai.wiretypes';
import { azureModelToModelDescription, groqModelSortFn, groqModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, mistralModelsSort, mistralModelToModelDescription, oobaboogaModelToModelDescription, openAIModelFilter, openAIModelToModelDescription, openRouterModelFamilySortFn, openRouterModelToModelDescription, perplexityAIModelDescriptions, perplexityAIModelSort, togetherAIModelsToModelDescriptions } from './models.data';
import { llmsChatGenerateWithFunctionsOutputSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { llmsChatGenerateWithFunctionsOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
@@ -72,8 +72,11 @@ const listModelsInputSchema = z.object({
const chatGenerateWithFunctionsInputSchema = z.object({
access: openAIAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
model: openAIModelSchema,
history: openAIHistorySchema,
functions: openAIFunctionsSchema.optional(),
forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
const createImagesInputSchema = z.object({
@@ -108,7 +111,7 @@ export const llmOpenAIRouter = createTRPCRouter({
// [Azure]: use an older 'deployments' API to enumerate the models, and a modified OpenAI id to description mapping
if (access.dialect === 'azure') {
const azureModels = await openaiGET(access, `/openai/deployments?api-version=2023-03-15-preview`);
const azureModels = await openaiGETOrThrow(access, `/openai/deployments?api-version=2023-03-15-preview`);
const wireAzureListDeploymentsSchema = z.object({
data: z.array(z.object({
@@ -146,7 +149,7 @@ export const llmOpenAIRouter = createTRPCRouter({
// [non-Azure]: fetch openAI-style for all but Azure (will be then used in each dialect)
const openAIWireModelsResponse = await openaiGET<OpenAIWire.Models.Response>(access, '/v1/models');
const openAIWireModelsResponse = await openaiGETOrThrow<OpenAIWire.Models.Response>(access, '/v1/models');
// [Together] missing the .data property
if (access.dialect === 'togetherai')
@@ -253,7 +256,8 @@ export const llmOpenAIRouter = createTRPCRouter({
case 'openrouter':
models = openAIModels
.sort(openRouterModelFamilySortFn)
.map(openRouterModelToModelDescription);
.map(openRouterModelToModelDescription)
.filter(desc => !!desc) as ModelDescriptionSchema[];
break;
}
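The `.filter(desc => !!desc) as ModelDescriptionSchema[]` cast used here (and in the Gemini fragment at the top of this section) can be avoided with a user-defined type guard, which keeps the narrowing inside the type system. A sketch with a stand-in type:

```ts
// same null-stripping without the 'as' cast, via a type predicate
type ModelDescriptionSchema = { id: string }; // stand-in for the real schema type
const maybeModels: (ModelDescriptionSchema | null)[] = [{ id: 'a' }, null, { id: 'b' }];

const models: ModelDescriptionSchema[] =
  maybeModels.filter((d): d is ModelDescriptionSchema => d !== null);
```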
@@ -267,17 +271,22 @@ export const llmOpenAIRouter = createTRPCRouter({
.output(llmsChatGenerateWithFunctionsOutputSchema)
.mutation(async ({ input }) => {
const { access, model, history, functions, forceFunctionName } = input;
const { access, model, history, functions, forceFunctionName, context } = input;
const isFunctionsCall = !!functions && functions.length > 0;
const completionsBody = openAIChatCompletionPayload(access.dialect, model, history, isFunctionsCall ? functions : null, forceFunctionName ?? null, 1, false);
const wireCompletions = await openaiPOST<OpenAIWire.ChatCompletion.Response, OpenAIWire.ChatCompletion.Request>(
const wireCompletions = await openaiPOSTOrThrow<OpenAIWire.ChatCompletion.Response, OpenAIWire.ChatCompletion.Request>(
access, model.id, completionsBody, '/v1/chat/completions',
);
// expect a single output
if (wireCompletions?.choices?.length !== 1)
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[OpenAI Issue] Expected 1 completion, got ${wireCompletions?.choices?.length}` });
if (wireCompletions?.choices?.length !== 1) {
console.error(`[POST] llmOpenAI.chatGenerateWithFunctions: ${access.dialect}: ${context?.name || 'no context'}: unexpected output${forceFunctionName ? ` (fn: ${forceFunctionName})` : ''}:`, model.id, wireCompletions?.choices);
throw new TRPCError({
code: 'UNPROCESSABLE_CONTENT',
message: `[OpenAI Issue] Expected 1 completion, got ${wireCompletions?.choices?.length}`,
});
}
let { message, finish_reason } = wireCompletions.choices[0];
// LocalAI hack/workaround, until https://github.com/go-skynet/LocalAI/issues/788 is fixed
@@ -318,7 +327,7 @@ export const llmOpenAIRouter = createTRPCRouter({
delete requestBody.response_format;
// create 1 image (dall-e-3 won't support more than 1, so better transfer the burden to the client)
const wireOpenAICreateImageOutput = await openaiPOST<WireOpenAICreateImageOutput, WireOpenAICreateImageRequest>(
const wireOpenAICreateImageOutput = await openaiPOSTOrThrow<WireOpenAICreateImageOutput, WireOpenAICreateImageRequest>(
access, null, requestBody, '/v1/images/generations',
);
@@ -340,7 +349,7 @@ export const llmOpenAIRouter = createTRPCRouter({
.mutation(async ({ input: { access, text } }): Promise<OpenAIWire.Moderation.Response> => {
try {
return await openaiPOST<OpenAIWire.Moderation.Response, OpenAIWire.Moderation.Request>(access, null, {
return await openaiPOSTOrThrow<OpenAIWire.Moderation.Response, OpenAIWire.Moderation.Request>(access, null, {
input: text,
model: 'text-moderation-latest',
}, '/v1/moderations');
@@ -361,7 +370,7 @@ export const llmOpenAIRouter = createTRPCRouter({
dialectLocalAI_galleryModelsAvailable: publicProcedure
.input(listModelsInputSchema)
.query(async ({ input: { access } }) => {
const wireLocalAIModelsAvailable = await openaiGET(access, '/models/available');
const wireLocalAIModelsAvailable = await openaiGETOrThrow(access, '/models/available');
return wireLocalAIModelsAvailableOutputSchema.parse(wireLocalAIModelsAvailable);
}),
@@ -374,7 +383,7 @@ export const llmOpenAIRouter = createTRPCRouter({
}))
.mutation(async ({ input: { access, galleryName, modelName } }) => {
const galleryModelId = `${galleryName}@${modelName}`;
const wireLocalAIModelApply = await openaiPOST(access, null, { id: galleryModelId }, '/models/apply');
const wireLocalAIModelApply = await openaiPOSTOrThrow(access, null, { id: galleryModelId }, '/models/apply');
return wilreLocalAIModelsApplyOutputSchema.parse(wireLocalAIModelApply);
}),
@@ -385,7 +394,7 @@ export const llmOpenAIRouter = createTRPCRouter({
jobId: z.string(),
}))
.query(async ({ input: { access, jobId } }) => {
const wireLocalAIModelsJobs = await openaiGET(access, `/models/jobs/${jobId}`);
const wireLocalAIModelsJobs = await openaiGETOrThrow(access, `/models/jobs/${jobId}`);
return wireLocalAIModelsListOutputSchema.parse(wireLocalAIModelsJobs);
}),
@@ -623,12 +632,12 @@ export function openAIChatCompletionPayload(dialect: OpenAIDialects, model: Open
};
}
async function openaiGET<TOut extends object>(access: OpenAIAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
async function openaiGETOrThrow<TOut extends object>(access: OpenAIAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = openAIAccess(access, null, apiPath);
return await fetchJsonOrTRPCError<TOut>(url, 'GET', headers, undefined, `OpenAI/${access.dialect}`);
}
async function openaiPOST<TOut extends object, TPostBody extends object>(access: OpenAIAccessSchema, modelRefId: string | null, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
async function openaiPOSTOrThrow<TOut extends object, TPostBody extends object>(access: OpenAIAccessSchema, modelRefId: string | null, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = openAIAccess(access, modelRefId, apiPath);
return await fetchJsonOrTRPCError<TOut, TPostBody>(url, 'POST', headers, body, `OpenAI/${access.dialect}`);
}
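The `OrThrow` rename makes the failure mode explicit: both helpers delegate to `fetchJsonOrTRPCError`, so transport failures surface as thrown TRPCErrors, and call sites either let them propagate or catch them deliberately, as the moderation mutation above does. Illustrative call shapes (the `access` value is assumed to come from the tRPC input, as in the router):

```ts
// illustrative only: 'access' arrives via the tRPC procedure input
declare const access: OpenAIAccessSchema;

// typical query path: let the thrown TRPCError propagate to the tRPC layer
const models = await openaiGETOrThrow<{ data: object[] }>(access, '/v1/models');

// or catch deliberately, as the moderation mutation does
try {
  await openaiPOSTOrThrow(access, null, { input: 'text', model: 'text-moderation-latest' }, '/v1/moderations');
} catch (error: any) {
  // map to a friendlier, client-facing error here
}
```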
@@ -5,16 +5,20 @@ export const wireOpenrouterModelsListOutputSchema = z.object({
id: z.string(),
name: z.string(),
description: z.string(),
// NOTE: for 'openrouter/auto', this is: {
// "prompt": "-1",
// "completion": "-1"
// }
pricing: z.object({
prompt: z.string(),
completion: z.string(),
image: z.string(),
request: z.string(),
image: z.string().optional(),
request: z.string().optional(),
}),
context_length: z.number(),
architecture: z.object({
modality: z.string(), // z.enum(['text', 'multimodal']),
tokenizer: z.string(), // e.g. 'Mistral'
    modality: z.string(), // z.enum(['text', 'multimodal', 'text+image->text']),
tokenizer: z.string(), // e.g. 'Mistral', 'Claude'
instruct_type: z.string().nullable(),
}),
top_provider: z.object({
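The pricing NOTE above matters downstream: OpenRouter reports per-token prices as strings, and `'openrouter/auto'` uses `"-1"` as a variable-pricing sentinel. A hypothetical defensive conversion (the helper name and behavior are assumptions, not code from this diff):

```ts
// hypothetical helper: turn OpenRouter's string prices into numbers,
// mapping the "-1" sentinel (openrouter/auto) to 'unknown'
function parseOpenRouterPrice(price: string): number | undefined {
  const value = parseFloat(price);
  return Number.isFinite(value) && value >= 0 ? value : undefined;
}

parseOpenRouterPrice('0.000005'); // 0.000005
parseOpenRouterPrice('-1');       // undefined: variable pricing, not a real rate
```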
@@ -8,7 +8,7 @@ import type { DLLM, DLLMId, DModelSourceId } from '../store-llms';
import type { ModelDescriptionSchema } from '../server/llm.server.types';
import type { ModelVendorId } from './vendors.registry';
import type { StreamingClientUpdate } from './unifiedStreamingClient';
import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut } from '../llm.client';
import type { VChatContextRef, VChatFunctionIn, VChatGenerateContextName, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut, VChatStreamContextName } from '../llm.client';
export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown, TDLLM = DLLM<TSourceSetup, TLLMOptions>> {
@@ -44,6 +44,7 @@ export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOpt
access: TAccess,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatGenerateContextName, contextRef: VChatContextRef | null,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
maxTokens?: number,
) => Promise<VChatMessageOut | VChatMessageOrFunctionCallOut>;
@@ -53,7 +54,7 @@ export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOpt
llmId: DLLMId,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatContextName, contexRef: VChatContextRef,
contextName: VChatStreamContextName, contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
import type { AnthropicAccessSchema } from '../../server/anthropic/anthropic.router';
import type { IModelVendor } from '../IModelVendor';
import type { VChatMessageOut } from '../../llm.client';
import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
import { unifiedStreamingClient } from '../unifiedStreamingClient';
import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE, LLMOptionsOpenAI } from '../openai/openai.vendor';
@@ -47,7 +47,7 @@ export const ModelVendorAnthropic: IModelVendor<SourceSetupAnthropic, AnthropicA
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmAnthropic.listModels.query({ access }),
// Chat Generate (non-streaming) with Functions
rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
if (functions?.length || forceFunctionName)
throw new Error('Anthropic does not support functions');
@@ -61,6 +61,11 @@ export const ModelVendorAnthropic: IModelVendor<SourceSetupAnthropic, AnthropicA
maxTokens: maxTokens || llmResponseTokens || FALLBACK_LLM_RESPONSE_TOKENS,
},
history: messages,
context: contextRef ? {
method: 'chat-generate',
name: contextName,
ref: contextRef,
} : undefined,
}) as VChatMessageOut;
} catch (error: any) {
const errorMessage = error?.message || error?.toString() || 'Anthropic Chat Generate Error';
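The same `context: contextRef ? { ... } : undefined` ternary recurs verbatim in the Gemini, Ollama, and OpenAI vendors below. A small helper (hypothetical, not in this diff) would remove the repetition; type stand-ins replace the real `llm.client` imports:

```ts
// stand-ins for the real VChatGenerateContextName / VChatContextRef types
type VChatGenerateContextName = string;
type VChatContextRef = string;

// hypothetical helper to build the optional chat-generate context once, not per vendor
function makeGenerateContext(name: VChatGenerateContextName, ref: VChatContextRef | null) {
  return ref ? { method: 'chat-generate' as const, name, ref } : undefined;
}

// usage inside each vendor's rpcChatGenerateOrThrow:
//   context: makeGenerateContext(contextName, contextRef),
```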
@@ -1,10 +1,10 @@
import { GeminiIcon } from '~/common/components/icons/vendors/GeminiIcon';
import { apiAsync } from '~/common/util/trpc.client';
import type { GeminiAccessSchema } from '../../server/gemini/gemini.router';
import type { GeminiBlockSafetyLevel } from '../../server/gemini/gemini.wiretypes';
import type { IModelVendor } from '../IModelVendor';
import type { VChatMessageOut } from '../../llm.client';
import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
import { unifiedStreamingClient } from '../unifiedStreamingClient';
import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE } from '../openai/openai.vendor';
@@ -60,7 +60,7 @@ export const ModelVendorGemini: IModelVendor<SourceSetupGemini, GeminiAccessSche
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmGemini.listModels.query({ access }),
// Chat Generate (non-streaming) with Functions
rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
if (functions?.length || forceFunctionName)
throw new Error('Gemini does not support functions');
@@ -74,6 +74,11 @@ export const ModelVendorGemini: IModelVendor<SourceSetupGemini, GeminiAccessSche
maxTokens: maxTokens || maxOutputTokens || FALLBACK_LLM_RESPONSE_TOKENS,
},
history: messages,
context: contextRef ? {
method: 'chat-generate',
name: contextName,
ref: contextRef,
} : undefined,
}) as VChatMessageOut;
} catch (error: any) {
const errorMessage = error?.message || error?.toString() || 'Gemini Chat Generate Error';
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
import type { IModelVendor } from '../IModelVendor';
import type { OllamaAccessSchema } from '../../server/ollama/ollama.router';
import type { VChatMessageOut } from '../../llm.client';
import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
import { unifiedStreamingClient } from '../unifiedStreamingClient';
import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE, LLMOptionsOpenAI } from '../openai/openai.vendor';
@@ -42,7 +42,7 @@ export const ModelVendorOllama: IModelVendor<SourceSetupOllama, OllamaAccessSche
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmOllama.listModels.query({ access }),
// Chat Generate (non-streaming) with Functions
rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
if (functions?.length || forceFunctionName)
throw new Error('Ollama does not support functions');
@@ -56,6 +56,11 @@ export const ModelVendorOllama: IModelVendor<SourceSetupOllama, OllamaAccessSche
maxTokens: maxTokens || llmResponseTokens || FALLBACK_LLM_RESPONSE_TOKENS,
},
history: messages,
context: contextRef ? {
method: 'chat-generate',
name: contextName,
ref: contextRef,
} : undefined,
}) as VChatMessageOut;
} catch (error: any) {
const errorMessage = error?.message || error?.toString() || 'Ollama Chat Generate Error';
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
import type { IModelVendor } from '../IModelVendor';
import type { OpenAIAccessSchema } from '../../server/openai/openai.router';
import type { VChatMessageOrFunctionCallOut } from '../../llm.client';
import type { VChatContextRef, VChatGenerateContextName, VChatMessageOrFunctionCallOut } from '../../llm.client';
import { unifiedStreamingClient } from '../unifiedStreamingClient';
import { OpenAILLMOptions } from './OpenAILLMOptions';
@@ -60,7 +60,7 @@ export const ModelVendorOpenAI: IModelVendor<SourceSetupOpenAI, OpenAIAccessSche
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmOpenAI.listModels.query({ access }),
// Chat Generate (non-streaming) with Functions
rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
const { llmRef, llmTemperature, llmResponseTokens } = llmOptions;
try {
return await apiAsync.llmOpenAI.chatGenerateWithFunctions.mutate({
@@ -73,6 +73,11 @@ export const ModelVendorOpenAI: IModelVendor<SourceSetupOpenAI, OpenAIAccessSche
functions: functions ?? undefined,
forceFunctionName: forceFunctionName ?? undefined,
history: messages,
context: contextRef ? {
method: 'chat-generate',
name: contextName,
ref: contextRef,
} : undefined,
}) as VChatMessageOrFunctionCallOut;
} catch (error: any) {
const errorMessage = error?.message || error?.toString() || 'OpenAI Chat Generate Error';
@@ -3,7 +3,7 @@ import { frontendSideFetch } from '~/common/util/clientFetchers';
import type { ChatStreamingInputSchema, ChatStreamingPreambleModelSchema, ChatStreamingPreambleStartSchema } from '../server/llm.server.streaming';
import type { DLLMId } from '../store-llms';
import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn } from '../llm.client';
import type { VChatContextRef, VChatFunctionIn, VChatMessageIn, VChatStreamContextName } from '../llm.client';
import type { OpenAIAccessSchema } from '../server/openai/openai.router';
import type { OpenAIWire } from '../server/openai/openai.wiretypes';
@@ -29,7 +29,7 @@ export async function unifiedStreamingClient<TSourceSetup = unknown, TLLMOptions
llmId: DLLMId,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatContextName, contextRef: VChatContextRef,
contextName: VChatStreamContextName, contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
@@ -1,7 +1,7 @@
{
"functions": {
"app/api/trpc-node/**/*": {
"maxDuration": 25
"api/trpc-node/**/*": {
"maxDuration": 30
}
}
}