Mirror of https://github.com/enricoros/big-AGI.git (synced 2026-05-11 06:00:15 -07:00)
Compare commits
18 Commits
| SHA1 |
|---|
| dec280d54d |
| 4823e97783 |
| 6a5685995f |
| 3b4d5691d7 |
| 45c09d021a |
| 8ef759fe0f |
| c06735fdd2 |
| cf4297a1af |
| 5d458d68bd |
| c3db077ae8 |
| 779b265b20 |
| 7d6d7e619b |
| 34caa16e39 |
| 976426dbd3 |
| b4d8e39d56 |
| 11c41e7381 |
| 358d8a54ff |
| 3c8fedce68 |
@@ -21,17 +21,19 @@ Or fork & run on Vercel

[//]: # (big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap)

### What's New in 1.16.2 · Jun 7, 2024 (minor release)
### What's New in 1.16.1...1.16.7 · Aug 7, 2024 (patch releases)

- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam

### What's New in 1.16.1 · May 13, 2024 (minor release)

- Support for the new OpenAI GPT-4o 2024-05-13 model
- 1.16.7: OpenAI support for GPT-4o 2024-08-06
- 1.16.6: Groq support for Llama 3.1 models
- 1.16.5: GPT-4o Mini support
- 1.16.4: 8192 tokens support for Claude 3.5 Sonnet
- 1.16.3: Anthropic Claude 3.5 Sonnet model support
- 1.16.2: Improve web downloads, as text, markdown, or HTML
- 1.16.2: Proper support for Gemini models
- 1.16.2: Added the latest Mistral model
- 1.16.2: Tokenizer support for gpt-4o
- 1.16.2: Updates to Beam
- 1.16.1: Support for the new OpenAI GPT-4o 2024-05-13 model

### What's New in 1.16.0 · May 9, 2024 · Crystal Clear
@@ -10,17 +10,19 @@ by release.

- milestone: [1.17.0](https://github.com/enricoros/big-agi/milestone/17)
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)

### What's New in 1.16.2 · Jun 7, 2024 (minor release)
### What's New in 1.16.1...1.16.7 · Aug 7, 2024 (patch releases)

- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam

### What's New in 1.16.1 · May 13, 2024 (minor release)

- Support for the new OpenAI GPT-4o 2024-05-13 model
- 1.16.7: OpenAI support for GPT-4o 2024-08-06
- 1.16.6: Groq support for Llama 3.1 models
- 1.16.5: GPT-4o Mini support
- 1.16.4: 8192 tokens support for Claude 3.5 Sonnet
- 1.16.3: Anthropic Claude 3.5 Sonnet model support
- 1.16.2: Improve web downloads, as text, markdown, or HTML
- 1.16.2: Proper support for Gemini models
- 1.16.2: Added the latest Mistral model
- 1.16.2: Tokenizer support for gpt-4o
- 1.16.2: Updates to Beam
- 1.16.1: Support for the new OpenAI GPT-4o 2024-05-13 model

### What's New in 1.16.0 · May 9, 2024 · Crystal Clear
@@ -277,7 +277,7 @@ export function AppChat() {
const conversation = getConversation(conversationId);
if (!conversation)
return;
const imaginedPrompt = await imaginePromptFromText(messageText) || 'An error sign.';
const imaginedPrompt = await imaginePromptFromText(messageText, conversationId) || 'An error sign.';
await handleExecuteAndOutcome('generate-image', conversationId, [
...conversation.messages,
createDMessage('user', imaginedPrompt),
@@ -280,6 +280,7 @@ export function ChatMessage(props: {
const wasEdited = !!messageUpdated;
const textSel = selText ? selText : messageText;
// WARNING: if you get an issue here, you're downgrading from the new Big-AGI 2 data format to 1.x.
const isSpecialT2I = textSel.startsWith('https://images.prodia.xyz/') || textSel.startsWith('/draw ') || textSel.startsWith('/imagine ') || textSel.startsWith('/img ');
const couldDiagram = textSel.length >= 100 && !isSpecialT2I;
const couldImagine = textSel.length >= 3 && !isSpecialT2I;
@@ -2,7 +2,7 @@ import type { DLLMId } from '~/modules/llms/store-llms';
import type { StreamingClientUpdate } from '~/modules/llms/vendors/unifiedStreamingClient';
import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
import { conversationAutoTitle } from '~/modules/aifn/autotitle/autoTitle';
import { llmStreamingChatGenerate, VChatContextRef, VChatContextName, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
import type { DMessage } from '~/common/state/store-chats';
@@ -63,7 +63,7 @@ type StreamMessageStatus = { outcome: StreamMessageOutcome, errorMessage?: strin
export async function streamAssistantMessage(
llmId: DLLMId,
messagesHistory: VChatMessageIn[],
contextName: VChatContextName,
contextName: VChatStreamContextName,
contextRef: VChatContextRef,
throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce the message frequency with the square root
autoSpeak: ChatAutoSpeakType,
@@ -61,7 +61,7 @@ export const NewsItems: NewsItem[] = [
]
}*/
{
versionCode: '1.16.2',
versionCode: '1.16.7',
versionName: 'Crystal Clear',
versionDate: new Date('2024-06-07T05:00:00Z'),
// versionDate: new Date('2024-05-13T19:00:00Z'),
@@ -77,8 +77,13 @@ export const NewsItems: NewsItem[] = [
{ text: <>More: <B issue={517}>code soft-wrap</B>, selection toolbar, <B issue={507}>3x faster</B> on Apple silicon</>, issue: 507 },
{ text: <>Updated <B>Anthropic</B>*, <B>Groq</B>, <B>Ollama</B>, <B>OpenAI</B>*, <B>OpenRouter</B>*, and <B>Perplexity</B></> },
{ text: <>Developers: update LLMs data structures</>, dev: true },
{ text: <>1.16.1: Support for <B>OpenAI</B> <B href='https://openai.com/index/hello-gpt-4o/'>GPT-4o</B> (refresh your OpenAI models)</> },
{ text: <>1.16.1: Support for <B>OpenAI</B> <B href='https://openai.com/index/hello-gpt-4o/'>GPT-4o</B></> },
{ text: <>1.16.2: Proper <B>Gemini</B> support, <B>HTML/Markdown</B> downloads, and latest <B>Mistral</B></> },
{ text: <>1.16.3: Support for <B href='https://www.anthropic.com/news/claude-3-5-sonnet'>Claude 3.5 Sonnet</B> (refresh your <B>Anthropic</B> models)</> },
{ text: <>1.16.4: <B>8192 tokens</B> support for Claude 3.5 Sonnet</> },
{ text: <>1.16.5: OpenAI <B>GPT-4o Mini</B> support</> },
{ text: <>1.16.6: Groq <B>Llama 3.1</B> support</> },
{ text: <>1.16.7: Gpt-4o <B>2024-08-06</B></> },
],
},
{
@@ -7,7 +7,7 @@ import { useAppStateStore } from '~/common/state/store-appstate';
// update this variable every time you want to broadcast a new version to clients
export const incrementalNewsVersion: number = 16.1; // not notifying for 16.2
export const incrementalNewsVersion: number = 16.1; // not notifying for 1.16.7
interface NewsState {
@@ -8,8 +8,11 @@ export function prettyBaseModel(model: string | undefined): string {
if (!model) return '';
if (model.includes('gpt-4-vision-preview')) return 'GPT-4 Vision';
if (model.includes('gpt-4-1106-preview')) return 'GPT-4 Turbo';
if (model.includes('gpt-4-32k')) return 'gpt-4-32k';
if (model.includes('gpt-4')) return 'gpt-4';
if (model.includes('gpt-4-32k')) return 'GPT-4-32k';
if (model.includes('gpt-4o-mini')) return 'GPT-4o Mini';
if (model.includes('gpt-4o')) return 'GPT-4o';
if (model.includes('gpt-4-turbo')) return 'GPT-4 Turbo';
if (model.includes('gpt-4')) return 'GPT-4';
if (model.includes('gpt-3.5-turbo-instruct')) return '3.5 Turbo Instruct';
if (model.includes('gpt-3.5-turbo-1106')) return '3.5 Turbo 16k';
if (model.includes('gpt-3.5-turbo-16k')) return '3.5 Turbo 16k';
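The order of the new checks matters because `includes()` does substring matching, so the more specific prefixes must be tested first. A minimal sketch of the resulting behaviour (the example calls are illustrative, not part of the diff):

```ts
// More specific prefixes are matched before shorter ones:
prettyBaseModel('gpt-4o-mini-2024-07-18'); // -> 'GPT-4o Mini' (would be 'GPT-4o' if checked later)
prettyBaseModel('gpt-4-turbo-2024-04-09'); // -> 'GPT-4 Turbo' (not 'GPT-4')
```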
@@ -1,4 +1,4 @@
import { llmChatGenerateOrThrow, VChatFunctionIn } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatFunctionIn, VChatMessageIn } from '~/modules/llms/llm.client';
import { useModelsStore } from '~/modules/llms/store-llms';
import { useChatStore } from '~/common/state/store-chats';
@@ -83,13 +83,18 @@ export function autoSuggestions(conversationId: string, assistantMessageId: stri
// Follow-up: Auto-Diagrams
if (suggestDiagrams) {
llmChatGenerateOrThrow(funcLLMId, [
{ role: 'system', content: systemMessage.text },
{ role: 'user', content: userMessage.text },
{ role: 'assistant', content: assistantMessageText },
], [suggestPlantUMLFn], 'draw_plantuml_diagram',
const instructions: VChatMessageIn[] = [
{ role: 'system', content: systemMessage.text },
{ role: 'user', content: userMessage.text },
{ role: 'assistant', content: assistantMessageText },
];
llmChatGenerateOrThrow(
funcLLMId,
instructions,
'chat-followup-diagram', conversationId,
[suggestPlantUMLFn], 'draw_plantuml_diagram',
).then(chatResponse => {
// cheap way to check if the function was supported
if (!('function_arguments' in chatResponse))
return;
@@ -1,5 +1,5 @@
import { getFastLLMId } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
import { useChatStore } from '~/common/state/store-chats';
@@ -34,21 +34,23 @@ export async function conversationAutoTitle(conversationId: string, forceReplace
try {
// LLM chat-generate call
const instructions: VChatMessageIn[] = [
{ role: 'system', content: `You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.` },
{
role: 'user', content:
'Analyze the given short conversation (every line is truncated) and extract a concise chat title that ' +
'summarizes the conversation in as little as a couple of words.\n' +
'Only respond with the lowercase short title and nothing else.\n' +
'\n' +
'```\n' +
historyLines.join('\n') +
'```\n',
},
];
const chatResponse = await llmChatGenerateOrThrow(
fastLLMId,
[
{ role: 'system', content: `You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.` },
{
role: 'user', content:
'Analyze the given short conversation (every line is truncated) and extract a concise chat title that ' +
'summarizes the conversation in as little as a couple of words.\n' +
'Only respond with the lowercase short title and nothing else.\n' +
'\n' +
'```\n' +
historyLines.join('\n') +
'```\n',
},
],
instructions,
'chat-ai-title', conversationId,
null, null,
);
@@ -1,5 +1,5 @@
import { getFastLLMId } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
const simpleImagineSystemPrompt =
@@ -10,14 +10,15 @@ Provide output as a lowercase prompt and nothing else.`;
/**
* Creates a caption for a drawing or photo given some description - used to elevate the quality of the imaging
*/
export async function imaginePromptFromText(messageText: string): Promise<string | null> {
export async function imaginePromptFromText(messageText: string, contextRef: string): Promise<string | null> {
const fastLLMId = getFastLLMId();
if (!fastLLMId) return null;
try {
const chatResponse = await llmChatGenerateOrThrow(fastLLMId, [
const instructions: VChatMessageIn[] = [
{ role: 'system', content: simpleImagineSystemPrompt },
{ role: 'user', content: 'Write a prompt, based on the following input.\n\n```\n' + messageText.slice(0, 1000) + '\n```\n' },
], null, null);
];
const chatResponse = await llmChatGenerateOrThrow(fastLLMId, instructions, 'draw-expand-prompt', contextRef, null, null);
return chatResponse.content?.trim() ?? null;
} catch (error: any) {
console.error('imaginePromptFromText: fetch request error:', error);
@@ -132,7 +132,7 @@ export class Agent {
S.messages.push({ role: 'user', content: prompt });
let content: string;
try {
content = (await llmChatGenerateOrThrow(llmId, S.messages, null, null, 500)).content;
content = (await llmChatGenerateOrThrow(llmId, S.messages, 'chat-react-turn', null, null, null, 500)).content;
} catch (error: any) {
content = `Error in llmChatGenerateOrThrow: ${error}`;
}
@@ -1,5 +1,5 @@
import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
// prompt to be tried when doing recursive summarization.
@@ -80,10 +80,11 @@ async function cleanUpContent(chunk: string, llmId: DLLMId, _ignored_was_targetW
const autoResponseTokensSize = contextTokens ? Math.floor(contextTokens * outputTokenShare) : null;
try {
const chatResponse = await llmChatGenerateOrThrow(llmId, [
const instructions: VChatMessageIn[] = [
{ role: 'system', content: cleanupPrompt },
{ role: 'user', content: chunk },
], null, null, autoResponseTokensSize ?? undefined);
];
const chatResponse = await llmChatGenerateOrThrow(llmId, instructions, 'chat-ai-summarize', null, null, null, autoResponseTokensSize ?? undefined);
return chatResponse?.content ?? '';
} catch (error: any) {
return '';
@@ -1,7 +1,7 @@
import * as React from 'react';
import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
// set to true to log to the console
@@ -20,7 +20,7 @@ export interface LLMChainStep {
/**
* React hook to manage a chain of LLM transformations.
*/
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatContextName, contextRef: VChatContextRef) {
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatStreamContextName, contextRef: VChatContextRef) {
// state
const [chain, setChain] = React.useState<ChainState | null>(null);
@@ -1,7 +1,7 @@
import * as React from 'react';
import type { DLLMId } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
export function useStreamChatText() {
@@ -13,7 +13,7 @@ export function useStreamChatText() {
const abortControllerRef = React.useRef<AbortController | null>(null);
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatContextName, contextRef: VChatContextRef) => {
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatStreamContextName, contextRef: VChatContextRef) => {
setStreamError(null);
setPartialText(null);
setText(null);
@@ -2,7 +2,7 @@ import { sendGAEvent } from '@next/third-parties/google';
import { hasGoogleAnalytics } from '~/common/components/GoogleAnalytics';
import type { ModelDescriptionSchema } from './server/llm.server.types';
import type { GenerateContextNameSchema, ModelDescriptionSchema, StreamingContextNameSchema } from './server/llm.server.types';
import type { OpenAIWire } from './server/openai/openai.wiretypes';
import type { StreamingClientUpdate } from './vendors/unifiedStreamingClient';
import { DLLM, DLLMId, DModelSource, DModelSourceId, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, useModelsStore } from './store-llms';
@@ -21,14 +21,8 @@ export interface VChatMessageIn {
export type VChatFunctionIn = OpenAIWire.ChatCompletion.RequestFunctionDef;
export type VChatContextName =
| 'conversation'
| 'ai-diagram'
| 'ai-flattener'
| 'beam-scatter'
| 'beam-gather'
| 'call'
| 'persona-extract';
export type VChatStreamContextName = StreamingContextNameSchema;
export type VChatGenerateContextName = GenerateContextNameSchema;
export type VChatContextRef = string;
export interface VChatMessageOut {
@@ -122,7 +116,10 @@ function modelDescriptionToDLLMOpenAIOptions<TSourceSetup, TLLMOptions>(model: M
export async function llmChatGenerateOrThrow<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown>(
llmId: DLLMId,
messages: VChatMessageIn[],
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
contextName: VChatGenerateContextName,
contextRef: VChatContextRef | null,
functions: VChatFunctionIn[] | null,
forceFunctionName: string | null,
maxTokens?: number,
): Promise<VChatMessageOut | VChatMessageOrFunctionCallOut> {
@@ -146,14 +143,14 @@ export async function llmChatGenerateOrThrow<TSourceSetup = unknown, TAccess = u
await new Promise(resolve => setTimeout(resolve, delay));
// execute via the vendor
return await vendor.rpcChatGenerateOrThrow(access, options, messages, functions, forceFunctionName, maxTokens);
return await vendor.rpcChatGenerateOrThrow(access, options, messages, contextName, contextRef, functions, forceFunctionName, maxTokens);
}
export async function llmStreamingChatGenerate<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown>(
llmId: DLLMId,
messages: VChatMessageIn[],
contextName: VChatContextName,
contextName: VChatStreamContextName,
contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null,
forceFunctionName: string | null,
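For reference, a minimal usage sketch of the updated non-streaming client signature: the context name/ref now come before functions and forceFunctionName, mirroring the call sites updated elsewhere in these commits. The wrapper function and variable names below are illustrative, not from the diff.

```ts
import type { DLLMId } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';

// sketch only: generate a title with the new 'chat-ai-title' context attached
async function sketchTitle(fastLLMId: DLLMId, conversationId: string, instructions: VChatMessageIn[]) {
  const chatResponse = await llmChatGenerateOrThrow(
    fastLLMId,
    instructions,
    'chat-ai-title', conversationId, // contextName (VChatGenerateContextName) + contextRef
    null, null,                      // functions, forceFunctionName
  );
  return chatResponse.content?.trim() ?? null;
}
```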
@@ -1,4 +1,5 @@
import * as React from 'react';
import TimeAgo from 'react-timeago';
import { shallow } from 'zustand/shallow';
import { Box, Button, ButtonGroup, Divider, FormControl, Input, Switch, Tooltip, Typography } from '@mui/joy';
@@ -132,10 +133,10 @@ export function LLMOptionsModal(props: { id: DLLMId, onClose: () => void }) {
llm id: {llm.id}<br />
context tokens: <b>{llm.contextTokens ? llm.contextTokens.toLocaleString() : 'not provided'}</b>{` · `}
max output tokens: <b>{llm.maxOutputTokens ? llm.maxOutputTokens.toLocaleString() : 'not provided'}</b><br />
{!!llm.created && <>created: {(new Date(llm.created * 1000)).toLocaleString()}<br /></>}
{!!llm.created && <>created: <TimeAgo date={new Date(llm.created * 1000)} /><br /></>}
{/*· tags: {llm.tags.join(', ')}*/}
{!!llm.pricing && <>pricing: $<b>{llm.pricing.chatIn || '(unk) '}</b>/M in, $<b>{llm.pricing.chatOut || '(unk) '}</b>/M out<br /></>}
{!!llm.benchmark && <>benchmark: <b>{llm.benchmark.cbaElo?.toLocaleString() || '(unk) '}</b> CBA Elo<br /></>}
{/*{!!llm.benchmark && <>benchmark: <b>{llm.benchmark.cbaElo?.toLocaleString() || '(unk) '}</b> CBA Elo<br /></>}*/}
config: {JSON.stringify(llm.options)}
</Typography>
</Box>}
@@ -4,14 +4,64 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Vision } from '../../store-llms';
const roundTime = (date: string) => Math.round(new Date(date).getTime() / 1000);
export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: boolean })[] = [
// Claude 3.5 models - https://docs.anthropic.com/en/docs/about-claude/models
// {
//   id: 'claude-3.5-opus', // ...
//   label: 'Claude 3.5 Opus',
//   created: roundTime(?),
//   description: ?,
//   contextWindow: 200000 ?, // Characters
//   maxCompletionTokens: 4096 ?,
//   trainingDataCutoff: ?,
//   interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
//   pricing: { chatIn: 15, chatOut: 75 },
//   benchmark: {
//     cbaElo: 1256, // Placeholder
//     cbaMmlu: 86.8, // Placeholder
//   },
// },
{
id: 'claude-3-5-sonnet-20240620',
label: 'Claude 3.5 Sonnet',
created: roundTime('2024-06-20 06:00'),
description: 'The most intelligent Claude model',
contextWindow: 200000, // Characters
maxCompletionTokens: 8192,
trainingDataCutoff: 'Apr 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 3, chatOut: 15 },
benchmark: {
heCode: 92.0,
vqaMmmu: 68.3,
// TODO: Update with official benchmarks when available
cbaElo: 1256 - 1, // Placeholder
cbaMmlu: 86.8 - 1, // Placeholder
},
},
// {
//   id: 'claude-3.5-haiku', // ...
//   label: 'Claude 3.5 Haiku',
//   created: roundTime(?),
//   description: ?,
//   contextWindow: 200000 ?, // Characters
//   maxCompletionTokens: 4096 ?,
//   trainingDataCutoff: ?,
//   interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
//   pricing: { chatIn: 0.25, chatOut: 1.25 },
//   benchmark: {
//     cbaElo: 1181, // Placeholder
//     cbaMmlu: 75.2, // Placeholder
//   },
// },
// Claude-3 models - https://docs.anthropic.com/claude/docs/models-overview#model-comparison
// Claude 3 models
{
id: 'claude-3-opus-20240229',
label: 'Claude 3 Opus',
created: roundTime('2024-02-29'),
description: 'Most powerful model for highly complex tasks',
description: 'Powerful model for complex tasks',
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
@@ -23,19 +73,20 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
id: 'claude-3-sonnet-20240229',
label: 'Claude 3 Sonnet',
created: roundTime('2024-02-29'),
description: 'Ideal balance of intelligence and speed for enterprise workloads',
description: 'Balance of speed, cost, and performance',
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 3, chatOut: 15 },
benchmark: { cbaElo: 1203, cbaMmlu: 79 },
hidden: true,
},
{
id: 'claude-3-haiku-20240307',
label: 'Claude 3 Haiku',
created: roundTime('2024-03-07'),
description: 'Fastest and most compact model for near-instant responsiveness',
description: 'Fastest, most cost-effective model',
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
@@ -55,6 +106,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
interfaces: [LLM_IF_OAI_Chat],
pricing: { chatIn: 8, chatOut: 24 },
benchmark: { cbaElo: 1119 },
hidden: true,
},
{
id: 'claude-2.0',
@@ -77,25 +129,26 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
maxCompletionTokens: 4096,
interfaces: [LLM_IF_OAI_Chat],
pricing: { chatIn: 0.8, chatOut: 2.4 },
},
{
id: 'claude-instant-1.1',
label: 'Claude Instant 1.1',
created: roundTime('2023-03-14'),
description: 'Precise and fast',
contextWindow: 100000,
maxCompletionTokens: 2048,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
{
id: 'claude-1.3',
label: 'Claude 1.3',
created: roundTime('2023-03-14'),
description: 'Claude 1.3 is the latest version of Claude v1',
contextWindow: 100000,
maxCompletionTokens: 4096,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
// {
//   id: 'claude-instant-1.1',
//   label: 'Claude Instant 1.1',
//   created: roundTime('2023-03-14'),
//   description: 'Precise and fast',
//   contextWindow: 100000,
//   maxCompletionTokens: 2048,
//   interfaces: [LLM_IF_OAI_Chat],
//   hidden: true,
// },
// {
//   id: 'claude-1.3',
//   label: 'Claude 1.3',
//   created: roundTime('2023-03-14'),
//   description: 'Claude 1.3 is the latest version of Claude v1',
//   contextWindow: 100000,
//   maxCompletionTokens: 4096,
//   interfaces: [LLM_IF_OAI_Chat],
//   hidden: true,
// },
];
@@ -8,7 +8,7 @@ import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { AnthropicWireMessagesRequest, anthropicWireMessagesRequestSchema, AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic.wiretypes';
import { hardcodedAnthropicModels } from './anthropic.models';
@@ -17,7 +17,9 @@ import { hardcodedAnthropicModels } from './anthropic.models';
// Default hosts
const DEFAULT_API_VERSION_HEADERS = {
'anthropic-version': '2023-06-01',
'anthropic-beta': 'messages-2023-12-15',
// Former Betas:
// - messages-2023-12-15: to use the Messages API
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
};
const DEFAULT_MAX_TOKENS = 2048;
const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com';
@@ -158,7 +160,11 @@ const listModelsInputSchema = z.object({
const chatGenerateInputSchema = z.object({
access: anthropicAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
model: openAIModelSchema,
history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(),
// forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
@@ -3,6 +3,10 @@ import type { ModelDescriptionSchema } from '../llm.server.types';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '../../store-llms';
// dev options
const DEV_DEBUG_GEMINI_MODELS = false;
// supported interfaces
const geminiChatInterfaces: GeminiModelSchema['supportedGenerationMethods'] = ['generateContent'];
@@ -175,6 +179,9 @@ export function geminiSortModels(a: ModelDescriptionSchema, b: ModelDescriptionS
export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): ModelDescriptionSchema {
const { description, displayName, name: modelId, supportedGenerationMethods } = geminiModel;
if (DEV_DEBUG_GEMINI_MODELS)
console.log('geminiModelToModelDescription', geminiModel);
// find known manual mapping
const knownModel = _knownGeminiModels.find(m => m.id === modelId);
@@ -8,7 +8,7 @@ import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server';
import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
import { fixupHost } from '~/common/util/urlUtils';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
@@ -120,8 +120,11 @@ const accessOnlySchema = z.object({
const chatGenerateInputSchema = z.object({
access: geminiAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
model: openAIModelSchema,
history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(),
// forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
@@ -18,6 +18,7 @@ const geminiModelSchema = z.object({
inputTokenLimit: z.number().int().min(1),
outputTokenLimit: z.number().int().min(1),
supportedGenerationMethods: z.array(z.enum([
'createCachedContent', // appeared on 2024-06-10, see https://github.com/enricoros/big-AGI/issues/565
'countMessageTokens',
'countTextTokens',
'countTokens',
@@ -22,6 +22,9 @@ import type { OpenAIWire } from './openai/openai.wiretypes';
import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, openAIHistorySchema, openAIModelSchema } from './openai/openai.router';
import { llmsStreamingContextSchema } from './llm.server.types';
// configuration
const USER_SYMBOL_MAX_TOKENS = '🧱';
const USER_SYMBOL_PROMPT_BLOCKED = '🚫';
@@ -46,17 +49,14 @@ type MuxingFormat = 'sse' | 'json-nl';
*/
type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean };
const streamingContextSchema = z.object({
method: z.literal('chat-stream'),
name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
ref: z.string(),
});
const chatStreamingInputSchema = z.object({
access: z.union([anthropicAccessSchema, geminiAccessSchema, ollamaAccessSchema, openAIAccessSchema]),
model: openAIModelSchema,
history: openAIHistorySchema,
context: streamingContextSchema,
// NOTE: made it optional for now as we have some old requests without it
// 2024-07-07: remove .optional()
context: llmsStreamingContextSchema.optional(),
});
export type ChatStreamingInputSchema = z.infer<typeof chatStreamingInputSchema>;
@@ -12,6 +12,8 @@ const pricingSchema = z.object({
const benchmarkSchema = z.object({
cbaElo: z.number().optional(),
cbaMmlu: z.number().optional(),
heCode: z.number().optional(), // HumanEval, code, 0-shot
vqaMmmu: z.number().optional(), // Visual Question Answering, MMMU, 0-shot
});
// const rateLimitsSchema = z.object({
@@ -46,6 +48,25 @@ export const llmsListModelsOutputSchema = z.object({
});
// Chat Generation Input (some parts of)
const generateContextNameSchema = z.enum(['chat-ai-title', 'chat-ai-summarize', 'chat-followup-diagram', 'chat-react-turn', 'draw-expand-prompt']);
export type GenerateContextNameSchema = z.infer<typeof generateContextNameSchema>;
export const llmsGenerateContextSchema = z.object({
method: z.literal('chat-generate'),
name: generateContextNameSchema,
ref: z.string(),
});
const streamingContextNameSchema = z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']);
export type StreamingContextNameSchema = z.infer<typeof streamingContextNameSchema>;
export const llmsStreamingContextSchema = z.object({
method: z.literal('chat-stream'),
name: streamingContextNameSchema,
ref: z.string(),
});
// (non-streaming) Chat Generation Output
export const llmsChatGenerateOutputSchema = z.object({
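For illustration, payloads that satisfy the two new context schemas would look like this; the `ref` values are placeholders, not taken from the diff:

```ts
// chat-generate context, e.g. as attached by the auto-title call
const generateContext = llmsGenerateContextSchema.parse({
  method: 'chat-generate',
  name: 'chat-ai-title',
  ref: 'conversation-1234', // placeholder conversation id
});

// chat-stream context, e.g. as attached by the chat streaming client
const streamingContext = llmsStreamingContextSchema.parse({
  method: 'chat-stream',
  name: 'conversation',
  ref: 'conversation-1234', // placeholder conversation id
});
```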
@@ -11,7 +11,7 @@ import { capitalizeFirstLetter } from '~/common/util/textUtils';
import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models';
import { WireOllamaChatCompletionInput, wireOllamaChunkedOutputSchema, wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama.wiretypes';
@@ -117,8 +117,11 @@ const adminPullModelSchema = z.object({
const chatGenerateInputSchema = z.object({
access: ollamaAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
model: openAIModelSchema,
history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(),
// forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
const listPullableOutputSchema = z.object({
@@ -0,0 +1,84 @@
// here for reference only - for future mapping of CBA scores to the model IDs
// const modelIdToPrefixMap: { [key: string]: string } = {
//   // Anthropic models
//   'Claude 3.5 Sonnet': 'claude-3-5-sonnet-20240620',
//   'Claude 3 Opus': 'claude-3-opus-20240229',
//   'Claude 3 Sonnet': 'claude-3-sonnet-20240229',
//   'Claude 3 Haiku': 'claude-3-haiku-20240307',
//   'Claude-2.1': 'claude-2.1',
//   'Claude-2.0': 'claude-2.0',
//   'Claude-1': '', // No exact match
//   'Claude-Instant-1': 'claude-instant-1.2', // Closest match
//
//   // Gemini models
//   'Gemini-1.5-Pro-Exp-0801': 'models/gemini-1.5-pro-latest', // Closest match
//   'Gemini Advanced App (2024-05-14)': '', // No exact match
//   'Gemini-1.5-Pro-001': 'models/gemini-1.5-pro-001',
//   'Gemini-1.5-Pro-Preview-0409': 'models/gemini-1.5-pro-latest', // Closest match
//   'Gemini-1.5-Flash-001': 'models/gemini-1.5-flash-001',
//   'Gemini App (2024-01-24)': '', // No exact match
//   'Gemini-1.0-Pro-001': 'models/gemini-1.0-pro-001',
//   'Gemini Pro': 'models/gemini-pro',
//
//   // OpenAI models (from the previous file)
//   'GPT-4o-2024-05-13': 'gpt-4o-2024-05-13',
//   'GPT-4o-mini-2024-07-18': 'gpt-4o-mini-2024-07-18',
//   'GPT-4-Turbo-2024-04-09': 'gpt-4-turbo-2024-04-09',
//   'GPT-4-1106-preview': 'gpt-4-1106-preview',
//   'GPT-4-0125-preview': 'gpt-4-0125-preview',
//   'GPT-4-0314': 'gpt-4-0314',
//   'GPT-4-0613': 'gpt-4-0613',
//   'GPT-3.5-Turbo-0613': 'gpt-3.5-turbo-0613',
//   'GPT-3.5-Turbo-0314': 'gpt-3.5-turbo-0314',
//   'GPT-3.5-Turbo-0125': 'gpt-3.5-turbo-0125',
//
//   // Mistral models (from the previous file)
//   'Mistral-Large-2402': 'mistral-large-2402',
//   'Mixtral-8x7b-Instruct-v0.1': 'mistralai/Mixtral-8x7B-Instruct-v0.1',
//
//   // Other models without matches
//   'Gemini-1.5-Pro-Exp-0801': '',
//   'Meta-Llama-3.1-405b-Instruct': '',
//   'Gemini-1.5-Pro-001': '',
//   'Meta-Llama-3.1-70b-Instruct': '',
//   'Yi-Large-preview': '',
//   'Deepseek-v2-API-0628': '',
//   'Gemma-2-27b-it': '',
//   'Yi-Large': '',
//   'Nemotron-4-340B-Instruct': '',
//   'GLM-4-0520': '',
//   'Llama-3-70b-Instruct': '',
//   'Reka-Core-20240501': '',
//   'Command R+': '',
//   'Gemma-2-9b-it': '',
//   'Qwen2-72B-Instruct': '',
//   'GLM-4-0116': '',
//   'Qwen-Max-0428': '',
//   'DeepSeek-Coder-V2-Instruct': '',
//   'Reka-Flash-Preview-20240611': '',
//   'Meta-Llama-3.1-8b-Instruct': '',
//   'Qwen1.5-110B-Chat': '',
//   'Yi-1.5-34B-Chat': '',
//   'Reka-Flash-21B-online': '',
//   'Llama-3-8b-Instruct': '',
//   'Command R': '',
//   'Reka-Flash-21B': '',
//   'Qwen1.5-72B-Chat': '',
//   'Mixtral-8x22b-Instruct-v0.1': '',
//   'Zephyr-ORPO-141b-A35b-v0.1': '',
//   'Qwen1.5-32B-Chat': '',
//   'Mistral-Next': '',
//   'Phi-3-Medium-4k-Instruct': '',
//   'Starling-LM-7B-beta': '',
//   'Yi-34B-Chat': '',
//   'Qwen1.5-14B-Chat': '',
//   'WizardLM-70B-v1.0': '',
//   'Tulu-2-DPO-70B': '',
//   'DBRX-Instruct-Preview': '',
//   'Phi-3-Small-8k-Instruct': '',
//   'Llama-2-70b-chat': '',
//   'OpenChat-3.5-0106': '',
//   'Vicuna-33B': '',
//   'Snowflake Arctic Instruct': '',
//   'Starling-LM-7B-alpha': '',
// };
@@ -9,9 +9,37 @@ import { wireTogetherAIListOutputSchema } from './togetherai.wiretypes';
// [Azure] / [OpenAI]
// https://platform.openai.com/docs/models
const _knownOpenAIChatModels: ManualMappings = [
// GPT-4o -> 2024-05-13
// GPT-4o mini
{
idPrefix: 'gpt-4o-mini-2024-07-18',
label: 'GPT-4o Mini (2024-07-18)',
description: 'Affordable model for fast, lightweight tasks. GPT-4o mini is cheaper and more capable than GPT-3.5 Turbo.',
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 0.15, chatOut: 0.60 },
benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
},
{
idPrefix: 'gpt-4o-mini',
label: 'GPT-4o mini',
description: 'Currently points to gpt-4o-mini-2024-07-18.',
symLink: 'gpt-4o-mini-2024-07-18',
hidden: true,
// copied from symlinked
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 0.15, chatOut: 0.60 },
benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
},
// GPT-4o -> 2024-05-13 (will be updated to 2024-08-06 in the future (3 weeks notice))
{
idPrefix: 'gpt-4o',
label: 'GPT-4o',
@@ -24,10 +52,21 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 5, chatOut: 15 },
benchmark: { cbaElo: 1310 },
benchmark: { cbaElo: 1286 },
},
{
isLatest: true,
idPrefix: 'gpt-4o-2024-08-06',
label: 'GPT-4o (2024-08-06)',
description: 'Latest snapshot that supports Structured Outputs',
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 2.5, chatOut: 10 },
benchmark: { cbaElo: 1286 + 1 },
},
{
idPrefix: 'gpt-4o-2024-05-13',
label: 'GPT-4o (2024-05-13)',
description: 'Advanced, multimodal flagship model that’s cheaper and faster than GPT-4 Turbo.',
@@ -36,7 +75,8 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Oct 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 5, chatOut: 15 },
benchmark: { cbaElo: 1310 },
benchmark: { cbaElo: 1286 },
hidden: true,
},
// GPT4 Turbo with Vision -> 2024-04-09
@@ -52,7 +92,7 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Dec 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1261 },
benchmark: { cbaElo: 1257 },
},
{
idPrefix: 'gpt-4-turbo-2024-04-09',
@@ -63,7 +103,7 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Dec 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1261 },
benchmark: { cbaElo: 1257 },
},
// GPT4 Turbo Previews
@@ -80,32 +120,30 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Dec 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1251 },
benchmark: { cbaElo: 1245 },
},
{
idPrefix: 'gpt-4-0125-preview', // GPT-4 Turbo preview model
label: 'GPT-4 Turbo (0125)',
description: 'GPT-4 Turbo preview model intended to reduce cases of "laziness" where the model doesn\'t complete a task. Returns a maximum of 4,096 output tokens.',
isPreview: true,
description: 'GPT-4 Turbo preview model intended to reduce cases of "laziness" where the model doesn\'t complete a task.',
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Dec 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1251 },
benchmark: { cbaElo: 1245 },
hidden: true,
},
{
idPrefix: 'gpt-4-1106-preview', // GPT-4 Turbo preview model
label: 'GPT-4 Turbo (1106)',
description: 'GPT-4 Turbo preview model featuring improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Returns a maximum of 4,096 output tokens.',
isPreview: true,
description: 'GPT-4 Turbo preview model featuring improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.',
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Apr 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
pricing: { chatIn: 10, chatOut: 30 },
benchmark: { cbaElo: 1255 },
benchmark: { cbaElo: 1251 },
hidden: true,
},
@@ -182,7 +220,7 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 30, chatOut: 60 },
benchmark: { cbaElo: 1164 },
benchmark: { cbaElo: 1161 },
},
{
idPrefix: 'gpt-4-0314',
@@ -192,7 +230,7 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 30, chatOut: 60 },
benchmark: { cbaElo: 1189 },
benchmark: { cbaElo: 1186 },
hidden: true,
},
{
@@ -206,7 +244,8 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 30, chatOut: 60 },
benchmark: { cbaElo: 1164 },
benchmark: { cbaElo: 1161 },
isLegacy: true,
},
@@ -222,23 +261,22 @@ const _knownOpenAIChatModels: ManualMappings = [
hidden: true,
},
// 3.5-Turbo-16k's
// 3.5-Turbo's (16ks)
{
idPrefix: 'gpt-3.5-turbo-0125',
label: '3.5-Turbo (0125)',
description: 'The latest GPT-3.5 Turbo model with higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls. Returns a maximum of 4,096 output tokens.',
description: 'The latest GPT-3.5 Turbo model with higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls.',
contextWindow: 16385,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 0.5, chatOut: 1.5 },
benchmark: { cbaElo: 1104 },
benchmark: { cbaElo: 1105 },
},
{
idPrefix: 'gpt-3.5-turbo-1106',
label: '3.5-Turbo (1106)',
description: 'The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.',
description: 'GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.',
contextWindow: 16385,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Sep 2021',
@@ -250,7 +288,7 @@ const _knownOpenAIChatModels: ManualMappings = [
{
idPrefix: 'gpt-3.5-turbo',
label: '3.5-Turbo',
description: 'Currently points to gpt-3.5-turbo-0125.',
description: 'Currently points to gpt-3.5-turbo-0125. As of July 2024, gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.',
symLink: 'gpt-3.5-turbo-0125',
hidden: true,
// copied
@@ -259,7 +297,7 @@ const _knownOpenAIChatModels: ManualMappings = [
trainingDataCutoff: 'Sep 2021',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
pricing: { chatIn: 0.5, chatOut: 1.5 },
benchmark: { cbaElo: 1104 },
benchmark: { cbaElo: 1105 },
},
@@ -859,41 +897,84 @@ export function perplexityAIModelSort(a: ModelDescriptionSchema, b: ModelDescrip
const _knownGroqModels: ManualMappings = [
{
isLatest: true,
idPrefix: 'llama-3.1-405b-reasoning',
label: 'Llama 3.1 · 405B',
description: 'LLaMA 3.1 405B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
isLatest: true,
idPrefix: 'llama-3.1-70b-versatile',
label: 'Llama 3.1 · 70B',
description: 'LLaMA 3.1 70B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
isLatest: true,
idPrefix: 'llama-3.1-8b-instant',
label: 'Llama 3.1 · 8B',
description: 'LLaMA 3.1 8B developed by Meta with a context window of 131,072 tokens. Supports tool use.',
contextWindow: 131072,
maxCompletionTokens: 8000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-groq-70b-8192-tool-use-preview',
label: 'Llama 3 Groq · 70B Tool Use',
description: 'LLaMA 3 70B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-groq-8b-8192-tool-use-preview',
label: 'Llama 3 Groq · 8B Tool Use',
description: 'LLaMA 3 8B Tool Use developed by Groq with a context window of 8,192 tokens. Optimized for tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'llama3-70b-8192',
label: 'Llama 3 · 70B',
description: 'LLaMA3 70b developed by Meta with a context window of 8,192 tokens.',
description: 'LLaMA3 70B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat],
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
// isLegacy: true,
hidden: true,
},
{
// isLatest: true,
idPrefix: 'llama3-8b-8192',
label: 'Llama 3 · 8B',
description: 'LLaMA3 8b developed by Meta with a context window of 8,192 tokens.',
description: 'LLaMA3 8B developed by Meta with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat],
},
{
idPrefix: 'llama2-70b-4096',
label: 'Llama 2 · 70B',
description: 'LLaMA2 70b developed by Meta with a context window of 4,096 tokens.',
contextWindow: 4096,
interfaces: [LLM_IF_OAI_Chat],
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
// isLegacy: true,
hidden: true,
},
{
idPrefix: 'mixtral-8x7b-32768',
label: 'Mixtral 8x7B',
description: 'Mixtral 8x7b developed by Mistral with a context window of 32,768 tokens.',
description: 'Mixtral 8x7B developed by Mistral with a context window of 32,768 tokens. Supports tool use.',
contextWindow: 32768,
interfaces: [LLM_IF_OAI_Chat],
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'gemma2-9b-it',
label: 'Gemma 2 · 9B Instruct',
description: 'Gemma 2 9B developed by Google with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
},
{
idPrefix: 'gemma-7b-it',
label: 'Gemma 1.1 · 7B Instruct',
description: 'Gemma 7b developed by Google with a context window of 8,192 tokens.',
description: 'Gemma 7B developed by Google with a context window of 8,192 tokens. Supports tool use.',
contextWindow: 8192,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
];
@@ -910,6 +991,11 @@ export function groqModelToModelDescription(_model: unknown): ModelDescriptionSc
}
export function groqModelSortFn(a: ModelDescriptionSchema, b: ModelDescriptionSchema): number {
// sort hidden at the end
if (a.hidden && !b.hidden)
return 1;
if (!a.hidden && b.hidden)
return -1;
// sort as per their order in the known models
const aIndex = _knownGroqModels.findIndex(base => a.id.startsWith(base.idPrefix));
const bIndex = _knownGroqModels.findIndex(base => b.id.startsWith(base.idPrefix));
@@ -12,7 +12,7 @@ import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIWire, WireOpenAICreateImageOutput, wireOpenAICreateImageOutputSchema, WireOpenAICreateImageRequest } from './openai.wiretypes';
import { azureModelToModelDescription, groqModelSortFn, groqModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, mistralModelsSort, mistralModelToModelDescription, oobaboogaModelToModelDescription, openAIModelFilter, openAIModelToModelDescription, openRouterModelFamilySortFn, openRouterModelToModelDescription, perplexityAIModelDescriptions, perplexityAIModelSort, togetherAIModelsToModelDescriptions } from './models.data';
import { llmsChatGenerateWithFunctionsOutputSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { llmsChatGenerateWithFunctionsOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
@@ -72,8 +72,11 @@ const listModelsInputSchema = z.object({
const chatGenerateWithFunctionsInputSchema = z.object({
access: openAIAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
model: openAIModelSchema,
history: openAIHistorySchema,
functions: openAIFunctionsSchema.optional(),
forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
const createImagesInputSchema = z.object({
@@ -108,7 +111,7 @@ export const llmOpenAIRouter = createTRPCRouter({
// [Azure]: use an older 'deployments' API to enumerate the models, and a modified OpenAI id to description mapping
if (access.dialect === 'azure') {
const azureModels = await openaiGET(access, `/openai/deployments?api-version=2023-03-15-preview`);
const azureModels = await openaiGETOrThrow(access, `/openai/deployments?api-version=2023-03-15-preview`);
const wireAzureListDeploymentsSchema = z.object({
data: z.array(z.object({
@@ -146,7 +149,7 @@ export const llmOpenAIRouter = createTRPCRouter({
// [non-Azure]: fetch openAI-style for all but Azure (will be then used in each dialect)
const openAIWireModelsResponse = await openaiGET<OpenAIWire.Models.Response>(access, '/v1/models');
const openAIWireModelsResponse = await openaiGETOrThrow<OpenAIWire.Models.Response>(access, '/v1/models');
// [Together] missing the .data property
if (access.dialect === 'togetherai')
@@ -267,17 +270,22 @@ export const llmOpenAIRouter = createTRPCRouter({
     .output(llmsChatGenerateWithFunctionsOutputSchema)
     .mutation(async ({ input }) => {

-      const { access, model, history, functions, forceFunctionName } = input;
+      const { access, model, history, functions, forceFunctionName, context } = input;
       const isFunctionsCall = !!functions && functions.length > 0;

       const completionsBody = openAIChatCompletionPayload(access.dialect, model, history, isFunctionsCall ? functions : null, forceFunctionName ?? null, 1, false);
-      const wireCompletions = await openaiPOST<OpenAIWire.ChatCompletion.Response, OpenAIWire.ChatCompletion.Request>(
+      const wireCompletions = await openaiPOSTOrThrow<OpenAIWire.ChatCompletion.Response, OpenAIWire.ChatCompletion.Request>(
         access, model.id, completionsBody, '/v1/chat/completions',
       );

       // expect a single output
-      if (wireCompletions?.choices?.length !== 1)
-        throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[OpenAI Issue] Expected 1 completion, got ${wireCompletions?.choices?.length}` });
+      if (wireCompletions?.choices?.length !== 1) {
+        console.error(`[POST] llmOpenAI.chatGenerateWithFunctions: ${access.dialect}: ${context?.name || 'no context'}: unexpected output${forceFunctionName ? ` (fn: ${forceFunctionName})` : ''}:`, model.id, wireCompletions?.choices);
+        throw new TRPCError({
+          code: 'UNPROCESSABLE_CONTENT',
+          message: `[OpenAI Issue] Expected 1 completion, got ${wireCompletions?.choices?.length}`,
+        });
+      }
       let { message, finish_reason } = wireCompletions.choices[0];

       // LocalAI hack/workaround, until https://github.com/go-skynet/LocalAI/issues/788 is fixed
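For concreteness, the `console.error` added above would emit a line like the one below; the dialect, context name, and model id are made-up example values, not values from the diff.

```ts
// Worked example of the new log prefix (all values are placeholders):
const dialect = 'azure';
const contextName = 'conversation';                       // stands in for context?.name
const forceFunctionName: string | undefined = undefined;  // no forced function in this example
const modelId = 'gpt-4o';

const logPrefix = `[POST] llmOpenAI.chatGenerateWithFunctions: ${dialect}: ${contextName || 'no context'}: unexpected output${forceFunctionName ? ` (fn: ${forceFunctionName})` : ''}:`;
console.log(logPrefix, modelId);
// -> [POST] llmOpenAI.chatGenerateWithFunctions: azure: conversation: unexpected output: gpt-4o
// The router then throws a TRPCError with code 'UNPROCESSABLE_CONTENT' instead of the
// previous 'INTERNAL_SERVER_ERROR'.
```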
@@ -318,7 +326,7 @@ export const llmOpenAIRouter = createTRPCRouter({
         delete requestBody.response_format;

       // create 1 image (dall-e-3 won't support more than 1, so better transfer the burden to the client)
-      const wireOpenAICreateImageOutput = await openaiPOST<WireOpenAICreateImageOutput, WireOpenAICreateImageRequest>(
+      const wireOpenAICreateImageOutput = await openaiPOSTOrThrow<WireOpenAICreateImageOutput, WireOpenAICreateImageRequest>(
         access, null, requestBody, '/v1/images/generations',
       );
@@ -340,7 +348,7 @@ export const llmOpenAIRouter = createTRPCRouter({
     .mutation(async ({ input: { access, text } }): Promise<OpenAIWire.Moderation.Response> => {
       try {

-        return await openaiPOST<OpenAIWire.Moderation.Response, OpenAIWire.Moderation.Request>(access, null, {
+        return await openaiPOSTOrThrow<OpenAIWire.Moderation.Response, OpenAIWire.Moderation.Request>(access, null, {
           input: text,
           model: 'text-moderation-latest',
         }, '/v1/moderations');
@@ -361,7 +369,7 @@ export const llmOpenAIRouter = createTRPCRouter({
   dialectLocalAI_galleryModelsAvailable: publicProcedure
     .input(listModelsInputSchema)
     .query(async ({ input: { access } }) => {
-      const wireLocalAIModelsAvailable = await openaiGET(access, '/models/available');
+      const wireLocalAIModelsAvailable = await openaiGETOrThrow(access, '/models/available');
       return wireLocalAIModelsAvailableOutputSchema.parse(wireLocalAIModelsAvailable);
     }),
@@ -374,7 +382,7 @@ export const llmOpenAIRouter = createTRPCRouter({
     }))
     .mutation(async ({ input: { access, galleryName, modelName } }) => {
       const galleryModelId = `${galleryName}@${modelName}`;
-      const wireLocalAIModelApply = await openaiPOST(access, null, { id: galleryModelId }, '/models/apply');
+      const wireLocalAIModelApply = await openaiPOSTOrThrow(access, null, { id: galleryModelId }, '/models/apply');
       return wilreLocalAIModelsApplyOutputSchema.parse(wireLocalAIModelApply);
     }),
@@ -385,7 +393,7 @@ export const llmOpenAIRouter = createTRPCRouter({
       jobId: z.string(),
     }))
     .query(async ({ input: { access, jobId } }) => {
-      const wireLocalAIModelsJobs = await openaiGET(access, `/models/jobs/${jobId}`);
+      const wireLocalAIModelsJobs = await openaiGETOrThrow(access, `/models/jobs/${jobId}`);
       return wireLocalAIModelsListOutputSchema.parse(wireLocalAIModelsJobs);
     }),
@@ -623,12 +631,12 @@ export function openAIChatCompletionPayload(dialect: OpenAIDialects, model: Open
   };
 }

-async function openaiGET<TOut extends object>(access: OpenAIAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
+async function openaiGETOrThrow<TOut extends object>(access: OpenAIAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
   const { headers, url } = openAIAccess(access, null, apiPath);
   return await fetchJsonOrTRPCError<TOut>(url, 'GET', headers, undefined, `OpenAI/${access.dialect}`);
 }

-async function openaiPOST<TOut extends object, TPostBody extends object>(access: OpenAIAccessSchema, modelRefId: string | null, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
+async function openaiPOSTOrThrow<TOut extends object, TPostBody extends object>(access: OpenAIAccessSchema, modelRefId: string | null, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
   const { headers, url } = openAIAccess(access, modelRefId, apiPath);
   return await fetchJsonOrTRPCError<TOut, TPostBody>(url, 'POST', headers, body, `OpenAI/${access.dialect}`);
 }
+3 -2
@@ -8,7 +8,7 @@ import type { DLLM, DLLMId, DModelSourceId } from '../store-llms';
 import type { ModelDescriptionSchema } from '../server/llm.server.types';
 import type { ModelVendorId } from './vendors.registry';
 import type { StreamingClientUpdate } from './unifiedStreamingClient';
-import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut } from '../llm.client';
+import type { VChatContextRef, VChatFunctionIn, VChatGenerateContextName, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut, VChatStreamContextName } from '../llm.client';


 export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown, TDLLM = DLLM<TSourceSetup, TLLMOptions>> {
@@ -44,6 +44,7 @@ export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOpt
     access: TAccess,
     llmOptions: TLLMOptions,
     messages: VChatMessageIn[],
+    contextName: VChatGenerateContextName, contextRef: VChatContextRef | null,
     functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
     maxTokens?: number,
   ) => Promise<VChatMessageOut | VChatMessageOrFunctionCallOut>;
@@ -53,7 +54,7 @@ export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOpt
     llmId: DLLMId,
     llmOptions: TLLMOptions,
     messages: VChatMessageIn[],
-    contextName: VChatContextName, contexRef: VChatContextRef,
+    contextName: VChatStreamContextName, contextRef: VChatContextRef,
     functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
     abortSignal: AbortSignal,
     onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
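The interface change above also replaces the single `VChatContextName` with two narrower unions, one for one-shot generation and one for streaming. Their actual members are defined in `llm.client.ts` and are not shown in this diff; the sketch below only illustrates the intent of the split, with made-up member names.

```ts
// Hypothetical illustration only — the real unions live in llm.client.ts and their
// member strings are not part of this diff.
type VChatGenerateContextName = 'chat-generate-example';  // would be accepted by rpcChatGenerateOrThrow
type VChatStreamContextName = 'chat-stream-example';      // would be accepted by the streaming signature above
type VChatContextRef = string;                            // assumed: an opaque reference such as a conversation id

// The split lets the compiler reject a streaming-only context name passed to the
// non-streaming RPC, and vice versa.
const generateCtx: VChatGenerateContextName = 'chat-generate-example';
const streamCtx: VChatStreamContextName = 'chat-stream-example';
console.log(generateCtx, streamCtx);
```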
+7 -2
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
 import type { AnthropicAccessSchema } from '../../server/anthropic/anthropic.router';
 import type { IModelVendor } from '../IModelVendor';
-import type { VChatMessageOut } from '../../llm.client';
+import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
 import { unifiedStreamingClient } from '../unifiedStreamingClient';

 import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE, LLMOptionsOpenAI } from '../openai/openai.vendor';
@@ -47,7 +47,7 @@ export const ModelVendorAnthropic: IModelVendor<SourceSetupAnthropic, AnthropicA
   rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmAnthropic.listModels.query({ access }),

   // Chat Generate (non-streaming) with Functions
-  rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
+  rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
     if (functions?.length || forceFunctionName)
       throw new Error('Anthropic does not support functions');
@@ -61,6 +61,11 @@ export const ModelVendorAnthropic: IModelVendor<SourceSetupAnthropic, AnthropicA
         maxTokens: maxTokens || llmResponseTokens || FALLBACK_LLM_RESPONSE_TOKENS,
       },
       history: messages,
+      context: contextRef ? {
+        method: 'chat-generate',
+        name: contextName,
+        ref: contextRef,
+      } : undefined,
     }) as VChatMessageOut;
   } catch (error: any) {
     const errorMessage = error?.message || error?.toString() || 'Anthropic Chat Generate Error';
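The same pattern repeats for the Gemini, Ollama, and OpenAI vendors below: the two new parameters are folded into an optional `context` object on the RPC payload, and passing a `null` ref omits it entirely. As a rough sketch of a call into the updated signature (the import path, context name, and ref below are placeholders, not values from this diff):

```ts
import { ModelVendorAnthropic } from './anthropic.vendor'; // path assumed for this sketch

// Placeholder inputs — in the app these come from the source setup and chat state.
declare const access: any;      // AnthropicAccessSchema
declare const llmOptions: any;  // LLMOptionsOpenAI: { llmRef, llmTemperature, llmResponseTokens }
declare const messages: any[];  // VChatMessageIn[]

const reply = await ModelVendorAnthropic.rpcChatGenerateOrThrow(
  access, llmOptions, messages,
  'chat-generate-example' as any, // contextName — placeholder; real VChatGenerateContextName members are not shown here
  'conversation-1234',            // contextRef — pass null to send no context block at all
  null, null,                     // functions / forceFunctionName: Anthropic throws if either is set
);
console.log(reply);
```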
+8 -3
@@ -1,10 +1,10 @@
 import { GeminiIcon } from '~/common/components/icons/vendors/GeminiIcon';
-import { apiAsync } from '~/common/util/trpc.client';
+import { apiAsync } from '~/common/util/trpc.client';

 import type { GeminiAccessSchema } from '../../server/gemini/gemini.router';
 import type { GeminiBlockSafetyLevel } from '../../server/gemini/gemini.wiretypes';
 import type { IModelVendor } from '../IModelVendor';
-import type { VChatMessageOut } from '../../llm.client';
+import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
 import { unifiedStreamingClient } from '../unifiedStreamingClient';

 import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE } from '../openai/openai.vendor';
@@ -60,7 +60,7 @@ export const ModelVendorGemini: IModelVendor<SourceSetupGemini, GeminiAccessSche
   rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmGemini.listModels.query({ access }),

   // Chat Generate (non-streaming) with Functions
-  rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
+  rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
     if (functions?.length || forceFunctionName)
       throw new Error('Gemini does not support functions');
@@ -74,6 +74,11 @@ export const ModelVendorGemini: IModelVendor<SourceSetupGemini, GeminiAccessSche
         maxTokens: maxTokens || maxOutputTokens || FALLBACK_LLM_RESPONSE_TOKENS,
       },
       history: messages,
+      context: contextRef ? {
+        method: 'chat-generate',
+        name: contextName,
+        ref: contextRef,
+      } : undefined,
     }) as VChatMessageOut;
   } catch (error: any) {
     const errorMessage = error?.message || error?.toString() || 'Gemini Chat Generate Error';
+7 -2
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
 import type { IModelVendor } from '../IModelVendor';
 import type { OllamaAccessSchema } from '../../server/ollama/ollama.router';
-import type { VChatMessageOut } from '../../llm.client';
+import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
 import { unifiedStreamingClient } from '../unifiedStreamingClient';

 import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE, LLMOptionsOpenAI } from '../openai/openai.vendor';
@@ -42,7 +42,7 @@ export const ModelVendorOllama: IModelVendor<SourceSetupOllama, OllamaAccessSche
   rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmOllama.listModels.query({ access }),

   // Chat Generate (non-streaming) with Functions
-  rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
+  rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
     if (functions?.length || forceFunctionName)
       throw new Error('Ollama does not support functions');
@@ -56,6 +56,11 @@ export const ModelVendorOllama: IModelVendor<SourceSetupOllama, OllamaAccessSche
         maxTokens: maxTokens || llmResponseTokens || FALLBACK_LLM_RESPONSE_TOKENS,
       },
       history: messages,
+      context: contextRef ? {
+        method: 'chat-generate',
+        name: contextName,
+        ref: contextRef,
+      } : undefined,
     }) as VChatMessageOut;
   } catch (error: any) {
     const errorMessage = error?.message || error?.toString() || 'Ollama Chat Generate Error';
+7 -2
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
 import type { IModelVendor } from '../IModelVendor';
 import type { OpenAIAccessSchema } from '../../server/openai/openai.router';
-import type { VChatMessageOrFunctionCallOut } from '../../llm.client';
+import type { VChatContextRef, VChatGenerateContextName, VChatMessageOrFunctionCallOut } from '../../llm.client';
 import { unifiedStreamingClient } from '../unifiedStreamingClient';

 import { OpenAILLMOptions } from './OpenAILLMOptions';
@@ -60,7 +60,7 @@ export const ModelVendorOpenAI: IModelVendor<SourceSetupOpenAI, OpenAIAccessSche
   rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmOpenAI.listModels.query({ access }),

   // Chat Generate (non-streaming) with Functions
-  rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
+  rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
     const { llmRef, llmTemperature, llmResponseTokens } = llmOptions;
     try {
       return await apiAsync.llmOpenAI.chatGenerateWithFunctions.mutate({
@@ -73,6 +73,11 @@ export const ModelVendorOpenAI: IModelVendor<SourceSetupOpenAI, OpenAIAccessSche
         functions: functions ?? undefined,
         forceFunctionName: forceFunctionName ?? undefined,
         history: messages,
+        context: contextRef ? {
+          method: 'chat-generate',
+          name: contextName,
+          ref: contextRef,
+        } : undefined,
       }) as VChatMessageOrFunctionCallOut;
     } catch (error: any) {
       const errorMessage = error?.message || error?.toString() || 'OpenAI Chat Generate Error';
+2 -2
@@ -3,7 +3,7 @@ import { frontendSideFetch } from '~/common/util/clientFetchers';
 import type { ChatStreamingInputSchema, ChatStreamingPreambleModelSchema, ChatStreamingPreambleStartSchema } from '../server/llm.server.streaming';
 import type { DLLMId } from '../store-llms';
-import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn } from '../llm.client';
+import type { VChatContextRef, VChatFunctionIn, VChatMessageIn, VChatStreamContextName } from '../llm.client';

 import type { OpenAIAccessSchema } from '../server/openai/openai.router';
 import type { OpenAIWire } from '../server/openai/openai.wiretypes';
@@ -29,7 +29,7 @@ export async function unifiedStreamingClient<TSourceSetup = unknown, TLLMOptions
   llmId: DLLMId,
   llmOptions: TLLMOptions,
   messages: VChatMessageIn[],
-  contextName: VChatContextName, contextRef: VChatContextRef,
+  contextName: VChatStreamContextName, contextRef: VChatContextRef,
   functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
   abortSignal: AbortSignal,
   onUpdate: (update: StreamingClientUpdate, done: boolean) => void,