mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-11 06:00:15 -07:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2f59e12e20 | |||
| 30e8652c2a | |||
| 5ee6aceb60 | |||
| 6940b6a6d1 | |||
| 4e33ce9415 | |||
| 944e22bde6 | |||
| 6054fa0a26 | |||
| 4db13cfed4 | |||
| 6a6adda2e0 | |||
| 4afa55c0db |
@@ -17,15 +17,16 @@ Or fork & run on Vercel
|
||||
|
||||
big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap
|
||||
|
||||
### What's New in 1.14.0 · March 6, 2024 · Modelmorphic
|
||||
### What's New in 1.14.1 · March 7, 2024 · Modelmorphic
|
||||
|
||||
- **Anthropic** [Claude-3](https://www.anthropic.com/news/claude-3-family) model family support. [#443](https://github.com/enricoros/big-AGI/issues/443)
|
||||
- New **[Perplexity](https://www.perplexity.ai/)** and **[Groq](https://groq.com/)** integration (thanks @Penagwin). [#407](https://github.com/enricoros/big-AGI/issues/407), [#427](https://github.com/enricoros/big-AGI/issues/427)
|
||||
- **[LocalAI](https://localai.io/models/)** deep integration, including support for [model galleries](https://github.com/enricoros/big-AGI/issues/411)
|
||||
- **Mistral** Large and Google **Gemini 1.5** support
|
||||
- Performance optimizations: runs [much faster](https://twitter.com/enricoros/status/1756553038293303434?utm_source=localhost:3000&utm_medium=big-agi), saves lots of power, reduces memory usage
|
||||
- Enhanced UX with auto-sizing charts, refined search and folder functionalities, perfected scaling
|
||||
- And with more UI improvements, documentation, bug fixes (20 tickets), and developer enhancements
|
||||
- [Release notes](https://github.com/enricoros/big-AGI/releases/tag/v1.14.0), and changes [v1.13.1...v1.14.0](https://github.com/enricoros/big-AGI/compare/v1.13.1...v1.14.0) (233 commits, 8,000+ lines changed)
|
||||
- [Release notes](https://github.com/enricoros/big-AGI/releases/tag/v1.14.0), and changes [v1.13.1...v1.14.1](https://github.com/enricoros/big-AGI/compare/v1.13.1...v1.14.1) (233 commits, 8,000+ lines changed)
|
||||
|
||||
### What's New in 1.13.0 · Feb 8, 2024 · Multi + Mind
|
||||
|
||||
|
||||
+2
-1
@@ -12,8 +12,9 @@ Prediction: OpenAI will release GPT-5 on March 14, 2024. We will support it on d
|
||||
- milestone: [1.15.0](https://github.com/enricoros/big-agi/milestone/15)
|
||||
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
|
||||
|
||||
### What's New in 1.14.0 · March 6, 2024 · Modelmorphic
|
||||
### What's New in 1.14.1 · March 7, 2024 · Modelmorphic
|
||||
|
||||
- **Anthropic** [Claude-3](https://www.anthropic.com/news/claude-3-family) model family support. [#443](https://github.com/enricoros/big-AGI/issues/443)
|
||||
- New **[Perplexity](https://www.perplexity.ai/)** and **[Groq](https://groq.com/)** integration (thanks @Penagwin). [#407](https://github.com/enricoros/big-AGI/issues/407), [#427](https://github.com/enricoros/big-AGI/issues/427)
|
||||
- **[LocalAI](https://localai.io/models/)** deep integration, including support for [model galleries](https://github.com/enricoros/big-AGI/issues/411)
|
||||
- **Mistral** Large and Google **Gemini 1.5** support
|
||||
|
||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "big-agi",
|
||||
"version": "1.14.0",
|
||||
"version": "1.14.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "big-agi",
|
||||
"version": "1.14.0",
|
||||
"version": "1.14.1",
|
||||
"hasInstallScript": true,
|
||||
"dependencies": {
|
||||
"@emotion/cache": "^11.11.0",
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "big-agi",
|
||||
"version": "1.14.0",
|
||||
"version": "1.14.1",
|
||||
"private": true,
|
||||
"author": "Enrico Ros <enrico.ros@gmail.com>",
|
||||
"repository": "https://github.com/enricoros/big-agi",
|
||||
|
||||
@@ -64,6 +64,7 @@ const avatarIconSx = { width: 36, height: 36 };
|
||||
export function makeAvatar(messageAvatar: string | null, messageRole: DMessage['role'] | string, messageOriginLLM: string | undefined, messagePurposeId: SystemPurposeId | undefined, messageSender: string, messageTyping: boolean, size: 'sm' | undefined = undefined): React.JSX.Element {
|
||||
if (typeof messageAvatar === 'string' && messageAvatar)
|
||||
return <Avatar alt={messageSender} src={messageAvatar} />;
|
||||
|
||||
const mascotSx = size === 'sm' ? avatarIconSx : { width: 64, height: 64 };
|
||||
switch (messageRole) {
|
||||
case 'system':
|
||||
@@ -76,17 +77,18 @@ export function makeAvatar(messageAvatar: string | null, messageRole: DMessage['
|
||||
// typing gif (people seem to love this, so keeping it after april fools')
|
||||
const isTextToImage = messageOriginLLM === 'DALL·E' || messageOriginLLM === 'Prodia';
|
||||
const isReact = messageOriginLLM?.startsWith('react-');
|
||||
if (messageTyping) {
|
||||
|
||||
// animation: message typing
|
||||
if (messageTyping)
|
||||
return <Avatar
|
||||
alt={messageSender} variant='plain'
|
||||
src={isTextToImage ? 'https://i.giphy.com/media/5t9ujj9cMisyVjUZ0m/giphy.webp'
|
||||
: isReact ? 'https://i.giphy.com/media/l44QzsOLXxcrigdgI/giphy.webp'
|
||||
: 'https://i.giphy.com/media/jJxaUysjzO9ri/giphy.webp'}
|
||||
src={isTextToImage ? 'https://i.giphy.com/media/5t9ujj9cMisyVjUZ0m/giphy.webp' // brush
|
||||
: isReact ? 'https://i.giphy.com/media/l44QzsOLXxcrigdgI/giphy.webp' // mind
|
||||
: 'https://i.giphy.com/media/jJxaUysjzO9ri/giphy.webp'} // typing
|
||||
sx={{ ...mascotSx, borderRadius: 'sm' }}
|
||||
/>;
|
||||
}
|
||||
|
||||
// text-to-image: icon
|
||||
// icon: text-to-image
|
||||
if (isTextToImage)
|
||||
return <FormatPaintIcon sx={{
|
||||
...avatarIconSx,
|
||||
@@ -95,15 +97,16 @@ export function makeAvatar(messageAvatar: string | null, messageRole: DMessage['
|
||||
|
||||
// purpose symbol (if present)
|
||||
const symbol = SystemPurposes[messagePurposeId!]?.symbol;
|
||||
if (symbol) return <Box sx={{
|
||||
fontSize: '24px',
|
||||
textAlign: 'center',
|
||||
width: '100%',
|
||||
minWidth: `${avatarIconSx.width}px`,
|
||||
lineHeight: `${avatarIconSx.height}px`,
|
||||
}}>
|
||||
{symbol}
|
||||
</Box>;
|
||||
if (symbol)
|
||||
return <Box sx={{
|
||||
fontSize: '24px',
|
||||
textAlign: 'center',
|
||||
width: '100%',
|
||||
minWidth: `${avatarIconSx.width}px`,
|
||||
lineHeight: `${avatarIconSx.height}px`,
|
||||
}}>
|
||||
{symbol}
|
||||
</Box>;
|
||||
|
||||
// default assistant avatar
|
||||
return <SmartToyOutlinedIcon sx={avatarIconSx} />; // https://mui.com/static/images/avatar/2.jpg
|
||||
|
||||
@@ -7,6 +7,7 @@ import AutoStoriesOutlinedIcon from '@mui/icons-material/AutoStoriesOutlined';
|
||||
import GoogleIcon from '@mui/icons-material/Google';
|
||||
import LaunchIcon from '@mui/icons-material/Launch';
|
||||
|
||||
import { AnthropicIcon } from '~/common/components/icons/vendors/AnthropicIcon';
|
||||
import { GroqIcon } from '~/common/components/icons/vendors/GroqIcon';
|
||||
import { LocalAIIcon } from '~/common/components/icons/vendors/LocalAIIcon';
|
||||
import { MistralIcon } from '~/common/components/icons/vendors/MistralIcon';
|
||||
@@ -54,11 +55,12 @@ export const NewsItems: NewsItem[] = [
|
||||
]
|
||||
}*/
|
||||
{
|
||||
versionCode: '1.14.0',
|
||||
versionCode: '1.14.1',
|
||||
versionName: 'Modelmorphic',
|
||||
versionCoverImage: coverV114,
|
||||
versionDate: new Date('2024-03-06T08:00:00Z'),
|
||||
versionDate: new Date('2024-03-07T08:00:00Z'),
|
||||
items: [
|
||||
{ text: <>Anthropic <B href='https://www.anthropic.com/news/claude-3-family'>Claude-3</B> support for smarter chats</>, issue: 443, icon: AnthropicIcon },
|
||||
{ text: <><B issue={407}>Perplexity</B> support, including Online models</>, issue: 407, icon: PerplexityIcon },
|
||||
{ text: <><B issue={427}>Groq</B> support, with speeds up to 500 tok/s</>, issue: 427, icon: GroqIcon },
|
||||
{ text: <>Support for new Mistral-Large models</>, icon: MistralIcon },
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
import { useAppStateStore } from '~/common/state/store-appstate';
|
||||
|
||||
|
||||
export const incrementalNewsVersion: number = 14;
|
||||
export const incrementalNewsVersion: number = 14.1;
|
||||
|
||||
|
||||
export function shallRedirectToNews() {
|
||||
|
||||
@@ -15,6 +15,9 @@ export function prettyBaseModel(model: string | undefined): string {
|
||||
if (model.includes('gpt-3.5-turbo-16k')) return '3.5 Turbo 16k';
|
||||
if (model.includes('gpt-3.5-turbo')) return '3.5 Turbo';
|
||||
if (model.endsWith('.bin')) return model.slice(0, -4);
|
||||
// [Anthropic]
|
||||
if (model.includes('claude-3-opus')) return 'Claude 3 Opus';
|
||||
if (model.includes('claude-3-sonnet')) return 'Claude 3 Sonnet';
|
||||
// [LM Studio]
|
||||
if (model.startsWith('C:\\') || model.startsWith('D:\\'))
|
||||
return getModelFromFile(model).replace('.gguf', '');
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
export function capitalizeFirstLetter(string: string) {
|
||||
return string.charAt(0).toUpperCase() + string.slice(1);
|
||||
return string?.length ? (string.charAt(0).toUpperCase() + string.slice(1)) : string;
|
||||
}
|
||||
|
||||
export function createBase36Uid(checkDuplicates: string[]): string {
|
||||
|
||||
@@ -40,6 +40,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
|
||||
created: roundTime('2023-11-21'),
|
||||
description: 'Superior performance on tasks that require complex reasoning, with reduced model hallucination rates',
|
||||
contextWindow: 200000,
|
||||
maxCompletionTokens: 4096,
|
||||
pricing: {
|
||||
cpmPrompt: 0.008,
|
||||
cpmCompletion: 0.024,
|
||||
@@ -52,6 +53,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
|
||||
created: roundTime('2023-07-11'),
|
||||
description: 'Superior performance on tasks that require complex reasoning',
|
||||
contextWindow: 100000,
|
||||
maxCompletionTokens: 4096,
|
||||
pricing: {
|
||||
cpmPrompt: 0.008,
|
||||
cpmCompletion: 0.024,
|
||||
@@ -65,6 +67,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
|
||||
created: roundTime('2023-08-09'),
|
||||
description: 'Low-latency, high throughput model',
|
||||
contextWindow: 100000,
|
||||
maxCompletionTokens: 4096,
|
||||
pricing: {
|
||||
cpmPrompt: 0.00163,
|
||||
cpmCompletion: 0.00551,
|
||||
@@ -77,6 +80,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
|
||||
created: roundTime('2023-03-14'),
|
||||
description: 'Precise and fast',
|
||||
contextWindow: 100000,
|
||||
maxCompletionTokens: 2048,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
hidden: true,
|
||||
},
|
||||
@@ -86,16 +90,8 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
|
||||
created: roundTime('2023-03-14'),
|
||||
description: 'Claude 1.3 is the latest version of Claude v1',
|
||||
contextWindow: 100000,
|
||||
maxCompletionTokens: 4096,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
id: 'claude-1.0',
|
||||
label: 'Claude 1',
|
||||
created: roundTime('2023-03-14'),
|
||||
description: 'Claude 1.0 is the first version of Claude',
|
||||
contextWindow: 9000,
|
||||
interfaces: [LLM_IF_OAI_Chat],
|
||||
hidden: true,
|
||||
},
|
||||
}
|
||||
];
|
||||
@@ -8,30 +8,36 @@ import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
|
||||
import { fixupHost } from '~/common/util/urlUtils';
|
||||
|
||||
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
|
||||
import { llmsListModelsOutputSchema, llmsChatGenerateOutputSchema } from '../llm.server.types';
|
||||
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
|
||||
|
||||
import { AnthropicWire } from './anthropic.wiretypes';
|
||||
import { AnthropicWireMessagesRequest, anthropicWireMessagesRequestSchema, AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic.wiretypes';
|
||||
import { hardcodedAnthropicModels } from './anthropic.models';
|
||||
|
||||
|
||||
// Default hosts
|
||||
const DEFAULT_API_VERSION_HEADERS = {
|
||||
'anthropic-version': '2023-06-01',
|
||||
'anthropic-beta': 'messages-2023-12-15',
|
||||
};
|
||||
const DEFAULT_MAX_TOKENS = 2048;
|
||||
const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com';
|
||||
const DEFAULT_HELICONE_ANTHROPIC_HOST = 'anthropic.hconeai.com';
|
||||
|
||||
|
||||
// Mappers
|
||||
|
||||
export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string): { headers: HeadersInit, url: string } {
|
||||
// API version
|
||||
const apiVersion = '2023-06-01';
|
||||
async function anthropicPOST<TOut extends object, TPostBody extends object>(access: AnthropicAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
|
||||
const { headers, url } = anthropicAccess(access, apiPath);
|
||||
return await fetchJsonOrTRPCError<TOut, TPostBody>(url, 'POST', headers, body, 'Anthropic');
|
||||
}
|
||||
|
||||
export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string): { headers: HeadersInit, url: string } {
|
||||
// API key
|
||||
const anthropicKey = access.anthropicKey || env.ANTHROPIC_API_KEY || '';
|
||||
|
||||
// break for the missing key only on the default host
|
||||
if (!anthropicKey)
|
||||
if (!access.anthropicHost && !env.ANTHROPIC_API_HOST)
|
||||
throw new Error('Missing Anthropic API Key. Add it on the UI (Models Setup) or server side (your deployment).');
|
||||
if (!anthropicKey && !(access.anthropicHost || env.ANTHROPIC_API_HOST))
|
||||
throw new Error('Missing Anthropic API Key. Add it on the UI (Models Setup) or server side (your deployment).');
|
||||
|
||||
// API host
|
||||
let anthropicHost = fixupHost(access.anthropicHost || env.ANTHROPIC_API_HOST || DEFAULT_ANTHROPIC_HOST, apiPath);
|
||||
@@ -49,7 +55,7 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string):
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
'anthropic-version': apiVersion,
|
||||
...DEFAULT_API_VERSION_HEADERS,
|
||||
'X-API-Key': anthropicKey,
|
||||
...(heliKey && { 'Helicone-Auth': `Bearer ${heliKey}` }),
|
||||
},
|
||||
@@ -57,23 +63,68 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string):
|
||||
};
|
||||
}
|
||||
|
||||
export function anthropicChatCompletionPayload(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean): AnthropicWire.Complete.Request {
|
||||
// encode the prompt for Claude models
|
||||
const prompt = history.map(({ role, content }) => {
|
||||
return role === 'assistant' ? `\n\nAssistant: ${content}` : `\n\nHuman: ${content}`;
|
||||
}).join('') + '\n\nAssistant:';
|
||||
return {
|
||||
prompt,
|
||||
model: model.id,
|
||||
stream,
|
||||
...(model.temperature && { temperature: model.temperature }),
|
||||
...(model.maxTokens && { max_tokens_to_sample: model.maxTokens })
|
||||
};
|
||||
}
|
||||
export function anthropicMessagesPayloadOrThrow(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean): AnthropicWireMessagesRequest {
|
||||
|
||||
async function anthropicPOST<TOut extends object, TPostBody extends object>(access: AnthropicAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
|
||||
const { headers, url } = anthropicAccess(access, apiPath);
|
||||
return await fetchJsonOrTRPCError<TOut, TPostBody>(url, 'POST', headers, body, 'Anthropic');
|
||||
// Take the System prompt, if it's the first message
|
||||
// But if it's the only message, treat it as a user message
|
||||
history = [...history];
|
||||
let systemPrompt: string | undefined = undefined;
|
||||
if (history[0]?.role === 'system' && history.length > 1)
|
||||
systemPrompt = history.shift()?.content;
|
||||
|
||||
// Transform the OpenAIHistorySchema into the target messages format, ensuring that roles alternate between 'user' and 'assistant'
|
||||
const messages = history.reduce(
|
||||
(acc, historyItem, index) => {
|
||||
|
||||
const lastMessage: AnthropicWireMessagesRequest['messages'][number] | undefined = acc[acc.length - 1];
|
||||
const anthropicRole = historyItem.role === 'assistant' ? 'assistant' : 'user';
|
||||
|
||||
if (index === 0 || anthropicRole !== lastMessage?.role) {
|
||||
// Add a new message object if the role is different from the previous message
|
||||
acc.push({
|
||||
role: anthropicRole,
|
||||
content: [
|
||||
{ type: 'text', text: historyItem.content },
|
||||
],
|
||||
});
|
||||
} else {
|
||||
// Merge consecutive messages with the same role
|
||||
(lastMessage.content as AnthropicWireMessagesRequest['messages'][number]['content']).push(
|
||||
{ type: 'text', text: historyItem.content },
|
||||
);
|
||||
}
|
||||
return acc;
|
||||
},
|
||||
[] as AnthropicWireMessagesRequest['messages'],
|
||||
);
|
||||
|
||||
// NOTE: if the last message is 'assistant', then the API will perform a continuation - shall we add a user message? TBD
|
||||
|
||||
// NOTE: the following code has been disabled because Anthropic will reject empty text blocks
|
||||
// If the messages array is empty, add a default user message
|
||||
// if (messages.length === 0)
|
||||
// messages.push({ role: 'user', content: [{ type: 'text', text: '' }] });
|
||||
|
||||
// Construct the request payload
|
||||
const payload: AnthropicWireMessagesRequest = {
|
||||
model: model.id,
|
||||
...(systemPrompt !== undefined && { system: systemPrompt }),
|
||||
messages: messages,
|
||||
max_tokens: model.maxTokens || DEFAULT_MAX_TOKENS,
|
||||
stream: stream,
|
||||
...(model.temperature !== undefined && { temperature: model.temperature }),
|
||||
// metadata: not useful to us
|
||||
// stop_sequences: not useful to us
|
||||
// top_p: not useful to us
|
||||
// top_k: not useful to us
|
||||
};
|
||||
|
||||
// Validate the payload against the schema to ensure correctness
|
||||
const validated = anthropicWireMessagesRequestSchema.safeParse(payload);
|
||||
if (!validated.success)
|
||||
throw new Error(`Invalid message sequence for Anthropic models: ${validated.error.errors?.[0]?.message || validated.error}`);
|
||||
|
||||
return validated.data;
|
||||
}
|
||||
|
||||
|
||||
@@ -101,45 +152,36 @@ const chatGenerateInputSchema = z.object({
|
||||
|
||||
export const llmAnthropicRouter = createTRPCRouter({
|
||||
|
||||
/* Anthropic: list models
|
||||
*
|
||||
* See https://github.com/anthropics/anthropic-sdk-typescript/commit/7c53ded6b7f5f3efec0df295181f18469c37e09d?diff=unified for
|
||||
* some details on the models, as the API docs are scarce: https://docs.anthropic.com/claude/reference/selecting-a-model
|
||||
*/
|
||||
/* [Anthropic] list models - https://docs.anthropic.com/claude/docs/models-overview */
|
||||
listModels: publicProcedure
|
||||
.input(listModelsInputSchema)
|
||||
.output(llmsListModelsOutputSchema)
|
||||
.query(() => ({ models: hardcodedAnthropicModels })),
|
||||
|
||||
/* Anthropic: Chat generation */
|
||||
chatGenerate: publicProcedure
|
||||
/* [Anthropic] Message generation (non-streaming) */
|
||||
chatGenerateMessage: publicProcedure
|
||||
.input(chatGenerateInputSchema)
|
||||
.output(llmsChatGenerateOutputSchema)
|
||||
.mutation(async ({ input }) => {
|
||||
.mutation(async ({ input: { access, model, history } }) => {
|
||||
|
||||
const { access, model, history } = input;
|
||||
// NOTES: doesn't support functions yet, supports multi-modal inputs (but they're not in our history, yet)
|
||||
|
||||
// ensure history has at least one message, and not from the assistant
|
||||
if (history.length === 0 || history[0].role === 'assistant')
|
||||
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] Need one human character at least` });
|
||||
// throw if the message sequence is not okay
|
||||
const payload = anthropicMessagesPayloadOrThrow(model, history, false);
|
||||
const response = await anthropicPOST<AnthropicWireMessagesResponse, AnthropicWireMessagesRequest>(access, payload, '/v1/messages');
|
||||
const completion = anthropicWireMessagesResponseSchema.parse(response);
|
||||
|
||||
const wireCompletions = await anthropicPOST<AnthropicWire.Complete.Response, AnthropicWire.Complete.Request>(
|
||||
access,
|
||||
anthropicChatCompletionPayload(model, history, false),
|
||||
'/v1/complete',
|
||||
);
|
||||
// validate output
|
||||
if (!completion || completion.type !== 'message' || completion.role !== 'assistant' || completion.stop_reason === undefined)
|
||||
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] Invalid Message` });
|
||||
if (completion.content.length !== 1 || completion.content[0].type !== 'text')
|
||||
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] No Single Text Message (${completion.content.length})` });
|
||||
|
||||
// expect a single output
|
||||
if (wireCompletions.completion === undefined)
|
||||
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] No completions` });
|
||||
if (wireCompletions.stop_reason === undefined)
|
||||
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] No stop_reason` });
|
||||
|
||||
// check for a function output
|
||||
// got the completion (non-streaming)
|
||||
return {
|
||||
role: 'assistant',
|
||||
finish_reason: wireCompletions.stop_reason === 'stop_sequence' ? 'stop' : 'length',
|
||||
content: wireCompletions.completion || '',
|
||||
role: completion.role,
|
||||
content: completion.content[0].text,
|
||||
finish_reason: completion.stop_reason === 'max_tokens' ? 'length' : 'stop',
|
||||
};
|
||||
}),
|
||||
|
||||
|
||||
@@ -1,29 +1,151 @@
|
||||
export namespace AnthropicWire {
|
||||
export namespace Complete {
|
||||
export interface Request {
|
||||
prompt: string;
|
||||
model: string;
|
||||
max_tokens_to_sample?: number;
|
||||
stop_sequences?: string[];
|
||||
stream?: boolean;
|
||||
temperature?: number;
|
||||
top_k?: number;
|
||||
top_p?: number;
|
||||
metadata?: {
|
||||
user_id?: string;
|
||||
};
|
||||
}
|
||||
import { z } from 'zod';
|
||||
|
||||
export interface Response {
|
||||
completion: string;
|
||||
stop_reason: 'stop_sequence' | 'max_tokens' | string;
|
||||
model: string;
|
||||
stop: string | null; // the stop sequence, if stop_reason is 'stop_sequence'
|
||||
log_id: string; // some log
|
||||
|
||||
// removed since the 2023-06-01 API version
|
||||
// truncated: boolean;
|
||||
// exception: string | null;
|
||||
}
|
||||
}
|
||||
}
|
||||
// text, e.g.: { 'type': 'text', 'text': 'Hello, Claude' }
|
||||
const anthropicWireTextBlockSchema = z.object({
|
||||
type: z.literal('text'),
|
||||
text: z.string(),
|
||||
});
|
||||
|
||||
// image, e.g.: { 'type': 'image', 'source': { 'type': 'base64', 'media_type': 'image/jpeg', 'data': '/9j/4AAQSkZJRg...' } }
|
||||
const anthropicWireImageBlockSchema = z.object({
|
||||
type: z.literal('image'),
|
||||
source: z.object({
|
||||
type: z.enum(['base64']),
|
||||
media_type: z.enum(['image/jpeg', 'image/png', 'image/gif', 'image/webp']),
|
||||
data: z.string(),
|
||||
}),
|
||||
});
|
||||
|
||||
const anthropicWireMessagesSchema = z.array(
|
||||
z.object({
|
||||
role: z.enum(['user', 'assistant']),
|
||||
// NOTE: could be a string or an array of text/image blocks, but for a better implementation
|
||||
// we will assume it's always an array
|
||||
// content: z.union([
|
||||
// z.array(z.union([anthropicWireTextBlockSchema, anthropicWireImageBlockSchema])),
|
||||
// z.string(),
|
||||
// ]),
|
||||
content: z.array(
|
||||
z.union([
|
||||
anthropicWireTextBlockSchema,
|
||||
anthropicWireImageBlockSchema,
|
||||
]),
|
||||
),
|
||||
}),
|
||||
);
|
||||
|
||||
export const anthropicWireMessagesRequestSchema = z.object({
|
||||
model: z.string(),
|
||||
|
||||
/**
|
||||
* If you want to include a system prompt, you can use the top-level system parameter — there is no "system" role for input messages in the Messages API.
|
||||
*/
|
||||
system: z.string().optional(),
|
||||
|
||||
/**
|
||||
* (required) Input messages. The API operates on alternating user and assistant conversational turns, and the first message must always use the user role.
|
||||
* If the final message uses the assistant role, the response content will continue immediately from the content in that message.
|
||||
* This can be used to constrain part of the model's response.
|
||||
*/
|
||||
messages: anthropicWireMessagesSchema.refine(
|
||||
(messages) => {
|
||||
|
||||
// Ensure the first message uses the user role
|
||||
if (messages.length === 0 || messages[0].role !== 'user')
|
||||
return false;
|
||||
|
||||
// Ensure messages alternate between user and assistant roles
|
||||
for (let i = 1; i < messages.length; i++)
|
||||
if (messages[i].role === messages[i - 1].role)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
},
|
||||
{ message: `messages must alternate between User and Assistant roles, starting with the User role` },
|
||||
),
|
||||
|
||||
/**
|
||||
* (required) The maximum number of tokens to generate before stopping.
|
||||
*/
|
||||
max_tokens: z.number(),
|
||||
|
||||
|
||||
/**
|
||||
* (optional) Metadata to include with the request.
|
||||
* user_id: This should be a uuid, hash value, or other opaque identifier.
|
||||
*/
|
||||
metadata: z.object({
|
||||
user_id: z.string().optional(),
|
||||
}).optional(),
|
||||
|
||||
/**
|
||||
* Custom text sequences that will cause the model to stop generating.
|
||||
*/
|
||||
stop_sequences: z.array(z.string()).optional(),
|
||||
|
||||
/**
|
||||
* Whether to incrementally stream the response using server-sent events. Default: false
|
||||
*/
|
||||
stream: z.boolean().optional(),
|
||||
|
||||
/**
|
||||
* Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks.
|
||||
*/
|
||||
temperature: z.number().optional(),
|
||||
|
||||
/**
|
||||
* Use nucleus sampling.
|
||||
* Recommended for advanced use cases only. You usually only need to use temperature.
|
||||
*/
|
||||
top_p: z.number().optional(),
|
||||
|
||||
/**
|
||||
* Only sample from the top K options for each subsequent token.
|
||||
* Recommended for advanced use cases only. You usually only need to use temperature.
|
||||
*/
|
||||
top_k: z.number().optional(),
|
||||
});
|
||||
export type AnthropicWireMessagesRequest = z.infer<typeof anthropicWireMessagesRequestSchema>;
|
||||
|
||||
|
||||
export const anthropicWireMessagesResponseSchema = z.object({
|
||||
// Unique object identifier.
|
||||
id: z.string(),
|
||||
|
||||
// For Messages, this is always "message".
|
||||
type: z.literal('message'),
|
||||
// Conversational role of the generated message. This will always be "assistant".
|
||||
role: z.literal('assistant'),
|
||||
/**
|
||||
* Content generated by the model.
|
||||
* This is an array of content blocks, each of which has a type that determines its shape. Currently, the only type in responses is "text".
|
||||
*/
|
||||
content: z.array(anthropicWireTextBlockSchema),
|
||||
|
||||
// The model that handled the request.
|
||||
model: z.string(),
|
||||
|
||||
/**
|
||||
* This may be one of the following values:
|
||||
*
|
||||
* "end_turn": the model reached a natural stopping point
|
||||
* "max_tokens": we exceeded the requested max_tokens or the model's maximum
|
||||
* "stop_sequence": one of your provided custom stop_sequences was generated
|
||||
* Note that these values are different than those in /v1/complete, where end_turn and stop_sequence were not differentiated.
|
||||
*
|
||||
* In non-streaming mode this value is always non-null. In streaming mode, it is null in the message_start event and non-null otherwise.
|
||||
*/
|
||||
stop_reason: z.enum(['end_turn', 'max_tokens', 'stop_sequence']).nullable(),
|
||||
|
||||
// Which custom stop sequence was generated, if any.
|
||||
stop_sequence: z.string().nullable(),
|
||||
|
||||
// Billing and rate-limit usage.
|
||||
usage: z.object({
|
||||
input_tokens: z.number(),
|
||||
output_tokens: z.number(),
|
||||
}),
|
||||
|
||||
});
|
||||
export type AnthropicWireMessagesResponse = z.infer<typeof anthropicWireMessagesResponseSchema>;
|
||||
|
||||
@@ -2,12 +2,12 @@ import { z } from 'zod';
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { createParser as createEventsourceParser, EventSourceParseCallback, EventSourceParser, ParsedEvent, ReconnectInterval } from 'eventsource-parser';
|
||||
|
||||
import { createEmptyReadableStream, debugGenerateCurlCommand, nonTrpcServerFetchOrThrow, safeErrorString, SERVER_DEBUG_WIRE } from '~/server/wire';
|
||||
import { createEmptyReadableStream, debugGenerateCurlCommand, nonTrpcServerFetchOrThrow, safeErrorString, SERVER_DEBUG_WIRE, serverCapitalizeFirstLetter } from '~/server/wire';
|
||||
|
||||
|
||||
// Anthropic server imports
|
||||
import type { AnthropicWire } from './anthropic/anthropic.wiretypes';
|
||||
import { anthropicAccess, anthropicAccessSchema, anthropicChatCompletionPayload } from './anthropic/anthropic.router';
|
||||
import { AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic/anthropic.wiretypes';
|
||||
import { anthropicAccess, anthropicAccessSchema, anthropicMessagesPayloadOrThrow } from './anthropic/anthropic.router';
|
||||
|
||||
// Gemini server imports
|
||||
import { geminiAccess, geminiAccessSchema, geminiGenerateContentTextPayload } from './gemini/gemini.router';
|
||||
@@ -38,7 +38,7 @@ type MuxingFormat = 'sse' | 'json-nl';
|
||||
* The peculiarity of our parser is the injection of a JSON structure at the beginning of the stream, to
|
||||
* communicate parameters before the text starts flowing to the client.
|
||||
*/
|
||||
type AIStreamParser = (data: string) => { text: string, close: boolean };
|
||||
type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean };
|
||||
|
||||
|
||||
const chatStreamingInputSchema = z.object({
|
||||
@@ -74,9 +74,9 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
|
||||
let body: object;
|
||||
switch (access.dialect) {
|
||||
case 'anthropic':
|
||||
requestAccess = anthropicAccess(access, '/v1/complete');
|
||||
body = anthropicChatCompletionPayload(model, history, true);
|
||||
vendorStreamParser = createStreamParserAnthropic();
|
||||
requestAccess = anthropicAccess(access, '/v1/messages');
|
||||
body = anthropicMessagesPayloadOrThrow(model, history, true);
|
||||
vendorStreamParser = createStreamParserAnthropicMessages();
|
||||
break;
|
||||
|
||||
case 'gemini':
|
||||
@@ -121,7 +121,7 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
|
||||
console.error(`/api/llms/stream: fetch issue:`, access.dialect, fetchOrVendorError, requestAccess?.url);
|
||||
|
||||
// client-side users visible message
|
||||
return new NextResponse(`[Issue] ${access.dialect}: ${fetchOrVendorError}`
|
||||
return new NextResponse(`[Issue] ${serverCapitalizeFirstLetter(access.dialect)}: ${fetchOrVendorError}`
|
||||
+ (process.env.NODE_ENV === 'development' ? ` · [URL: ${requestAccess?.url}]` : ''), { status: 500 });
|
||||
}
|
||||
|
||||
@@ -217,7 +217,7 @@ function createEventStreamTransformer(muxingFormat: MuxingFormat, vendorTextPars
|
||||
}
|
||||
|
||||
try {
|
||||
const { text, close } = vendorTextParser(event.data);
|
||||
const { text, close } = vendorTextParser(event.data, event.event);
|
||||
if (text)
|
||||
controller.enqueue(textEncoder.encode(text));
|
||||
if (close)
|
||||
@@ -246,19 +246,94 @@ function createEventStreamTransformer(muxingFormat: MuxingFormat, vendorTextPars
|
||||
|
||||
/// Stream Parsers
|
||||
|
||||
function createStreamParserAnthropic(): AIStreamParser {
|
||||
let hasBegun = false;
|
||||
function createStreamParserAnthropicMessages(): AIStreamParser {
|
||||
let responseMessage: AnthropicWireMessagesResponse | null = null;
|
||||
let hasErrored = false;
|
||||
|
||||
return (data: string) => {
|
||||
// Note: at this stage, the parser only returns the text content as text, which is streamed as text
|
||||
// to the client. It is however building in parallel the responseMessage object, which is not
|
||||
// yet used, but contains token counts, for instance.
|
||||
return (data: string, eventName?: string) => {
|
||||
let text = '';
|
||||
|
||||
const json: AnthropicWire.Complete.Response = JSON.parse(data);
|
||||
let text = json.completion;
|
||||
// if we've errored, we should not be receiving more data
|
||||
if (hasErrored)
|
||||
console.log('Anthropic stream has errored already, but received more data:', data);
|
||||
|
||||
// hack: prepend the model name to the first packet
|
||||
if (!hasBegun) {
|
||||
hasBegun = true;
|
||||
const firstPacket: ChatStreamingFirstOutputPacketSchema = { model: json.model };
|
||||
text = JSON.stringify(firstPacket) + text;
|
||||
switch (eventName) {
|
||||
// Ignore pings
|
||||
case 'ping':
|
||||
break;
|
||||
|
||||
// Initialize the message content for a new message
|
||||
case 'message_start':
|
||||
const firstMessage = !responseMessage;
|
||||
const { message } = JSON.parse(data);
|
||||
responseMessage = anthropicWireMessagesResponseSchema.parse(message);
|
||||
// hack: prepend the model name to the first packet
|
||||
if (firstMessage) {
|
||||
const firstPacket: ChatStreamingFirstOutputPacketSchema = { model: responseMessage.model };
|
||||
text = JSON.stringify(firstPacket);
|
||||
}
|
||||
break;
|
||||
|
||||
// Initialize content block if needed
|
||||
case 'content_block_start':
|
||||
if (responseMessage) {
|
||||
const { index, content_block } = JSON.parse(data);
|
||||
if (responseMessage.content[index] === undefined)
|
||||
responseMessage.content[index] = content_block;
|
||||
text = responseMessage.content[index].text;
|
||||
} else
|
||||
throw new Error('Unexpected content block start');
|
||||
break;
|
||||
|
||||
// Append delta text to the current message content
|
||||
case 'content_block_delta':
|
||||
if (responseMessage) {
|
||||
const { index, delta } = JSON.parse(data);
|
||||
if (delta.type !== 'text_delta')
|
||||
throw new Error(`Unexpected content block non-text delta (${delta.type})`);
|
||||
if (responseMessage.content[index] === undefined)
|
||||
throw new Error(`Unexpected content block delta location (${index})`);
|
||||
responseMessage.content[index].text += delta.text;
|
||||
text = delta.text;
|
||||
} else
|
||||
throw new Error('Unexpected content block delta');
|
||||
break;
|
||||
|
||||
// Finalize content block if needed.
|
||||
case 'content_block_stop':
|
||||
if (responseMessage) {
|
||||
const { index } = JSON.parse(data);
|
||||
if (responseMessage.content[index] === undefined)
|
||||
throw new Error(`Unexpected content block end location (${index})`);
|
||||
} else
|
||||
throw new Error('Unexpected content block stop');
|
||||
break;
|
||||
|
||||
// Optionally handle top-level message changes. Example: updating stop_reason
|
||||
case 'message_delta':
|
||||
if (responseMessage) {
|
||||
const { delta } = JSON.parse(data);
|
||||
Object.assign(responseMessage, delta);
|
||||
} else
|
||||
throw new Error('Unexpected message delta');
|
||||
break;
|
||||
|
||||
// We can now close the message
|
||||
case 'message_stop':
|
||||
return { text: '', close: true };
|
||||
|
||||
// Occasionally, the server will send errors, such as {"type": "error", "error": {"type": "overloaded_error", "message": "Overloaded"}}
|
||||
case 'error':
|
||||
hasErrored = true;
|
||||
const { error } = JSON.parse(data);
|
||||
const errorText = (error.type && error.message) ? `${error.type}: ${error.message}` : safeErrorString(error);
|
||||
return { text: `[Anthropic Server Error] ${errorText}`, close: true };
|
||||
|
||||
default:
|
||||
throw new Error(`Unexpected event name: ${eventName}`);
|
||||
}
|
||||
|
||||
return { text, close: false };
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import * as React from 'react';
|
||||
|
||||
import { Alert } from '@mui/joy';
|
||||
import WarningRoundedIcon from '@mui/icons-material/WarningRounded';
|
||||
|
||||
import { FormInputKey } from '~/common/components/forms/FormInputKey';
|
||||
import { FormTextField } from '~/common/components/forms/FormTextField';
|
||||
@@ -40,11 +39,9 @@ export function AnthropicSourceSetup(props: { sourceId: DModelSourceId }) {
|
||||
|
||||
return <>
|
||||
|
||||
<Alert variant='soft' color='warning' startDecorator={<WarningRoundedIcon color='warning' />}>
|
||||
<Alert variant='soft' color='success'>
|
||||
<div>
|
||||
Note: <strong>Claude-3</strong> API support is being added as the Anthropic API has changed. Please refer to <Link
|
||||
level='body-sm' href='https://github.com/enricoros/big-AGI/issues/443' target='_blank'>issue #443</Link> for
|
||||
updates.
|
||||
Note: <strong>Claude-3</strong> models are now supported.
|
||||
</div>
|
||||
</Alert>
|
||||
|
||||
@@ -86,4 +83,5 @@ export function AnthropicSourceSetup(props: { sourceId: DModelSourceId }) {
|
||||
{isError && <InlineError error={error} />}
|
||||
|
||||
</>;
|
||||
;
|
||||
}
|
||||
+1
-1
@@ -62,7 +62,7 @@ export const ModelVendorAnthropic: IModelVendor<SourceSetupAnthropic, AnthropicA
|
||||
|
||||
const { llmRef, llmTemperature, llmResponseTokens } = llmOptions;
|
||||
try {
|
||||
return await apiAsync.llmAnthropic.chatGenerate.mutate({
|
||||
return await apiAsync.llmAnthropic.chatGenerateMessage.mutate({
|
||||
access,
|
||||
model: {
|
||||
id: llmRef,
|
||||
|
||||
@@ -48,6 +48,10 @@ export function safeErrorString(error: any): string | null {
|
||||
return error.toString();
|
||||
}
|
||||
|
||||
/**
 * Returns the input with its first character upper-cased; the rest of the string is left untouched.
 *
 * The first character is taken as a full Unicode code point, so a leading character
 * encoded as a surrogate pair (outside the BMP) is capitalized as a whole rather than
 * being split into two code units that `toUpperCase()` cannot map.
 *
 * @param string the text to capitalize; empty (or nullish, at runtime) values are returned as-is
 * @returns the capitalized text, or the input unchanged when it has no characters
 */
export function serverCapitalizeFirstLetter(string: string): string {
  // keep the original lenient behavior: empty/nullish inputs pass through unchanged
  if (!string?.length)
    return string;

  // a code point above 0xFFFF occupies two UTF-16 code units
  const firstCodePoint = string.codePointAt(0);
  const firstLength = firstCodePoint !== undefined && firstCodePoint > 0xFFFF ? 2 : 1;

  return string.slice(0, firstLength).toUpperCase() + string.slice(firstLength);
}
|
||||
|
||||
|
||||
/**
|
||||
* Weak (meaning the string could be encoded poorly) function that returns a string that can be used to debug a request
|
||||
|
||||
Reference in New Issue
Block a user