Compare commits

...

10 Commits

| Author | SHA1 | Message | Date |
|---|---|---|---|
| Enrico Ros | 2f59e12e20 | Remove log | 2024-03-06 22:20:40 -08:00 |
| Enrico Ros | 30e8652c2a | 1.14.1: Release for Claude-3 | 2024-03-06 22:10:41 -08:00 |
| Enrico Ros | 5ee6aceb60 | cleanups | 2024-03-06 21:51:15 -08:00 |
| Enrico Ros | 6940b6a6d1 | Anthropic: Full support for Claude-3 models. Closes #443, #450. Thanks to @slapglif in #450 for a reference implementation. | 2024-03-06 21:50:24 -08:00 |
| Enrico Ros | 4e33ce9415 | misc | 2024-03-06 20:56:32 -08:00 |
| Enrico Ros | 944e22bde6 | Anthropic: if there's a single system message, treat it as-if it was a user message | 2024-03-06 20:49:59 -08:00 |
| Enrico Ros | 6054fa0a26 | Anthropic: use the new Messages format (thanks @slapglif #450) | 2024-03-06 20:42:33 -08:00 |
| Enrico Ros | 4db13cfed4 | Anthropic: wire types (fully switch to the new Messages API) | 2024-03-06 20:33:59 -08:00 |
| Enrico Ros | 6a6adda2e0 | misc | 2024-03-06 20:33:12 -08:00 |
| Enrico Ros | 4afa55c0db | Anthropic: update models | 2024-03-06 18:36:07 -08:00 |
16 changed files with 386 additions and 139 deletions
+3 -2
@@ -17,15 +17,16 @@ Or fork & run on Vercel
big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap
### What's New in 1.14.0 · March 6, 2024 · Modelmorphic
### What's New in 1.14.1 · March 7, 2024 · Modelmorphic
- **Anthropic** [Claude-3](https://www.anthropic.com/news/claude-3-family) model family support. [#443](https://github.com/enricoros/big-AGI/issues/443)
- New **[Perplexity](https://www.perplexity.ai/)** and **[Groq](https://groq.com/)** integration (thanks @Penagwin). [#407](https://github.com/enricoros/big-AGI/issues/407), [#427](https://github.com/enricoros/big-AGI/issues/427)
- **[LocalAI](https://localai.io/models/)** deep integration, including support for [model galleries](https://github.com/enricoros/big-AGI/issues/411)
- **Mistral** Large and Google **Gemini 1.5** support
- Performance optimizations: runs [much faster](https://twitter.com/enricoros/status/1756553038293303434?utm_source=localhost:3000&utm_medium=big-agi), saves lots of power, reduces memory usage
- Enhanced UX with auto-sizing charts, refined search and folder functionalities, perfected scaling
- And with more UI improvements, documentation, bug fixes (20 tickets), and developer enhancements
- [Release notes](https://github.com/enricoros/big-AGI/releases/tag/v1.14.0), and changes [v1.13.1...v1.14.0](https://github.com/enricoros/big-AGI/compare/v1.13.1...v1.14.0) (233 commits, 8,000+ lines changed)
- [Release notes](https://github.com/enricoros/big-AGI/releases/tag/v1.14.0), and changes [v1.13.1...v1.14.1](https://github.com/enricoros/big-AGI/compare/v1.13.1...v1.14.1) (233 commits, 8,000+ lines changed)
### What's New in 1.13.0 · Feb 8, 2024 · Multi + Mind
+2 -1
@@ -12,8 +12,9 @@ Prediction: OpenAI will release GPT-5 on March 14, 2024. We will support it on d
- milestone: [1.15.0](https://github.com/enricoros/big-agi/milestone/15)
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
### What's New in 1.14.0 · March 6, 2024 · Modelmorphic
### What's New in 1.14.1 · March 7, 2024 · Modelmorphic
- **Anthropic** [Claude-3](https://www.anthropic.com/news/claude-3-family) model family support. [#443](https://github.com/enricoros/big-AGI/issues/443)
- New **[Perplexity](https://www.perplexity.ai/)** and **[Groq](https://groq.com/)** integration (thanks @Penagwin). [#407](https://github.com/enricoros/big-AGI/issues/407), [#427](https://github.com/enricoros/big-AGI/issues/427)
- **[LocalAI](https://localai.io/models/)** deep integration, including support for [model galleries](https://github.com/enricoros/big-AGI/issues/411)
- **Mistral** Large and Google **Gemini 1.5** support
+2 -2
@@ -1,12 +1,12 @@
{
"name": "big-agi",
"version": "1.14.0",
"version": "1.14.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "big-agi",
"version": "1.14.0",
"version": "1.14.1",
"hasInstallScript": true,
"dependencies": {
"@emotion/cache": "^11.11.0",
+1 -1
@@ -1,6 +1,6 @@
{
"name": "big-agi",
"version": "1.14.0",
"version": "1.14.1",
"private": true,
"author": "Enrico Ros <enrico.ros@gmail.com>",
"repository": "https://github.com/enricoros/big-agi",
@@ -64,6 +64,7 @@ const avatarIconSx = { width: 36, height: 36 };
export function makeAvatar(messageAvatar: string | null, messageRole: DMessage['role'] | string, messageOriginLLM: string | undefined, messagePurposeId: SystemPurposeId | undefined, messageSender: string, messageTyping: boolean, size: 'sm' | undefined = undefined): React.JSX.Element {
if (typeof messageAvatar === 'string' && messageAvatar)
return <Avatar alt={messageSender} src={messageAvatar} />;
const mascotSx = size === 'sm' ? avatarIconSx : { width: 64, height: 64 };
switch (messageRole) {
case 'system':
@@ -76,17 +77,18 @@ export function makeAvatar(messageAvatar: string | null, messageRole: DMessage['
// typing gif (people seem to love this, so keeping it after April Fools')
const isTextToImage = messageOriginLLM === 'DALL·E' || messageOriginLLM === 'Prodia';
const isReact = messageOriginLLM?.startsWith('react-');
if (messageTyping) {
// animation: message typing
if (messageTyping)
return <Avatar
alt={messageSender} variant='plain'
src={isTextToImage ? 'https://i.giphy.com/media/5t9ujj9cMisyVjUZ0m/giphy.webp'
: isReact ? 'https://i.giphy.com/media/l44QzsOLXxcrigdgI/giphy.webp'
: 'https://i.giphy.com/media/jJxaUysjzO9ri/giphy.webp'}
src={isTextToImage ? 'https://i.giphy.com/media/5t9ujj9cMisyVjUZ0m/giphy.webp' // brush
: isReact ? 'https://i.giphy.com/media/l44QzsOLXxcrigdgI/giphy.webp' // mind
: 'https://i.giphy.com/media/jJxaUysjzO9ri/giphy.webp'} // typing
sx={{ ...mascotSx, borderRadius: 'sm' }}
/>;
}
// text-to-image: icon
// icon: text-to-image
if (isTextToImage)
return <FormatPaintIcon sx={{
...avatarIconSx,
@@ -95,15 +97,16 @@ export function makeAvatar(messageAvatar: string | null, messageRole: DMessage['
// purpose symbol (if present)
const symbol = SystemPurposes[messagePurposeId!]?.symbol;
if (symbol) return <Box sx={{
fontSize: '24px',
textAlign: 'center',
width: '100%',
minWidth: `${avatarIconSx.width}px`,
lineHeight: `${avatarIconSx.height}px`,
}}>
{symbol}
</Box>;
if (symbol)
return <Box sx={{
fontSize: '24px',
textAlign: 'center',
width: '100%',
minWidth: `${avatarIconSx.width}px`,
lineHeight: `${avatarIconSx.height}px`,
}}>
{symbol}
</Box>;
// default assistant avatar
return <SmartToyOutlinedIcon sx={avatarIconSx} />; // https://mui.com/static/images/avatar/2.jpg
+4 -2
@@ -7,6 +7,7 @@ import AutoStoriesOutlinedIcon from '@mui/icons-material/AutoStoriesOutlined';
import GoogleIcon from '@mui/icons-material/Google';
import LaunchIcon from '@mui/icons-material/Launch';
import { AnthropicIcon } from '~/common/components/icons/vendors/AnthropicIcon';
import { GroqIcon } from '~/common/components/icons/vendors/GroqIcon';
import { LocalAIIcon } from '~/common/components/icons/vendors/LocalAIIcon';
import { MistralIcon } from '~/common/components/icons/vendors/MistralIcon';
@@ -54,11 +55,12 @@ export const NewsItems: NewsItem[] = [
]
}*/
{
versionCode: '1.14.0',
versionCode: '1.14.1',
versionName: 'Modelmorphic',
versionCoverImage: coverV114,
versionDate: new Date('2024-03-06T08:00:00Z'),
versionDate: new Date('2024-03-07T08:00:00Z'),
items: [
{ text: <>Anthropic <B href='https://www.anthropic.com/news/claude-3-family'>Claude-3</B> support for smarter chats</>, issue: 443, icon: AnthropicIcon },
{ text: <><B issue={407}>Perplexity</B> support, including Online models</>, issue: 407, icon: PerplexityIcon },
{ text: <><B issue={427}>Groq</B> support, with speeds up to 500 tok/s</>, issue: 427, icon: GroqIcon },
{ text: <>Support for new Mistral-Large models</>, icon: MistralIcon },
+1 -1
@@ -4,7 +4,7 @@
import { useAppStateStore } from '~/common/state/store-appstate';
export const incrementalNewsVersion: number = 14;
export const incrementalNewsVersion: number = 14.1;
export function shallRedirectToNews() {
+3
@@ -15,6 +15,9 @@ export function prettyBaseModel(model: string | undefined): string {
if (model.includes('gpt-3.5-turbo-16k')) return '3.5 Turbo 16k';
if (model.includes('gpt-3.5-turbo')) return '3.5 Turbo';
if (model.endsWith('.bin')) return model.slice(0, -4);
// [Anthropic]
if (model.includes('claude-3-opus')) return 'Claude 3 Opus';
if (model.includes('claude-3-sonnet')) return 'Claude 3 Sonnet';
// [LM Studio]
if (model.startsWith('C:\\') || model.startsWith('D:\\'))
return getModelFromFile(model).replace('.gguf', '');
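For illustration, the new [Anthropic] branch applied to the dated model ids Anthropic publishes; a minimal, self-contained sketch where `prettyClaudeModel` is a hypothetical extract of the logic above:

```typescript
// Hypothetical extract of the new [Anthropic] branch of prettyBaseModel above,
// applied to dated model ids such as 'claude-3-opus-20240229'.
function prettyClaudeModel(model: string): string | undefined {
  if (model.includes('claude-3-opus')) return 'Claude 3 Opus';
  if (model.includes('claude-3-sonnet')) return 'Claude 3 Sonnet';
  return undefined; // fall through to the other branches
}

console.log(prettyClaudeModel('claude-3-opus-20240229'));   // 'Claude 3 Opus'
console.log(prettyClaudeModel('claude-3-sonnet-20240229')); // 'Claude 3 Sonnet'
```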
+1 -1
@@ -1,5 +1,5 @@
export function capitalizeFirstLetter(string: string) {
return string.charAt(0).toUpperCase() + string.slice(1);
return string?.length ? (string.charAt(0).toUpperCase() + string.slice(1)) : string;
}
export function createBase36Uid(checkDuplicates: string[]): string {
@@ -40,6 +40,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-11-21'),
description: 'Superior performance on tasks that require complex reasoning, with reduced model hallucination rates',
contextWindow: 200000,
maxCompletionTokens: 4096,
pricing: {
cpmPrompt: 0.008,
cpmCompletion: 0.024,
@@ -52,6 +53,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-07-11'),
description: 'Superior performance on tasks that require complex reasoning',
contextWindow: 100000,
maxCompletionTokens: 4096,
pricing: {
cpmPrompt: 0.008,
cpmCompletion: 0.024,
@@ -65,6 +67,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-08-09'),
description: 'Low-latency, high throughput model',
contextWindow: 100000,
maxCompletionTokens: 4096,
pricing: {
cpmPrompt: 0.00163,
cpmCompletion: 0.00551,
@@ -77,6 +80,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-03-14'),
description: 'Precise and fast',
contextWindow: 100000,
maxCompletionTokens: 2048,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
@@ -86,16 +90,8 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-03-14'),
description: 'Claude 1.3 is the latest version of Claude v1',
contextWindow: 100000,
maxCompletionTokens: 4096,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
{
id: 'claude-1.0',
label: 'Claude 1',
created: roundTime('2023-03-14'),
description: 'Claude 1.0 is the first version of Claude',
contextWindow: 9000,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
}
];
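The Claude 3 entries added by the "Anthropic: update models" commit fall outside these hunks; for context, an illustrative entry in the same `ModelDescriptionSchema` shape (the description, pricing, and interfaces here are assumptions, not copied from the diff):

```typescript
// Illustrative only: a Claude 3 entry in the same shape as the hunks above.
// Uses roundTime and LLM_IF_OAI_Chat from the surrounding file; pricing assumes
// Anthropic's published $15 / $75 per million tokens for Opus.
{
  id: 'claude-3-opus-20240229',
  label: 'Claude 3 Opus',
  created: roundTime('2024-03-04'),
  description: 'Most capable model of the Claude 3 family',
  contextWindow: 200000,
  maxCompletionTokens: 4096,
  pricing: {
    cpmPrompt: 0.015,
    cpmCompletion: 0.075,
  },
  interfaces: [LLM_IF_OAI_Chat],
},
```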
@@ -8,30 +8,36 @@ import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
import { llmsListModelsOutputSchema, llmsChatGenerateOutputSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { AnthropicWire } from './anthropic.wiretypes';
import { AnthropicWireMessagesRequest, anthropicWireMessagesRequestSchema, AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic.wiretypes';
import { hardcodedAnthropicModels } from './anthropic.models';
// Default hosts
const DEFAULT_API_VERSION_HEADERS = {
'anthropic-version': '2023-06-01',
'anthropic-beta': 'messages-2023-12-15',
};
const DEFAULT_MAX_TOKENS = 2048;
const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com';
const DEFAULT_HELICONE_ANTHROPIC_HOST = 'anthropic.hconeai.com';
// Mappers
export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string): { headers: HeadersInit, url: string } {
// API version
const apiVersion = '2023-06-01';
async function anthropicPOST<TOut extends object, TPostBody extends object>(access: AnthropicAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = anthropicAccess(access, apiPath);
return await fetchJsonOrTRPCError<TOut, TPostBody>(url, 'POST', headers, body, 'Anthropic');
}
export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string): { headers: HeadersInit, url: string } {
// API key
const anthropicKey = access.anthropicKey || env.ANTHROPIC_API_KEY || '';
// break for the missing key only on the default host
if (!anthropicKey)
if (!access.anthropicHost && !env.ANTHROPIC_API_HOST)
throw new Error('Missing Anthropic API Key. Add it on the UI (Models Setup) or server side (your deployment).');
if (!anthropicKey && !(access.anthropicHost || env.ANTHROPIC_API_HOST))
throw new Error('Missing Anthropic API Key. Add it on the UI (Models Setup) or server side (your deployment).');
// API host
let anthropicHost = fixupHost(access.anthropicHost || env.ANTHROPIC_API_HOST || DEFAULT_ANTHROPIC_HOST, apiPath);
@@ -49,7 +55,7 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string):
headers: {
'Accept': 'application/json',
'Content-Type': 'application/json',
'anthropic-version': apiVersion,
...DEFAULT_API_VERSION_HEADERS,
'X-API-Key': anthropicKey,
...(heliKey && { 'Helicone-Auth': `Bearer ${heliKey}` }),
},
@@ -57,23 +63,68 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string):
};
}
export function anthropicChatCompletionPayload(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean): AnthropicWire.Complete.Request {
// encode the prompt for Claude models
const prompt = history.map(({ role, content }) => {
return role === 'assistant' ? `\n\nAssistant: ${content}` : `\n\nHuman: ${content}`;
}).join('') + '\n\nAssistant:';
return {
prompt,
model: model.id,
stream,
...(model.temperature && { temperature: model.temperature }),
...(model.maxTokens && { max_tokens_to_sample: model.maxTokens })
};
}
export function anthropicMessagesPayloadOrThrow(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean): AnthropicWireMessagesRequest {
async function anthropicPOST<TOut extends object, TPostBody extends object>(access: AnthropicAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = anthropicAccess(access, apiPath);
return await fetchJsonOrTRPCError<TOut, TPostBody>(url, 'POST', headers, body, 'Anthropic');
// Take the System prompt, if it's the first message
// But if it's the only message, treat it as a user message
history = [...history];
let systemPrompt: string | undefined = undefined;
if (history[0]?.role === 'system' && history.length > 1)
systemPrompt = history.shift()?.content;
// Transform the OpenAIHistorySchema into the target messages format, ensuring that roles alternate between 'user' and 'assistant'
const messages = history.reduce(
(acc, historyItem, index) => {
const lastMessage: AnthropicWireMessagesRequest['messages'][number] | undefined = acc[acc.length - 1];
const anthropicRole = historyItem.role === 'assistant' ? 'assistant' : 'user';
if (index === 0 || anthropicRole !== lastMessage?.role) {
// Add a new message object if the role is different from the previous message
acc.push({
role: anthropicRole,
content: [
{ type: 'text', text: historyItem.content },
],
});
} else {
// Merge consecutive messages with the same role
(lastMessage.content as AnthropicWireMessagesRequest['messages'][number]['content']).push(
{ type: 'text', text: historyItem.content },
);
}
return acc;
},
[] as AnthropicWireMessagesRequest['messages'],
);
// NOTE: if the last message is 'assistant', then the API will perform a continuation - shall we add a user message? TBD
// NOTE: the following code has been disabled because Anthropic will reject empty text blocks
// If the messages array is empty, add a default user message
// if (messages.length === 0)
// messages.push({ role: 'user', content: [{ type: 'text', text: '' }] });
// Construct the request payload
const payload: AnthropicWireMessagesRequest = {
model: model.id,
...(systemPrompt !== undefined && { system: systemPrompt }),
messages: messages,
max_tokens: model.maxTokens || DEFAULT_MAX_TOKENS,
stream: stream,
...(model.temperature !== undefined && { temperature: model.temperature }),
// metadata: not useful to us
// stop_sequences: not useful to us
// top_p: not useful to us
// top_k: not useful to us
};
// Validate the payload against the schema to ensure correctness
const validated = anthropicWireMessagesRequestSchema.safeParse(payload);
if (!validated.success)
throw new Error(`Invalid message sequence for Anthropic models: ${validated.error.errors?.[0]?.message || validated.error}`);
return validated.data;
}
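To make the normalization rule concrete, here is a minimal standalone sketch of the same logic (simplified types; names are hypothetical): the leading system prompt is lifted out, roles are coerced to 'user' / 'assistant', and consecutive same-role turns are merged into one message with multiple text blocks.

```typescript
// Standalone sketch (hypothetical names) of the normalization performed by
// anthropicMessagesPayloadOrThrow above.
type Turn = { role: 'system' | 'user' | 'assistant'; content: string };
type TextBlock = { type: 'text'; text: string };
type Msg = { role: 'user' | 'assistant'; content: TextBlock[] };

function toAnthropicMessages(history: Turn[]): { system?: string; messages: Msg[] } {
  history = [...history];
  let system: string | undefined = undefined;
  // a lone system message is kept as a user message instead (as in the code above)
  if (history[0]?.role === 'system' && history.length > 1)
    system = history.shift()!.content;
  const messages = history.reduce((acc, turn) => {
    const role = turn.role === 'assistant' ? 'assistant' : 'user';
    const last = acc[acc.length - 1];
    if (last && last.role === role)
      last.content.push({ type: 'text', text: turn.content }); // merge same-role turns
    else
      acc.push({ role, content: [{ type: 'text', text: turn.content }] });
    return acc;
  }, [] as Msg[]);
  return { system, messages };
}

// Two consecutive user turns collapse into one 'user' message with two text blocks:
console.log(JSON.stringify(toAnthropicMessages([
  { role: 'system', content: 'Be terse.' },
  { role: 'user', content: 'Hi' },
  { role: 'user', content: 'Anyone there?' },
  { role: 'assistant', content: 'Yes.' },
]), null, 2));
```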
@@ -101,45 +152,36 @@ const chatGenerateInputSchema = z.object({
export const llmAnthropicRouter = createTRPCRouter({
/* Anthropic: list models
*
* See https://github.com/anthropics/anthropic-sdk-typescript/commit/7c53ded6b7f5f3efec0df295181f18469c37e09d?diff=unified for
* some details on the models, as the API docs are scarce: https://docs.anthropic.com/claude/reference/selecting-a-model
*/
/* [Anthropic] list models - https://docs.anthropic.com/claude/docs/models-overview */
listModels: publicProcedure
.input(listModelsInputSchema)
.output(llmsListModelsOutputSchema)
.query(() => ({ models: hardcodedAnthropicModels })),
/* Anthropic: Chat generation */
chatGenerate: publicProcedure
/* [Anthropic] Message generation (non-streaming) */
chatGenerateMessage: publicProcedure
.input(chatGenerateInputSchema)
.output(llmsChatGenerateOutputSchema)
.mutation(async ({ input }) => {
.mutation(async ({ input: { access, model, history } }) => {
const { access, model, history } = input;
// NOTES: doesn't support functions yet, supports multi-modal inputs (but they're not in our history, yet)
// ensure history has at least one message, and not from the assistant
if (history.length === 0 || history[0].role === 'assistant')
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] Need one human character at least` });
// throw if the message sequence is not okay
const payload = anthropicMessagesPayloadOrThrow(model, history, false);
const response = await anthropicPOST<AnthropicWireMessagesResponse, AnthropicWireMessagesRequest>(access, payload, '/v1/messages');
const completion = anthropicWireMessagesResponseSchema.parse(response);
const wireCompletions = await anthropicPOST<AnthropicWire.Complete.Response, AnthropicWire.Complete.Request>(
access,
anthropicChatCompletionPayload(model, history, false),
'/v1/complete',
);
// validate output
if (!completion || completion.type !== 'message' || completion.role !== 'assistant' || completion.stop_reason === undefined)
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] Invalid Message` });
if (completion.content.length !== 1 || completion.content[0].type !== 'text')
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] No Single Text Message (${completion.content.length})` });
// expect a single output
if (wireCompletions.completion === undefined)
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] No completions` });
if (wireCompletions.stop_reason === undefined)
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] No stop_reason` });
// check for a function output
// got the completion (non-streaming)
return {
role: 'assistant',
finish_reason: wireCompletions.stop_reason === 'stop_sequence' ? 'stop' : 'length',
content: wireCompletions.completion || '',
role: completion.role,
content: completion.content[0].text,
finish_reason: completion.stop_reason === 'max_tokens' ? 'length' : 'stop',
};
}),
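On the wire, the non-streaming path now amounts to a single POST to `/v1/messages` with the version headers defined above; a hedged sketch with placeholder model id and API key:

```typescript
// Sketch only: the raw HTTP call the non-streaming path now performs.
// Headers mirror DEFAULT_API_VERSION_HEADERS above; model id and key are placeholders.
async function callMessagesApi(apiKey: string) {
  const response = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Accept': 'application/json',
      'Content-Type': 'application/json',
      'anthropic-version': '2023-06-01',
      'anthropic-beta': 'messages-2023-12-15',
      'X-API-Key': apiKey,
    },
    body: JSON.stringify({
      model: 'claude-3-opus-20240229',
      max_tokens: 2048, // DEFAULT_MAX_TOKENS above
      messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello, Claude' }] }],
    }),
  });
  return await response.json(); // validate with anthropicWireMessagesResponseSchema
}
```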
@@ -1,29 +1,151 @@
export namespace AnthropicWire {
export namespace Complete {
export interface Request {
prompt: string;
model: string;
max_tokens_to_sample?: number;
stop_sequences?: string[];
stream?: boolean;
temperature?: number;
top_k?: number;
top_p?: number;
metadata?: {
user_id?: string;
};
}
import { z } from 'zod';
export interface Response {
completion: string;
stop_reason: 'stop_sequence' | 'max_tokens' | string;
model: string;
stop: string | null; // the stop sequence, if stop_reason is 'stop_sequence'
log_id: string; // some log
// removed since the 2023-06-01 API version
// truncated: boolean;
// exception: string | null;
}
}
}
// text, e.g.: { 'type': 'text', 'text': 'Hello, Claude' }
const anthropicWireTextBlockSchema = z.object({
type: z.literal('text'),
text: z.string(),
});
// image, e.g.: { 'type': 'image', 'source': { 'type': 'base64', 'media_type': 'image/jpeg', 'data': '/9j/4AAQSkZJRg...' } }
const anthropicWireImageBlockSchema = z.object({
type: z.literal('image'),
source: z.object({
type: z.enum(['base64']),
media_type: z.enum(['image/jpeg', 'image/png', 'image/gif', 'image/webp']),
data: z.string(),
}),
});
const anthropicWireMessagesSchema = z.array(
z.object({
role: z.enum(['user', 'assistant']),
// NOTE: content could be a string or an array of text/image blocks; for a cleaner
// implementation we assume it is always an array
// content: z.union([
// z.array(z.union([anthropicWireTextBlockSchema, anthropicWireImageBlockSchema])),
// z.string(),
// ]),
content: z.array(
z.union([
anthropicWireTextBlockSchema,
anthropicWireImageBlockSchema,
]),
),
}),
);
export const anthropicWireMessagesRequestSchema = z.object({
model: z.string(),
/**
* If you want to include a system prompt, you can use the top-level system parameter — there is no "system" role for input messages in the Messages API.
*/
system: z.string().optional(),
/**
* (required) Input messages. Operates on alternating user and assistant conversational turns; the first message must always use the user role.
* If the final message uses the assistant role, the response content will continue immediately from the content in that message.
* This can be used to constrain part of the model's response.
*/
messages: anthropicWireMessagesSchema.refine(
(messages) => {
// Ensure the first message uses the user role
if (messages.length === 0 || messages[0].role !== 'user')
return false;
// Ensure messages alternate between user and assistant roles
for (let i = 1; i < messages.length; i++)
if (messages[i].role === messages[i - 1].role)
return false;
return true;
},
{ message: `messages must alternate between User and Assistant roles, starting with the User role` },
),
/**
* (required) The maximum number of tokens to generate before stopping.
*/
max_tokens: z.number(),
/**
* (optional) Metadata to include with the request.
* user_id: This should be a uuid, hash value, or other opaque identifier.
*/
metadata: z.object({
user_id: z.string().optional(),
}).optional(),
/**
* Custom text sequences that will cause the model to stop generating.
*/
stop_sequences: z.array(z.string()).optional(),
/**
* Whether to incrementally stream the response using server-sent events. Default: false
*/
stream: z.boolean().optional(),
/**
* Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks.
*/
temperature: z.number().optional(),
/**
* Use nucleus sampling.
* Recommended for advanced use cases only. You usually only need to use temperature.
*/
top_p: z.number().optional(),
/**
* Only sample from the top K options for each subsequent token.
* Recommended for advanced use cases only. You usually only need to use temperature.
*/
top_k: z.number().optional(),
});
export type AnthropicWireMessagesRequest = z.infer<typeof anthropicWireMessagesRequestSchema>;
export const anthropicWireMessagesResponseSchema = z.object({
// Unique object identifier.
id: z.string(),
// For Messages, this is always "message".
type: z.literal('message'),
// Conversational role of the generated message. This will always be "assistant".
role: z.literal('assistant'),
/**
* Content generated by the model.
* This is an array of content blocks, each of which has a type that determines its shape. Currently, the only type in responses is "text".
*/
content: z.array(anthropicWireTextBlockSchema),
// The model that handled the request.
model: z.string(),
/**
* This may be one of the following values:
*
* "end_turn": the model reached a natural stopping point
* "max_tokens": we exceeded the requested max_tokens or the model's maximum
* "stop_sequence": one of your provided custom stop_sequences was generated
* Note that these values are different than those in /v1/complete, where end_turn and stop_sequence were not differentiated.
*
* In non-streaming mode this value is always non-null. In streaming mode, it is null in the message_start event and non-null otherwise.
*/
stop_reason: z.enum(['end_turn', 'max_tokens', 'stop_sequence']).nullable(),
// Which custom stop sequence was generated, if any.
stop_sequence: z.string().nullable(),
// Billing and rate-limit usage.
usage: z.object({
input_tokens: z.number(),
output_tokens: z.number(),
}),
});
export type AnthropicWireMessagesResponse = z.infer<typeof anthropicWireMessagesResponseSchema>;
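The `.refine()` on `messages` is what turns a bad turn sequence into the "Invalid message sequence" error thrown by `anthropicMessagesPayloadOrThrow`; a quick runnable check against the schema above:

```typescript
// Runnable check: back-to-back same-role messages fail the alternation refine.
const bad = anthropicWireMessagesRequestSchema.safeParse({
  model: 'claude-3-sonnet-20240229',
  max_tokens: 1024,
  messages: [
    { role: 'user', content: [{ type: 'text', text: 'one' }] },
    { role: 'user', content: [{ type: 'text', text: 'two' }] }, // same role twice
  ],
});
console.log(bad.success); // false
if (!bad.success)
  console.log(bad.error.errors[0].message);
// → 'messages must alternate between User and Assistant roles, starting with the User role'
```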
+94 -19
@@ -2,12 +2,12 @@ import { z } from 'zod';
import { NextRequest, NextResponse } from 'next/server';
import { createParser as createEventsourceParser, EventSourceParseCallback, EventSourceParser, ParsedEvent, ReconnectInterval } from 'eventsource-parser';
import { createEmptyReadableStream, debugGenerateCurlCommand, nonTrpcServerFetchOrThrow, safeErrorString, SERVER_DEBUG_WIRE } from '~/server/wire';
import { createEmptyReadableStream, debugGenerateCurlCommand, nonTrpcServerFetchOrThrow, safeErrorString, SERVER_DEBUG_WIRE, serverCapitalizeFirstLetter } from '~/server/wire';
// Anthropic server imports
import type { AnthropicWire } from './anthropic/anthropic.wiretypes';
import { anthropicAccess, anthropicAccessSchema, anthropicChatCompletionPayload } from './anthropic/anthropic.router';
import { AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic/anthropic.wiretypes';
import { anthropicAccess, anthropicAccessSchema, anthropicMessagesPayloadOrThrow } from './anthropic/anthropic.router';
// Gemini server imports
import { geminiAccess, geminiAccessSchema, geminiGenerateContentTextPayload } from './gemini/gemini.router';
@@ -38,7 +38,7 @@ type MuxingFormat = 'sse' | 'json-nl';
* The peculiarity of our parser is the injection of a JSON structure at the beginning of the stream, to
* communicate parameters before the text starts flowing to the client.
*/
type AIStreamParser = (data: string) => { text: string, close: boolean };
type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean };
const chatStreamingInputSchema = z.object({
@@ -74,9 +74,9 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
let body: object;
switch (access.dialect) {
case 'anthropic':
requestAccess = anthropicAccess(access, '/v1/complete');
body = anthropicChatCompletionPayload(model, history, true);
vendorStreamParser = createStreamParserAnthropic();
requestAccess = anthropicAccess(access, '/v1/messages');
body = anthropicMessagesPayloadOrThrow(model, history, true);
vendorStreamParser = createStreamParserAnthropicMessages();
break;
case 'gemini':
@@ -121,7 +121,7 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
console.error(`/api/llms/stream: fetch issue:`, access.dialect, fetchOrVendorError, requestAccess?.url);
// client-side users visible message
return new NextResponse(`[Issue] ${access.dialect}: ${fetchOrVendorError}`
return new NextResponse(`[Issue] ${serverCapitalizeFirstLetter(access.dialect)}: ${fetchOrVendorError}`
+ (process.env.NODE_ENV === 'development' ? ` · [URL: ${requestAccess?.url}]` : ''), { status: 500 });
}
@@ -217,7 +217,7 @@ function createEventStreamTransformer(muxingFormat: MuxingFormat, vendorTextPars
}
try {
const { text, close } = vendorTextParser(event.data);
const { text, close } = vendorTextParser(event.data, event.event);
if (text)
controller.enqueue(textEncoder.encode(text));
if (close)
@@ -246,19 +246,94 @@ function createEventStreamTransformer(muxingFormat: MuxingFormat, vendorTextPars
/// Stream Parsers
function createStreamParserAnthropic(): AIStreamParser {
let hasBegun = false;
function createStreamParserAnthropicMessages(): AIStreamParser {
let responseMessage: AnthropicWireMessagesResponse | null = null;
let hasErrored = false;
return (data: string) => {
// Note: at this stage the parser only emits the text content, which is streamed to the client.
// In parallel it also builds the responseMessage object; this is not yet used, but already
// carries useful metadata such as token counts.
return (data: string, eventName?: string) => {
let text = '';
const json: AnthropicWire.Complete.Response = JSON.parse(data);
let text = json.completion;
// if we've errored, we should not be receiving more data
if (hasErrored)
console.log('Anthropic stream has errored already, but received more data:', data);
// hack: prepend the model name to the first packet
if (!hasBegun) {
hasBegun = true;
const firstPacket: ChatStreamingFirstOutputPacketSchema = { model: json.model };
text = JSON.stringify(firstPacket) + text;
switch (eventName) {
// Ignore pings
case 'ping':
break;
// Initialize the message content for a new message
case 'message_start':
const firstMessage = !responseMessage;
const { message } = JSON.parse(data);
responseMessage = anthropicWireMessagesResponseSchema.parse(message);
// hack: prepend the model name to the first packet
if (firstMessage) {
const firstPacket: ChatStreamingFirstOutputPacketSchema = { model: responseMessage.model };
text = JSON.stringify(firstPacket);
}
break;
// Initialize content block if needed
case 'content_block_start':
if (responseMessage) {
const { index, content_block } = JSON.parse(data);
if (responseMessage.content[index] === undefined)
responseMessage.content[index] = content_block;
text = responseMessage.content[index].text;
} else
throw new Error('Unexpected content block start');
break;
// Append delta text to the current message content
case 'content_block_delta':
if (responseMessage) {
const { index, delta } = JSON.parse(data);
if (delta.type !== 'text_delta')
throw new Error(`Unexpected content block non-text delta (${delta.type})`);
if (responseMessage.content[index] === undefined)
throw new Error(`Unexpected content block delta location (${index})`);
responseMessage.content[index].text += delta.text;
text = delta.text;
} else
throw new Error('Unexpected content block delta');
break;
// Finalize content block if needed.
case 'content_block_stop':
if (responseMessage) {
const { index } = JSON.parse(data);
if (responseMessage.content[index] === undefined)
throw new Error(`Unexpected content block end location (${index})`);
} else
throw new Error('Unexpected content block stop');
break;
// Optionally handle top-level message changes. Example: updating stop_reason
case 'message_delta':
if (responseMessage) {
const { delta } = JSON.parse(data);
Object.assign(responseMessage, delta);
} else
throw new Error('Unexpected message delta');
break;
// We can now close the message
case 'message_stop':
return { text: '', close: true };
// Occasionally, the server will send errors, such as {"type": "error", "error": {"type": "overloaded_error", "message": "Overloaded"}}
case 'error':
hasErrored = true;
const { error } = JSON.parse(data);
const errorText = (error.type && error.message) ? `${error.type}: ${error.message}` : safeErrorString(error);
return { text: `[Anthropic Server Error] ${errorText}`, close: true };
default:
throw new Error(`Unexpected event name: ${eventName}`);
}
return { text, close: false };
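For reference, the server-sent event sequence the parser above expects for one streamed message (event names as handled in the switch; data payloads abridged and illustrative):

```typescript
// Illustrative SSE event order for one streamed /v1/messages response:
//
//   event: message_start        data: {"type":"message_start","message":{...full response shell...}}
//   event: content_block_start  data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
//   event: content_block_delta  data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}
//   event: content_block_delta  data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"!"}}
//   event: content_block_stop   data: {"type":"content_block_stop","index":0}
//   event: message_delta        data: {"type":"message_delta","delta":{"stop_reason":"end_turn"}}
//   event: message_stop         data: {"type":"message_stop"}
//
// 'ping' may arrive at any point and is ignored; 'error' (e.g. overloaded_error)
// terminates the stream with a client-visible message.
```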
@@ -1,7 +1,6 @@
import * as React from 'react';
import { Alert } from '@mui/joy';
import WarningRoundedIcon from '@mui/icons-material/WarningRounded';
import { FormInputKey } from '~/common/components/forms/FormInputKey';
import { FormTextField } from '~/common/components/forms/FormTextField';
@@ -40,11 +39,9 @@ export function AnthropicSourceSetup(props: { sourceId: DModelSourceId }) {
return <>
<Alert variant='soft' color='warning' startDecorator={<WarningRoundedIcon color='warning' />}>
<Alert variant='soft' color='success'>
<div>
Note: <strong>Claude-3</strong> API support is being added as the Anthropic API has changed. Please refer to <Link
level='body-sm' href='https://github.com/enricoros/big-AGI/issues/443' target='_blank'>issue #443</Link> for
updates.
Note: <strong>Claude-3</strong> models are now supported.
</div>
</Alert>
@@ -86,4 +83,5 @@ export function AnthropicSourceSetup(props: { sourceId: DModelSourceId }) {
{isError && <InlineError error={error} />}
</>;
;
}
+1 -1
@@ -62,7 +62,7 @@ export const ModelVendorAnthropic: IModelVendor<SourceSetupAnthropic, AnthropicA
const { llmRef, llmTemperature, llmResponseTokens } = llmOptions;
try {
return await apiAsync.llmAnthropic.chatGenerate.mutate({
return await apiAsync.llmAnthropic.chatGenerateMessage.mutate({
access,
model: {
id: llmRef,
+4
@@ -48,6 +48,10 @@ export function safeErrorString(error: any): string | null {
return error.toString();
}
export function serverCapitalizeFirstLetter(string: string) {
return string?.length ? (string.charAt(0).toUpperCase() + string.slice(1)) : string;
}
/**
* Weak (meaning the string could be encoded poorly) function that returns a string that can be used to debug a request