Compare commits

...

10 Commits

| Author | SHA1 | Message | Date |
|---|---|---|---|
| Enrico Ros | 2f59e12e20 | Remove log | 2024-03-06 22:20:40 -08:00 |
| Enrico Ros | 30e8652c2a | 1.14.1: Release for Claude-3 | 2024-03-06 22:10:41 -08:00 |
| Enrico Ros | 5ee6aceb60 | cleanups | 2024-03-06 21:51:15 -08:00 |
| Enrico Ros | 6940b6a6d1 | Anthropic: Full support for Claude-3 models. Closes #443, #450. Thanks to @slapglif in #450 for a reference implementation. | 2024-03-06 21:50:24 -08:00 |
| Enrico Ros | 4e33ce9415 | misc | 2024-03-06 20:56:32 -08:00 |
| Enrico Ros | 944e22bde6 | Anthropic: if there's a single system message, treat it as-if it was a user message | 2024-03-06 20:49:59 -08:00 |
| Enrico Ros | 6054fa0a26 | Anthropic: use the new Messages format (thanks @slapglif #450) | 2024-03-06 20:42:33 -08:00 |
| Enrico Ros | 4db13cfed4 | Anthropic: wire types (fully switch to the new Messages API) | 2024-03-06 20:33:59 -08:00 |
| Enrico Ros | 6a6adda2e0 | misc | 2024-03-06 20:33:12 -08:00 |
| Enrico Ros | 4afa55c0db | Anthropic: update models | 2024-03-06 18:36:07 -08:00 |
16 changed files with 386 additions and 139 deletions
+3 -2
@@ -17,15 +17,16 @@ Or fork & run on Vercel
big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap
### What's New in 1.14.0 · March 6, 2024 · Modelmorphic
### What's New in 1.14.1 · March 7, 2024 · Modelmorphic
- **Anthropic** [Claude-3](https://www.anthropic.com/news/claude-3-family) model family support. [#443](https://github.com/enricoros/big-AGI/issues/443)
- New **[Perplexity](https://www.perplexity.ai/)** and **[Groq](https://groq.com/)** integration (thanks @Penagwin). [#407](https://github.com/enricoros/big-AGI/issues/407), [#427](https://github.com/enricoros/big-AGI/issues/427)
- **[LocalAI](https://localai.io/models/)** deep integration, including support for [model galleries](https://github.com/enricoros/big-AGI/issues/411)
- **Mistral** Large and Google **Gemini 1.5** support
- Performance optimizations: runs [much faster](https://twitter.com/enricoros/status/1756553038293303434?utm_source=localhost:3000&utm_medium=big-agi), saves lots of power, reduces memory usage
- Enhanced UX with auto-sizing charts, refined search and folder functionalities, perfected scaling
- And with more UI improvements, documentation, bug fixes (20 tickets), and developer enhancements
- [Release notes](https://github.com/enricoros/big-AGI/releases/tag/v1.14.0), and changes [v1.13.1...v1.14.0](https://github.com/enricoros/big-AGI/compare/v1.13.1...v1.14.0) (233 commits, 8,000+ lines changed)
- [Release notes](https://github.com/enricoros/big-AGI/releases/tag/v1.14.0), and changes [v1.13.1...v1.14.1](https://github.com/enricoros/big-AGI/compare/v1.13.1...v1.14.1) (233 commits, 8,000+ lines changed)
### What's New in 1.13.0 · Feb 8, 2024 · Multi + Mind
+2 -1
@@ -12,8 +12,9 @@ Prediction: OpenAI will release GPT-5 on March 14, 2024. We will support it on d
- milestone: [1.15.0](https://github.com/enricoros/big-agi/milestone/15)
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
### What's New in 1.14.0 · March 6, 2024 · Modelmorphic
### What's New in 1.14.1 · March 7, 2024 · Modelmorphic
- **Anthropic** [Claude-3](https://www.anthropic.com/news/claude-3-family) model family support. [#443](https://github.com/enricoros/big-AGI/issues/443)
- New **[Perplexity](https://www.perplexity.ai/)** and **[Groq](https://groq.com/)** integration (thanks @Penagwin). [#407](https://github.com/enricoros/big-AGI/issues/407), [#427](https://github.com/enricoros/big-AGI/issues/427)
- **[LocalAI](https://localai.io/models/)** deep integration, including support for [model galleries](https://github.com/enricoros/big-AGI/issues/411)
- **Mistral** Large and Google **Gemini 1.5** support
+2 -2
@@ -1,12 +1,12 @@
{
"name": "big-agi",
"version": "1.14.0",
"version": "1.14.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "big-agi",
"version": "1.14.0",
"version": "1.14.1",
"hasInstallScript": true,
"dependencies": {
"@emotion/cache": "^11.11.0",
+1 -1
@@ -1,6 +1,6 @@
{
"name": "big-agi",
"version": "1.14.0",
"version": "1.14.1",
"private": true,
"author": "Enrico Ros <enrico.ros@gmail.com>",
"repository": "https://github.com/enricoros/big-agi",
@@ -64,6 +64,7 @@ const avatarIconSx = { width: 36, height: 36 };
export function makeAvatar(messageAvatar: string | null, messageRole: DMessage['role'] | string, messageOriginLLM: string | undefined, messagePurposeId: SystemPurposeId | undefined, messageSender: string, messageTyping: boolean, size: 'sm' | undefined = undefined): React.JSX.Element {
if (typeof messageAvatar === 'string' && messageAvatar)
return <Avatar alt={messageSender} src={messageAvatar} />;
const mascotSx = size === 'sm' ? avatarIconSx : { width: 64, height: 64 };
switch (messageRole) {
case 'system':
@@ -76,17 +77,18 @@ export function makeAvatar(messageAvatar: string | null, messageRole: DMessage['
// typing gif (people seem to love this, so keeping it after April Fools')
const isTextToImage = messageOriginLLM === 'DALL·E' || messageOriginLLM === 'Prodia';
const isReact = messageOriginLLM?.startsWith('react-');
if (messageTyping) {
// animation: message typing
if (messageTyping)
return <Avatar
alt={messageSender} variant='plain'
src={isTextToImage ? 'https://i.giphy.com/media/5t9ujj9cMisyVjUZ0m/giphy.webp'
: isReact ? 'https://i.giphy.com/media/l44QzsOLXxcrigdgI/giphy.webp'
: 'https://i.giphy.com/media/jJxaUysjzO9ri/giphy.webp'}
src={isTextToImage ? 'https://i.giphy.com/media/5t9ujj9cMisyVjUZ0m/giphy.webp' // brush
: isReact ? 'https://i.giphy.com/media/l44QzsOLXxcrigdgI/giphy.webp' // mind
: 'https://i.giphy.com/media/jJxaUysjzO9ri/giphy.webp'} // typing
sx={{ ...mascotSx, borderRadius: 'sm' }}
/>;
}
// text-to-image: icon
// icon: text-to-image
if (isTextToImage)
return <FormatPaintIcon sx={{
...avatarIconSx,
@@ -95,15 +97,16 @@ export function makeAvatar(messageAvatar: string | null, messageRole: DMessage['
// purpose symbol (if present)
const symbol = SystemPurposes[messagePurposeId!]?.symbol;
if (symbol) return <Box sx={{
fontSize: '24px',
textAlign: 'center',
width: '100%',
minWidth: `${avatarIconSx.width}px`,
lineHeight: `${avatarIconSx.height}px`,
}}>
{symbol}
</Box>;
if (symbol)
return <Box sx={{
fontSize: '24px',
textAlign: 'center',
width: '100%',
minWidth: `${avatarIconSx.width}px`,
lineHeight: `${avatarIconSx.height}px`,
}}>
{symbol}
</Box>;
// default assistant avatar
return <SmartToyOutlinedIcon sx={avatarIconSx} />; // https://mui.com/static/images/avatar/2.jpg
+4 -2
@@ -7,6 +7,7 @@ import AutoStoriesOutlinedIcon from '@mui/icons-material/AutoStoriesOutlined';
import GoogleIcon from '@mui/icons-material/Google';
import LaunchIcon from '@mui/icons-material/Launch';
import { AnthropicIcon } from '~/common/components/icons/vendors/AnthropicIcon';
import { GroqIcon } from '~/common/components/icons/vendors/GroqIcon';
import { LocalAIIcon } from '~/common/components/icons/vendors/LocalAIIcon';
import { MistralIcon } from '~/common/components/icons/vendors/MistralIcon';
@@ -54,11 +55,12 @@ export const NewsItems: NewsItem[] = [
]
}*/
{
versionCode: '1.14.0',
versionCode: '1.14.1',
versionName: 'Modelmorphic',
versionCoverImage: coverV114,
versionDate: new Date('2024-03-06T08:00:00Z'),
versionDate: new Date('2024-03-07T08:00:00Z'),
items: [
{ text: <>Anthropic <B href='https://www.anthropic.com/news/claude-3-family'>Claude-3</B> support for smarter chats</>, issue: 443, icon: AnthropicIcon },
{ text: <><B issue={407}>Perplexity</B> support, including Online models</>, issue: 407, icon: PerplexityIcon },
{ text: <><B issue={427}>Groq</B> support, with speeds up to 500 tok/s</>, issue: 427, icon: GroqIcon },
{ text: <>Support for new Mistral-Large models</>, icon: MistralIcon },
+1 -1
@@ -4,7 +4,7 @@
import { useAppStateStore } from '~/common/state/store-appstate';
export const incrementalNewsVersion: number = 14;
export const incrementalNewsVersion: number = 14.1;
export function shallRedirectToNews() {
+3
@@ -15,6 +15,9 @@ export function prettyBaseModel(model: string | undefined): string {
if (model.includes('gpt-3.5-turbo-16k')) return '3.5 Turbo 16k';
if (model.includes('gpt-3.5-turbo')) return '3.5 Turbo';
if (model.endsWith('.bin')) return model.slice(0, -4);
// [Anthropic]
if (model.includes('claude-3-opus')) return 'Claude 3 Opus';
if (model.includes('claude-3-sonnet')) return 'Claude 3 Sonnet';
// [LM Studio]
if (model.startsWith('C:\\') || model.startsWith('D:\\'))
return getModelFromFile(model).replace('.gguf', '');
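For illustration, the new [Anthropic] branch applied to the dated model ids Anthropic publishes; a minimal, self-contained sketch where `prettyClaudeModel` is a hypothetical extract of the logic above:

```typescript
// Hypothetical extract of the new [Anthropic] branch of prettyBaseModel above,
// applied to dated model ids such as 'claude-3-opus-20240229'.
function prettyClaudeModel(model: string): string | undefined {
  if (model.includes('claude-3-opus')) return 'Claude 3 Opus';
  if (model.includes('claude-3-sonnet')) return 'Claude 3 Sonnet';
  return undefined; // fall through to the other branches
}

console.log(prettyClaudeModel('claude-3-opus-20240229'));   // 'Claude 3 Opus'
console.log(prettyClaudeModel('claude-3-sonnet-20240229')); // 'Claude 3 Sonnet'
```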
+1 -1
@@ -1,5 +1,5 @@
export function capitalizeFirstLetter(string: string) {
return string.charAt(0).toUpperCase() + string.slice(1);
return string?.length ? (string.charAt(0).toUpperCase() + string.slice(1)) : string;
}
export function createBase36Uid(checkDuplicates: string[]): string {
@@ -40,6 +40,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-11-21'),
description: 'Superior performance on tasks that require complex reasoning, with reduced model hallucination rates',
contextWindow: 200000,
maxCompletionTokens: 4096,
pricing: {
cpmPrompt: 0.008,
cpmCompletion: 0.024,
@@ -52,6 +53,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-07-11'),
description: 'Superior performance on tasks that require complex reasoning',
contextWindow: 100000,
maxCompletionTokens: 4096,
pricing: {
cpmPrompt: 0.008,
cpmCompletion: 0.024,
@@ -65,6 +67,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-08-09'),
description: 'Low-latency, high throughput model',
contextWindow: 100000,
maxCompletionTokens: 4096,
pricing: {
cpmPrompt: 0.00163,
cpmCompletion: 0.00551,
@@ -77,6 +80,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-03-14'),
description: 'Precise and fast',
contextWindow: 100000,
maxCompletionTokens: 2048,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
@@ -86,16 +90,8 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
created: roundTime('2023-03-14'),
description: 'Claude 1.3 is the latest version of Claude v1',
contextWindow: 100000,
maxCompletionTokens: 4096,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
{
id: 'claude-1.0',
label: 'Claude 1',
created: roundTime('2023-03-14'),
description: 'Claude 1.0 is the first version of Claude',
contextWindow: 9000,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
}
];
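The Claude 3 entries added by the "Anthropic: update models" commit fall outside these hunks; for context, an illustrative entry in the same `ModelDescriptionSchema` shape (the description, pricing, and interfaces here are assumptions, not copied from the diff):

```typescript
// Illustrative only: a Claude 3 entry in the same shape as the hunks above.
// Uses roundTime and LLM_IF_OAI_Chat from the surrounding file; pricing assumes
// Anthropic's published $15 / $75 per million tokens for Opus.
{
  id: 'claude-3-opus-20240229',
  label: 'Claude 3 Opus',
  created: roundTime('2024-03-04'),
  description: 'Most capable model of the Claude 3 family',
  contextWindow: 200000,
  maxCompletionTokens: 4096,
  pricing: {
    cpmPrompt: 0.015,
    cpmCompletion: 0.075,
  },
  interfaces: [LLM_IF_OAI_Chat],
},
```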
@@ -8,30 +8,36 @@ import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
import { llmsListModelsOutputSchema, llmsChatGenerateOutputSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { AnthropicWire } from './anthropic.wiretypes';
import { AnthropicWireMessagesRequest, anthropicWireMessagesRequestSchema, AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic.wiretypes';
import { hardcodedAnthropicModels } from './anthropic.models';
// Default hosts
const DEFAULT_API_VERSION_HEADERS = {
'anthropic-version': '2023-06-01',
'anthropic-beta': 'messages-2023-12-15',
};
const DEFAULT_MAX_TOKENS = 2048;
const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com';
const DEFAULT_HELICONE_ANTHROPIC_HOST = 'anthropic.hconeai.com';
// Mappers
export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string): { headers: HeadersInit, url: string } {
// API version
const apiVersion = '2023-06-01';
async function anthropicPOST<TOut extends object, TPostBody extends object>(access: AnthropicAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = anthropicAccess(access, apiPath);
return await fetchJsonOrTRPCError<TOut, TPostBody>(url, 'POST', headers, body, 'Anthropic');
}
export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string): { headers: HeadersInit, url: string } {
// API key
const anthropicKey = access.anthropicKey || env.ANTHROPIC_API_KEY || '';
// break for the missing key only on the default host
if (!anthropicKey)
if (!access.anthropicHost && !env.ANTHROPIC_API_HOST)
throw new Error('Missing Anthropic API Key. Add it on the UI (Models Setup) or server side (your deployment).');
if (!anthropicKey && !(access.anthropicHost || env.ANTHROPIC_API_HOST))
throw new Error('Missing Anthropic API Key. Add it on the UI (Models Setup) or server side (your deployment).');
// API host
let anthropicHost = fixupHost(access.anthropicHost || env.ANTHROPIC_API_HOST || DEFAULT_ANTHROPIC_HOST, apiPath);
@@ -49,7 +55,7 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string):
headers: {
'Accept': 'application/json',
'Content-Type': 'application/json',
'anthropic-version': apiVersion,
...DEFAULT_API_VERSION_HEADERS,
'X-API-Key': anthropicKey,
...(heliKey && { 'Helicone-Auth': `Bearer ${heliKey}` }),
},
@@ -57,23 +63,68 @@ export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string):
};
}
export function anthropicChatCompletionPayload(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean): AnthropicWire.Complete.Request {
// encode the prompt for Claude models
const prompt = history.map(({ role, content }) => {
return role === 'assistant' ? `\n\nAssistant: ${content}` : `\n\nHuman: ${content}`;
}).join('') + '\n\nAssistant:';
return {
prompt,
model: model.id,
stream,
...(model.temperature && { temperature: model.temperature }),
...(model.maxTokens && { max_tokens_to_sample: model.maxTokens })
};
}
export function anthropicMessagesPayloadOrThrow(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean): AnthropicWireMessagesRequest {
async function anthropicPOST<TOut extends object, TPostBody extends object>(access: AnthropicAccessSchema, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = anthropicAccess(access, apiPath);
return await fetchJsonOrTRPCError<TOut, TPostBody>(url, 'POST', headers, body, 'Anthropic');
// Take the System prompt, if it's the first message
// But if it's the only message, treat it as a user message
history = [...history];
let systemPrompt: string | undefined = undefined;
if (history[0]?.role === 'system' && history.length > 1)
systemPrompt = history.shift()?.content;
// Transform the OpenAIHistorySchema into the target messages format, ensuring that roles alternate between 'user' and 'assistant'
const messages = history.reduce(
(acc, historyItem, index) => {
const lastMessage: AnthropicWireMessagesRequest['messages'][number] | undefined = acc[acc.length - 1];
const anthropicRole = historyItem.role === 'assistant' ? 'assistant' : 'user';
if (index === 0 || anthropicRole !== lastMessage?.role) {
// Add a new message object if the role is different from the previous message
acc.push({
role: anthropicRole,
content: [
{ type: 'text', text: historyItem.content },
],
});
} else {
// Merge consecutive messages with the same role
(lastMessage.content as AnthropicWireMessagesRequest['messages'][number]['content']).push(
{ type: 'text', text: historyItem.content },
);
}
return acc;
},
[] as AnthropicWireMessagesRequest['messages'],
);
// NOTE: if the last message is 'assistant', then the API will perform a continuation - shall we add a user message? TBD
// NOTE: the following code has been disabled because Anthropic will reject empty text blocks
// If the messages array is empty, add a default user message
// if (messages.length === 0)
// messages.push({ role: 'user', content: [{ type: 'text', text: '' }] });
// Construct the request payload
const payload: AnthropicWireMessagesRequest = {
model: model.id,
...(systemPrompt !== undefined && { system: systemPrompt }),
messages: messages,
max_tokens: model.maxTokens || DEFAULT_MAX_TOKENS,
stream: stream,
...(model.temperature !== undefined && { temperature: model.temperature }),
// metadata: not useful to us
// stop_sequences: not useful to us
// top_p: not useful to us
// top_k: not useful to us
};
// Validate the payload against the schema to ensure correctness
const validated = anthropicWireMessagesRequestSchema.safeParse(payload);
if (!validated.success)
throw new Error(`Invalid message sequence for Anthropic models: ${validated.error.errors?.[0]?.message || validated.error}`);
return validated.data;
}
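To make the normalization rule concrete, here is a minimal standalone sketch of the same logic (simplified types; names are hypothetical): the leading system prompt is lifted out, roles are coerced to 'user' / 'assistant', and consecutive same-role turns are merged into one message with multiple text blocks.

```typescript
// Standalone sketch (hypothetical names) of the normalization performed by
// anthropicMessagesPayloadOrThrow above.
type Turn = { role: 'system' | 'user' | 'assistant'; content: string };
type TextBlock = { type: 'text'; text: string };
type Msg = { role: 'user' | 'assistant'; content: TextBlock[] };

function toAnthropicMessages(history: Turn[]): { system?: string; messages: Msg[] } {
  history = [...history];
  let system: string | undefined = undefined;
  // a lone system message is kept as a user message instead (as in the code above)
  if (history[0]?.role === 'system' && history.length > 1)
    system = history.shift()!.content;
  const messages = history.reduce((acc, turn) => {
    const role = turn.role === 'assistant' ? 'assistant' : 'user';
    const last = acc[acc.length - 1];
    if (last && last.role === role)
      last.content.push({ type: 'text', text: turn.content }); // merge same-role turns
    else
      acc.push({ role, content: [{ type: 'text', text: turn.content }] });
    return acc;
  }, [] as Msg[]);
  return { system, messages };
}

// Two consecutive user turns collapse into one 'user' message with two text blocks:
console.log(JSON.stringify(toAnthropicMessages([
  { role: 'system', content: 'Be terse.' },
  { role: 'user', content: 'Hi' },
  { role: 'user', content: 'Anyone there?' },
  { role: 'assistant', content: 'Yes.' },
]), null, 2));
```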
@@ -101,45 +152,36 @@ const chatGenerateInputSchema = z.object({
export const llmAnthropicRouter = createTRPCRouter({
/* Anthropic: list models
*
* See https://github.com/anthropics/anthropic-sdk-typescript/commit/7c53ded6b7f5f3efec0df295181f18469c37e09d?diff=unified for
* some details on the models, as the API docs are scarce: https://docs.anthropic.com/claude/reference/selecting-a-model
*/
/* [Anthropic] list models - https://docs.anthropic.com/claude/docs/models-overview */
listModels: publicProcedure
.input(listModelsInputSchema)
.output(llmsListModelsOutputSchema)
.query(() => ({ models: hardcodedAnthropicModels })),
/* Anthropic: Chat generation */
chatGenerate: publicProcedure
/* [Anthropic] Message generation (non-streaming) */
chatGenerateMessage: publicProcedure
.input(chatGenerateInputSchema)
.output(llmsChatGenerateOutputSchema)
.mutation(async ({ input }) => {
.mutation(async ({ input: { access, model, history } }) => {
const { access, model, history } = input;
// NOTES: doesn't support functions yet, supports multi-modal inputs (but they're not in our history, yet)
// ensure history has at least one message, and not from the assistant
if (history.length === 0 || history[0].role === 'assistant')
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] Need one human character at least` });
// throw if the message sequence is not okay
const payload = anthropicMessagesPayloadOrThrow(model, history, false);
const response = await anthropicPOST<AnthropicWireMessagesResponse, AnthropicWireMessagesRequest>(access, payload, '/v1/messages');
const completion = anthropicWireMessagesResponseSchema.parse(response);
const wireCompletions = await anthropicPOST<AnthropicWire.Complete.Response, AnthropicWire.Complete.Request>(
access,
anthropicChatCompletionPayload(model, history, false),
'/v1/complete',
);
// validate output
if (!completion || completion.type !== 'message' || completion.role !== 'assistant' || completion.stop_reason === undefined)
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] Invalid Message` });
if (completion.content.length !== 1 || completion.content[0].type !== 'text')
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] No Single Text Message (${completion.content.length})` });
// expect a single output
if (wireCompletions.completion === undefined)
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] No completions` });
if (wireCompletions.stop_reason === undefined)
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[Anthropic Issue] No stop_reason` });
// check for a function output
// got the completion (non-streaming)
return {
role: 'assistant',
finish_reason: wireCompletions.stop_reason === 'stop_sequence' ? 'stop' : 'length',
content: wireCompletions.completion || '',
role: completion.role,
content: completion.content[0].text,
finish_reason: completion.stop_reason === 'max_tokens' ? 'length' : 'stop',
};
}),
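On the wire, the non-streaming path now amounts to a single POST to `/v1/messages` with the version headers defined above; a hedged sketch with placeholder model id and API key:

```typescript
// Sketch only: the raw HTTP call the non-streaming path now performs.
// Headers mirror DEFAULT_API_VERSION_HEADERS above; model id and key are placeholders.
async function callMessagesApi(apiKey: string) {
  const response = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Accept': 'application/json',
      'Content-Type': 'application/json',
      'anthropic-version': '2023-06-01',
      'anthropic-beta': 'messages-2023-12-15',
      'X-API-Key': apiKey,
    },
    body: JSON.stringify({
      model: 'claude-3-opus-20240229',
      max_tokens: 2048, // DEFAULT_MAX_TOKENS above
      messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello, Claude' }] }],
    }),
  });
  return await response.json(); // validate with anthropicWireMessagesResponseSchema
}
```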
@@ -1,29 +1,151 @@
export namespace AnthropicWire {
export namespace Complete {
export interface Request {
prompt: string;
model: string;
max_tokens_to_sample?: number;
stop_sequences?: string[];
stream?: boolean;
temperature?: number;
top_k?: number;
top_p?: number;
metadata?: {
user_id?: string;
};
}
import { z } from 'zod';
export interface Response {
completion: string;
stop_reason: 'stop_sequence' | 'max_tokens' | string;
model: string;
stop: string | null; // the stop sequence, if stop_reason is 'stop_sequence'
log_id: string; // some log
// removed since the 2023-06-01 API version
// truncated: boolean;
// exception: string | null;
}
}
}
// text, e.g.: { 'type': 'text', 'text': 'Hello, Claude' }
const anthropicWireTextBlockSchema = z.object({
type: z.literal('text'),
text: z.string(),
});
// image, e.g.: { 'type': 'image', 'source': { 'type': 'base64', 'media_type': 'image/jpeg', 'data': '/9j/4AAQSkZJRg...' } }
const anthropicWireImageBlockSchema = z.object({
type: z.literal('image'),
source: z.object({
type: z.enum(['base64']),
media_type: z.enum(['image/jpeg', 'image/png', 'image/gif', 'image/webp']),
data: z.string(),
}),
});
const anthropicWireMessagesSchema = z.array(
z.object({
role: z.enum(['user', 'assistant']),
// NOTE: content could be a string or an array of text/image blocks; for a cleaner
// implementation we assume it is always an array
// content: z.union([
// z.array(z.union([anthropicWireTextBlockSchema, anthropicWireImageBlockSchema])),
// z.string(),
// ]),
content: z.array(
z.union([
anthropicWireTextBlockSchema,
anthropicWireImageBlockSchema,
]),
),
}),
);
export const anthropicWireMessagesRequestSchema = z.object({
model: z.string(),
/**
* If you want to include a system prompt, you can use the top-level system parameter — there is no "system" role for input messages in the Messages API.
*/
system: z.string().optional(),
/**
* (required) Input messages. Operates on alternating user and assistant conversational turns; the first message must always use the user role.
* If the final message uses the assistant role, the response content will continue immediately from the content in that message.
* This can be used to constrain part of the model's response.
*/
messages: anthropicWireMessagesSchema.refine(
(messages) => {
// Ensure the first message uses the user role
if (messages.length === 0 || messages[0].role !== 'user')
return false;
// Ensure messages alternate between user and assistant roles
for (let i = 1; i < messages.length; i++)
if (messages[i].role === messages[i - 1].role)
return false;
return true;
},
{ message: `messages must alternate between User and Assistant roles, starting with the User role` },
),
/**
* (required) The maximum number of tokens to generate before stopping.
*/
max_tokens: z.number(),
/**
* (optional) Metadata to include with the request.
* user_id: This should be a uuid, hash value, or other opaque identifier.
*/
metadata: z.object({
user_id: z.string().optional(),
}).optional(),
/**
* Custom text sequences that will cause the model to stop generating.
*/
stop_sequences: z.array(z.string()).optional(),
/**
* Whether to incrementally stream the response using server-sent events. Default: false
*/
stream: z.boolean().optional(),
/**
* Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks.
*/
temperature: z.number().optional(),
/**
* Use nucleus sampling.
* Recommended for advanced use cases only. You usually only need to use temperature.
*/
top_p: z.number().optional(),
/**
* Only sample from the top K options for each subsequent token.
* Recommended for advanced use cases only. You usually only need to use temperature.
*/
top_k: z.number().optional(),
});
export type AnthropicWireMessagesRequest = z.infer<typeof anthropicWireMessagesRequestSchema>;
export const anthropicWireMessagesResponseSchema = z.object({
// Unique object identifier.
id: z.string(),
// For Messages, this is always "message".
type: z.literal('message'),
// Conversational role of the generated message. This will always be "assistant".
role: z.literal('assistant'),
/**
* Content generated by the model.
* This is an array of content blocks, each of which has a type that determines its shape. Currently, the only type in responses is "text".
*/
content: z.array(anthropicWireTextBlockSchema),
// The model that handled the request.
model: z.string(),
/**
* This may be one of the following values:
*
* "end_turn": the model reached a natural stopping point
* "max_tokens": we exceeded the requested max_tokens or the model's maximum
* "stop_sequence": one of your provided custom stop_sequences was generated
* Note that these values are different than those in /v1/complete, where end_turn and stop_sequence were not differentiated.
*
* In non-streaming mode this value is always non-null. In streaming mode, it is null in the message_start event and non-null otherwise.
*/
stop_reason: z.enum(['end_turn', 'max_tokens', 'stop_sequence']).nullable(),
// Which custom stop sequence was generated, if any.
stop_sequence: z.string().nullable(),
// Billing and rate-limit usage.
usage: z.object({
input_tokens: z.number(),
output_tokens: z.number(),
}),
});
export type AnthropicWireMessagesResponse = z.infer<typeof anthropicWireMessagesResponseSchema>;
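The `.refine()` on `messages` is what turns a bad turn sequence into the "Invalid message sequence" error thrown by `anthropicMessagesPayloadOrThrow`; a quick runnable check against the schema above:

```typescript
// Runnable check: back-to-back same-role messages fail the alternation refine.
const bad = anthropicWireMessagesRequestSchema.safeParse({
  model: 'claude-3-sonnet-20240229',
  max_tokens: 1024,
  messages: [
    { role: 'user', content: [{ type: 'text', text: 'one' }] },
    { role: 'user', content: [{ type: 'text', text: 'two' }] }, // same role twice
  ],
});
console.log(bad.success); // false
if (!bad.success)
  console.log(bad.error.errors[0].message);
// → 'messages must alternate between User and Assistant roles, starting with the User role'
```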
+94 -19
@@ -2,12 +2,12 @@ import { z } from 'zod';
import { NextRequest, NextResponse } from 'next/server';
import { createParser as createEventsourceParser, EventSourceParseCallback, EventSourceParser, ParsedEvent, ReconnectInterval } from 'eventsource-parser';
import { createEmptyReadableStream, debugGenerateCurlCommand, nonTrpcServerFetchOrThrow, safeErrorString, SERVER_DEBUG_WIRE } from '~/server/wire';
import { createEmptyReadableStream, debugGenerateCurlCommand, nonTrpcServerFetchOrThrow, safeErrorString, SERVER_DEBUG_WIRE, serverCapitalizeFirstLetter } from '~/server/wire';
// Anthropic server imports
import type { AnthropicWire } from './anthropic/anthropic.wiretypes';
import { anthropicAccess, anthropicAccessSchema, anthropicChatCompletionPayload } from './anthropic/anthropic.router';
import { AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic/anthropic.wiretypes';
import { anthropicAccess, anthropicAccessSchema, anthropicMessagesPayloadOrThrow } from './anthropic/anthropic.router';
// Gemini server imports
import { geminiAccess, geminiAccessSchema, geminiGenerateContentTextPayload } from './gemini/gemini.router';
@@ -38,7 +38,7 @@ type MuxingFormat = 'sse' | 'json-nl';
* The peculiarity of our parser is the injection of a JSON structure at the beginning of the stream, to
* communicate parameters before the text starts flowing to the client.
*/
type AIStreamParser = (data: string) => { text: string, close: boolean };
type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean };
const chatStreamingInputSchema = z.object({
@@ -74,9 +74,9 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
let body: object;
switch (access.dialect) {
case 'anthropic':
requestAccess = anthropicAccess(access, '/v1/complete');
body = anthropicChatCompletionPayload(model, history, true);
vendorStreamParser = createStreamParserAnthropic();
requestAccess = anthropicAccess(access, '/v1/messages');
body = anthropicMessagesPayloadOrThrow(model, history, true);
vendorStreamParser = createStreamParserAnthropicMessages();
break;
case 'gemini':
@@ -121,7 +121,7 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
console.error(`/api/llms/stream: fetch issue:`, access.dialect, fetchOrVendorError, requestAccess?.url);
// client-side users visible message
return new NextResponse(`[Issue] ${access.dialect}: ${fetchOrVendorError}`
return new NextResponse(`[Issue] ${serverCapitalizeFirstLetter(access.dialect)}: ${fetchOrVendorError}`
+ (process.env.NODE_ENV === 'development' ? ` · [URL: ${requestAccess?.url}]` : ''), { status: 500 });
}
@@ -217,7 +217,7 @@ function createEventStreamTransformer(muxingFormat: MuxingFormat, vendorTextPars
}
try {
const { text, close } = vendorTextParser(event.data);
const { text, close } = vendorTextParser(event.data, event.event);
if (text)
controller.enqueue(textEncoder.encode(text));
if (close)
@@ -246,19 +246,94 @@ function createEventStreamTransformer(muxingFormat: MuxingFormat, vendorTextPars
/// Stream Parsers
function createStreamParserAnthropic(): AIStreamParser {
let hasBegun = false;
function createStreamParserAnthropicMessages(): AIStreamParser {
let responseMessage: AnthropicWireMessagesResponse | null = null;
let hasErrored = false;
return (data: string) => {
// Note: at this stage the parser only emits the text content, which is streamed to the client.
// In parallel it also builds the responseMessage object; this is not yet used, but already
// carries useful metadata such as token counts.
return (data: string, eventName?: string) => {
let text = '';
const json: AnthropicWire.Complete.Response = JSON.parse(data);
let text = json.completion;
// if we've errored, we should not be receiving more data
if (hasErrored)
console.log('Anthropic stream has errored already, but received more data:', data);
// hack: prepend the model name to the first packet
if (!hasBegun) {
hasBegun = true;
const firstPacket: ChatStreamingFirstOutputPacketSchema = { model: json.model };
text = JSON.stringify(firstPacket) + text;
switch (eventName) {
// Ignore pings
case 'ping':
break;
// Initialize the message content for a new message
case 'message_start':
const firstMessage = !responseMessage;
const { message } = JSON.parse(data);
responseMessage = anthropicWireMessagesResponseSchema.parse(message);
// hack: prepend the model name to the first packet
if (firstMessage) {
const firstPacket: ChatStreamingFirstOutputPacketSchema = { model: responseMessage.model };
text = JSON.stringify(firstPacket);
}
break;
// Initialize content block if needed
case 'content_block_start':
if (responseMessage) {
const { index, content_block } = JSON.parse(data);
if (responseMessage.content[index] === undefined)
responseMessage.content[index] = content_block;
text = responseMessage.content[index].text;
} else
throw new Error('Unexpected content block start');
break;
// Append delta text to the current message content
case 'content_block_delta':
if (responseMessage) {
const { index, delta } = JSON.parse(data);
if (delta.type !== 'text_delta')
throw new Error(`Unexpected content block non-text delta (${delta.type})`);
if (responseMessage.content[index] === undefined)
throw new Error(`Unexpected content block delta location (${index})`);
responseMessage.content[index].text += delta.text;
text = delta.text;
} else
throw new Error('Unexpected content block delta');
break;
// Finalize content block if needed.
case 'content_block_stop':
if (responseMessage) {
const { index } = JSON.parse(data);
if (responseMessage.content[index] === undefined)
throw new Error(`Unexpected content block end location (${index})`);
} else
throw new Error('Unexpected content block stop');
break;
// Optionally handle top-level message changes. Example: updating stop_reason
case 'message_delta':
if (responseMessage) {
const { delta } = JSON.parse(data);
Object.assign(responseMessage, delta);
} else
throw new Error('Unexpected message delta');
break;
// We can now close the message
case 'message_stop':
return { text: '', close: true };
// Occasionally, the server will send errors, such as {"type": "error", "error": {"type": "overloaded_error", "message": "Overloaded"}}
case 'error':
hasErrored = true;
const { error } = JSON.parse(data);
const errorText = (error.type && error.message) ? `${error.type}: ${error.message}` : safeErrorString(error);
return { text: `[Anthropic Server Error] ${errorText}`, close: true };
default:
throw new Error(`Unexpected event name: ${eventName}`);
}
return { text, close: false };
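For reference, the server-sent event sequence the parser above expects for one streamed message (event names as handled in the switch; data payloads abridged and illustrative):

```typescript
// Illustrative SSE event order for one streamed /v1/messages response:
//
//   event: message_start        data: {"type":"message_start","message":{...full response shell...}}
//   event: content_block_start  data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
//   event: content_block_delta  data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}
//   event: content_block_delta  data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"!"}}
//   event: content_block_stop   data: {"type":"content_block_stop","index":0}
//   event: message_delta        data: {"type":"message_delta","delta":{"stop_reason":"end_turn"}}
//   event: message_stop         data: {"type":"message_stop"}
//
// 'ping' may arrive at any point and is ignored; 'error' (e.g. overloaded_error)
// terminates the stream with a client-visible message.
```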
@@ -1,7 +1,6 @@
import * as React from 'react';
import { Alert } from '@mui/joy';
import WarningRoundedIcon from '@mui/icons-material/WarningRounded';
import { FormInputKey } from '~/common/components/forms/FormInputKey';
import { FormTextField } from '~/common/components/forms/FormTextField';
@@ -40,11 +39,9 @@ export function AnthropicSourceSetup(props: { sourceId: DModelSourceId }) {
return <>
<Alert variant='soft' color='warning' startDecorator={<WarningRoundedIcon color='warning' />}>
<Alert variant='soft' color='success'>
<div>
Note: <strong>Claude-3</strong> API support is being added as the Anthropic API has changed. Please refer to <Link
level='body-sm' href='https://github.com/enricoros/big-AGI/issues/443' target='_blank'>issue #443</Link> for
updates.
Note: <strong>Claude-3</strong> models are now supported.
</div>
</Alert>
@@ -86,4 +83,5 @@ export function AnthropicSourceSetup(props: { sourceId: DModelSourceId }) {
{isError && <InlineError error={error} />}
</>;
;
}
+1 -1
@@ -62,7 +62,7 @@ export const ModelVendorAnthropic: IModelVendor<SourceSetupAnthropic, AnthropicA
const { llmRef, llmTemperature, llmResponseTokens } = llmOptions;
try {
return await apiAsync.llmAnthropic.chatGenerate.mutate({
return await apiAsync.llmAnthropic.chatGenerateMessage.mutate({
access,
model: {
id: llmRef,
+4
@@ -48,6 +48,10 @@ export function safeErrorString(error: any): string | null {
return error.toString();
}
export function serverCapitalizeFirstLetter(string: string) {
return string?.length ? (string.charAt(0).toUpperCase() + string.slice(1)) : string;
}
/**
* Weak (meaning the string could be encoded poorly) function that returns a string that can be used to debug a request